verifiers 0.1.15.dev5__tar.gz → 0.1.15.dev6__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (312)
  1. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/PKG-INFO +14 -8
  2. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/README.md +13 -7
  3. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_langchain_deep_agents_wikispeedia.py +74 -19
  4. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_mcp_search_env.py +5 -3
  5. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_opencode_harbor.py +2 -2
  6. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_save_utils.py +4 -0
  7. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_bfcl.py +18 -10
  8. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_config_extension.py +181 -29
  9. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_group_reward_env.py +8 -3
  10. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_rlm_swe.py +3 -3
  11. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/__init__.py +1 -1
  12. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/openai_chat_completions_client.py +3 -24
  13. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/openai_completions_client.py +5 -2
  14. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/environment.py +4 -0
  15. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/init.py +77 -15
  16. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/types.py +13 -8
  17. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/types.py +2 -2
  18. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/response_utils.py +29 -3
  19. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/save_utils.py +1 -3
  20. verifiers-0.1.15.dev6/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +252 -0
  21. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/README.md +21 -37
  22. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/RE_MIGRATION.md +4 -4
  23. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/config.py +66 -27
  24. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/config_utils.py +24 -1
  25. verifiers-0.1.15.dev5/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -73
  26. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/.gitignore +0 -0
  27. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/LICENSE +0 -0
  28. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/pyproject.toml +0 -0
  29. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/AGENTS.md +0 -0
  30. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/README.md +0 -0
  31. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/__init__.py +0 -0
  32. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/conftest.py +0 -0
  33. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_browser_env.py +0 -0
  34. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_build_script.py +0 -0
  35. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_cli_agent_env.py +0 -0
  36. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_client_auth_errors.py +0 -0
  37. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_client_config.py +0 -0
  38. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_client_multimodal_types.py +0 -0
  39. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_composable_env.py +0 -0
  40. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_context_token_metrics.py +0 -0
  41. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_decorator_ranks.py +0 -0
  42. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_endpoint_registry.py +0 -0
  43. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_env_group.py +0 -0
  44. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_env_server.py +0 -0
  45. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_environment.py +0 -0
  46. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_environment_extra.py +0 -0
  47. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_envs.py +0 -0
  48. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_error_chain.py +0 -0
  49. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_eval_cli.py +0 -0
  50. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_eval_display.py +0 -0
  51. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_eval_utils.py +0 -0
  52. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_gepa_cli.py +0 -0
  53. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_gepa_utils.py +0 -0
  54. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_gym_env.py +0 -0
  55. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_harbor_env_mcp.py +0 -0
  56. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_imports.py +0 -0
  57. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_install_utils.py +0 -0
  58. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_interception_utils.py +0 -0
  59. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_lean_task.py +0 -0
  60. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_logging.py +0 -0
  61. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_math_rubric.py +0 -0
  62. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_maybe_think_parser.py +0 -0
  63. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_message_utils.py +0 -0
  64. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_message_utils_multimodal.py +0 -0
  65. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_multiturn_env.py +0 -0
  66. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_nemorl_client.py +0 -0
  67. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_openai_chat_completions_token_client.py +0 -0
  68. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_openai_responses_client.py +0 -0
  69. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_opencode_rlm_env.py +0 -0
  70. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_openenv_client.py +0 -0
  71. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_parser.py +0 -0
  72. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_path_utils.py +0 -0
  73. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_per_turn_timing.py +0 -0
  74. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_pricing_utils.py +0 -0
  75. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_prime_plugin.py +0 -0
  76. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_renderer_client.py +0 -0
  77. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_renderer_e2e.py +0 -0
  78. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_rlm_composable_env.py +0 -0
  79. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_rlm_env.py +0 -0
  80. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_rubric.py +0 -0
  81. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_rubric_group.py +0 -0
  82. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_sandbox_env.py +0 -0
  83. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_sandbox_mixin.py +0 -0
  84. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_setup_script.py +0 -0
  85. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_singleturn_env.py +0 -0
  86. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_stateful_tool_env.py +0 -0
  87. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_think_parser.py +0 -0
  88. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_tool_env.py +0 -0
  89. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_tool_utils.py +0 -0
  90. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_trajectory_processing.py +0 -0
  91. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_tui_info_formatting.py +0 -0
  92. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_types.py +0 -0
  93. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_empty_completions.py +0 -0
  94. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_endpoint_protocols.py +0 -0
  95. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_example_counts.py +0 -0
  96. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_harbor_cli.py +0 -0
  97. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_mini_swe_agent.py +0 -0
  98. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_runtime_lifecycle.py +0 -0
  99. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_scoring_functions.py +0 -0
  100. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_v1_taskset_bindings.py +0 -0
  101. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_wordle_env.py +0 -0
  102. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/tests/test_xml_parser.py +0 -0
  103. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/AGENTS.md +0 -0
  104. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/__init__.py +0 -0
  105. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/__init__.py +0 -0
  106. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/build.py +0 -0
  107. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/eval.py +0 -0
  108. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/gepa.py +0 -0
  109. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/init.py +0 -0
  110. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/install.py +0 -0
  111. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/commands/setup.py +0 -0
  112. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/plugins/__init__.py +0 -0
  113. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/plugins/prime.py +0 -0
  114. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/cli/tui.py +0 -0
  115. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/__init__.py +0 -0
  116. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/anthropic_messages_client.py +0 -0
  117. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/client.py +0 -0
  118. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  119. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  120. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/openai_responses_client.py +0 -0
  121. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/clients/renderer_client.py +0 -0
  122. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/decorators.py +0 -0
  123. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/AGENTS.md +0 -0
  124. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/__init__.py +0 -0
  125. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/env_group.py +0 -0
  126. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/README.md +0 -0
  127. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/__init__.py +0 -0
  128. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  129. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/README.md +0 -0
  130. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/__init__.py +0 -0
  131. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/_filter.py +0 -0
  132. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  133. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harness.py +0 -0
  134. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  135. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  136. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  137. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  138. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  139. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  140. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/task.py +0 -0
  141. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  142. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  143. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  144. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  145. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  146. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  147. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  148. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  149. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  150. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  151. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  152. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  153. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  154. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  155. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  156. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  157. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  158. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  159. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  160. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  161. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  162. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  163. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  164. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/gym_env.py +0 -0
  165. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  166. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  167. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  168. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/mcp_env.py +0 -0
  169. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/opencode_env.py +0 -0
  170. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  171. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  172. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/rlm_env.py +0 -0
  173. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  174. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/utils/__init__.py +0 -0
  175. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  176. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  177. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/README.md +0 -0
  178. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/__init__.py +0 -0
  179. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/README.md +0 -0
  180. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  181. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  182. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  183. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  184. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  185. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  186. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/openenv_env.py +0 -0
  187. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  188. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/integrations/textarena_env.py +0 -0
  189. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/multiturn_env.py +0 -0
  190. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/python_env.py +0 -0
  191. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/sandbox_env.py +0 -0
  192. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/singleturn_env.py +0 -0
  193. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/stateful_tool_env.py +0 -0
  194. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/envs/tool_env.py +0 -0
  195. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/errors.py +0 -0
  196. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/gepa/__init__.py +0 -0
  197. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/gepa/adapter.py +0 -0
  198. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/gepa/config.py +0 -0
  199. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/gepa/display.py +0 -0
  200. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/gepa/gepa_utils.py +0 -0
  201. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/parsers/__init__.py +0 -0
  202. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/parsers/maybe_think_parser.py +0 -0
  203. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/parsers/parser.py +0 -0
  204. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/parsers/think_parser.py +0 -0
  205. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/parsers/xml_parser.py +0 -0
  206. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/README.md +0 -0
  207. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/__init__.py +0 -0
  208. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/inference/__init__.py +0 -0
  209. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/inference/client.py +0 -0
  210. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/inference/server.py +0 -0
  211. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/trainer/__init__.py +0 -0
  212. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/trainer/config.py +0 -0
  213. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/trainer/orchestrator.py +0 -0
  214. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/trainer/trainer.py +0 -0
  215. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rl/trainer/utils.py +0 -0
  216. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/__init__.py +0 -0
  217. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  218. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/judge_rubric.py +0 -0
  219. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/math_rubric.py +0 -0
  220. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/rubric.py +0 -0
  221. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/rubrics/rubric_group.py +0 -0
  222. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/__init__.py +0 -0
  223. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/build.py +0 -0
  224. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/eval.py +0 -0
  225. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/gepa.py +0 -0
  226. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/install.py +0 -0
  227. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/rl.py +0 -0
  228. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/setup.py +0 -0
  229. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/train.py +0 -0
  230. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/tui.py +0 -0
  231. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/scripts/vllm.py +0 -0
  232. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/__init__.py +0 -0
  233. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/client/env_client.py +0 -0
  234. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/client/zmq_env_client.py +0 -0
  235. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/server/__init__.py +0 -0
  236. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/server/env_router.py +0 -0
  237. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/server/env_server.py +0 -0
  238. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/server/env_worker.py +0 -0
  239. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/serve/server/zmq_env_server.py +0 -0
  240. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/__init__.py +0 -0
  241. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/async_utils.py +0 -0
  242. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/client_utils.py +0 -0
  243. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/config_utils.py +0 -0
  244. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/data_utils.py +0 -0
  245. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/display_utils.py +0 -0
  246. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/env_config_utils.py +0 -0
  247. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/env_utils.py +0 -0
  248. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/error_utils.py +0 -0
  249. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/eval_display.py +0 -0
  250. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/eval_utils.py +0 -0
  251. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/heartbeat.py +0 -0
  252. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/import_utils.py +0 -0
  253. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/install_utils.py +0 -0
  254. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/interception_utils.py +0 -0
  255. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/logging_utils.py +0 -0
  256. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/message_utils.py +0 -0
  257. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/metric_utils.py +0 -0
  258. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/path_utils.py +0 -0
  259. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/pricing_utils.py +0 -0
  260. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/process_utils.py +0 -0
  261. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/serve_utils.py +0 -0
  262. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/thread_utils.py +0 -0
  263. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/threaded_sandbox_client.py +0 -0
  264. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/tool_utils.py +0 -0
  265. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/tunnel_utils.py +0 -0
  266. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/usage_utils.py +0 -0
  267. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/utils/version_utils.py +0 -0
  268. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/__init__.py +0 -0
  269. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/env.py +0 -0
  270. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/harness.py +0 -0
  271. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/__init__.py +0 -0
  272. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  273. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/command.py +0 -0
  274. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/configs.py +0 -0
  275. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  276. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/opencode.py +0 -0
  277. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/pi.py +0 -0
  278. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/rlm.py +0 -0
  279. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/harnesses/terminus_2.py +0 -0
  280. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  281. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/packages/tasksets/harbor.py +0 -0
  282. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/runtime.py +0 -0
  283. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/state.py +0 -0
  284. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/task.py +0 -0
  285. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/taskset.py +0 -0
  286. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/toolset.py +0 -0
  287. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/types.py +0 -0
  288. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/user.py +0 -0
  289. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/__init__.py +0 -0
  290. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/artifact_utils.py +0 -0
  291. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/binding_utils.py +0 -0
  292. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/config_callable_utils.py +0 -0
  293. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/endpoint_utils.py +0 -0
  294. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/json_utils.py +0 -0
  295. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/judge_utils.py +0 -0
  296. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  297. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  298. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/mcp_utils.py +0 -0
  299. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/object_utils.py +0 -0
  300. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/program_utils.py +0 -0
  301. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/prompt_utils.py +0 -0
  302. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/runtime_registry.py +0 -0
  303. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  304. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/sandbox_utils.py +0 -0
  305. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/scoring_utils.py +0 -0
  306. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/serialization_utils.py +0 -0
  307. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  308. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/taskset_utils.py +0 -0
  309. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/timing_utils.py +0 -0
  310. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/tool_utils.py +0 -0
  311. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/trajectory_utils.py +0 -0
  312. {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev6}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev5
3
+ Version: 0.1.15.dev6
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -174,6 +174,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
174
174
  ```bash
175
175
  prime env init my-env # creates a new template in ./environments/my_env
176
176
  ```
177
+ Add an explicit harness loader when the environment owns harness behavior:
178
+ ```bash
179
+ prime env init my-env --with-harness
180
+ ```
177
181
  For OpenEnv integration, use:
178
182
  ```bash
179
183
  prime env init my-openenv --openenv
@@ -191,7 +195,9 @@ environments/my_env/
191
195
  └── README.md # Documentation
192
196
  ```
193
197
 
194
- Environment modules should expose a `load_environment` function which returns an instance of the Environment object, and which can accept custom arguments. For example:
198
+ Environment modules should expose a `load_environment` function which returns an
199
+ environment object. For simple legacy environments, this can still be a direct
200
+ constructor:
195
201
  ```python
196
202
  # my_env.py
197
203
  import verifiers as vf
@@ -223,7 +229,7 @@ def source():
223
229
  async def contains_answer(task, state) -> float:
224
230
  return float(task["answer"] in str(state.get("completion") or ""))
225
231
 
226
- def load_taskset(config: vf.TasksetConfig | None = None):
232
+ def load_taskset(config: vf.TasksetConfig):
227
233
  return vf.Taskset(source=source, rewards=[contains_answer], config=config)
228
234
 
229
235
  def load_environment(config: vf.EnvConfig) -> vf.Env:
@@ -244,8 +250,8 @@ env = vf.Env(
244
250
  ```
245
251
 
246
252
  The same environment package is the unit used by evals and `prime-rl`. The
247
- trainer owns model, endpoint, sampling, and rollout count; v1-specific taskset
248
- and harness options stay under `env.taskset` and `env.harness`:
253
+ trainer owns model, endpoint, sampling, and rollout count; v1-specific options
254
+ stay on the taskset or harness config that owns them:
249
255
 
250
256
  ```toml
251
257
  # configs/rl/my-v1-env.toml
@@ -260,12 +266,12 @@ max_tokens = 4096
260
266
  [[env]]
261
267
  id = "my-env"
262
268
 
263
- [env.args]
264
- arg1 = "non-th-arg"
265
-
266
269
  [env.harness]
267
270
  max_turns = 1
268
271
 
272
+ [env.taskset]
273
+ split = "train"
274
+
269
275
  [env.taskset.scoring.contains_answer]
270
276
  weight = 1.0
271
277
  ```
@@ -99,6 +99,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
99
99
  ```bash
100
100
  prime env init my-env # creates a new template in ./environments/my_env
101
101
  ```
102
+ Add an explicit harness loader when the environment owns harness behavior:
103
+ ```bash
104
+ prime env init my-env --with-harness
105
+ ```
102
106
  For OpenEnv integration, use:
103
107
  ```bash
104
108
  prime env init my-openenv --openenv
@@ -116,7 +120,9 @@ environments/my_env/
116
120
  └── README.md # Documentation
117
121
  ```
118
122
 
119
- Environment modules should expose a `load_environment` function which returns an instance of the Environment object, and which can accept custom arguments. For example:
123
+ Environment modules should expose a `load_environment` function which returns an
124
+ environment object. For simple legacy environments, this can still be a direct
125
+ constructor:
120
126
  ```python
121
127
  # my_env.py
122
128
  import verifiers as vf
@@ -148,7 +154,7 @@ def source():
148
154
  async def contains_answer(task, state) -> float:
149
155
  return float(task["answer"] in str(state.get("completion") or ""))
150
156
 
151
- def load_taskset(config: vf.TasksetConfig | None = None):
157
+ def load_taskset(config: vf.TasksetConfig):
152
158
  return vf.Taskset(source=source, rewards=[contains_answer], config=config)
153
159
 
154
160
  def load_environment(config: vf.EnvConfig) -> vf.Env:
@@ -169,8 +175,8 @@ env = vf.Env(
169
175
  ```
170
176
 
171
177
  The same environment package is the unit used by evals and `prime-rl`. The
172
- trainer owns model, endpoint, sampling, and rollout count; v1-specific taskset
173
- and harness options stay under `env.taskset` and `env.harness`:
178
+ trainer owns model, endpoint, sampling, and rollout count; v1-specific options
179
+ stay on the taskset or harness config that owns them:
174
180
 
175
181
  ```toml
176
182
  # configs/rl/my-v1-env.toml
@@ -185,12 +191,12 @@ max_tokens = 4096
185
191
  [[env]]
186
192
  id = "my-env"
187
193
 
188
- [env.args]
189
- arg1 = "non-th-arg"
190
-
191
194
  [env.harness]
192
195
  max_turns = 1
193
196
 
197
+ [env.taskset]
198
+ split = "train"
199
+
194
200
  [env.taskset.scoring.contains_answer]
195
201
  weight = 1.0
196
202
  ```
@@ -57,7 +57,7 @@ def test_wikispeedia_loads_as_v1_taskset_harness(
57
57
  ) -> None:
58
58
  module = load_module(monkeypatch)
59
59
 
60
- env = module.load_environment(config=vf.EnvConfig(), train_size=1, eval_size=1)
60
+ env = module.load_environment(config=module.WikispeediaEnvConfig())
61
61
 
62
62
  assert isinstance(env, vf.Env)
63
63
  assert isinstance(env.taskset, vf.Taskset)
@@ -65,6 +65,43 @@ def test_wikispeedia_loads_as_v1_taskset_harness(
65
65
  assert env.taskset.taskset_id == "langchain-deep-agents-wikispeedia"
66
66
 
67
67
 
68
+ def test_wikispeedia_env_config_reaches_taskset_and_harness(
69
+ monkeypatch: pytest.MonkeyPatch,
70
+ ) -> None:
71
+ module = load_module(monkeypatch)
72
+ wiki = make_small_wiki(module)
73
+ monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
74
+
75
+ env = module.load_environment(
76
+ config=module.WikispeediaEnvConfig(
77
+ taskset={
78
+ "train_size": 2,
79
+ "eval_size": 1,
80
+ "min_path_length": 1,
81
+ "max_path_length": 1,
82
+ "eval_target_fraction": 0.5,
83
+ "allow_go_back": False,
84
+ "links_only": True,
85
+ "max_turns": 7,
86
+ },
87
+ harness={
88
+ "max_turns": 8,
89
+ "timeout_seconds": 9.0,
90
+ },
91
+ )
92
+ )
93
+
94
+ train_rows = list(env.taskset.source())
95
+ eval_rows = list(env.taskset.eval_source())
96
+
97
+ assert len(train_rows) == 2
98
+ assert len(eval_rows) == 1
99
+ assert train_rows[0]["max_turns"] == 7
100
+ assert env.harness.config.max_turns == 8
101
+ assert env.harness.config.timeout_seconds == 9.0
102
+ assert [tool.__name__ for tool in env.taskset.toolsets[0].tools] == ["click_link"]
103
+
104
+
68
105
  def test_wikispeedia_rows_use_v1_task_shape(
69
106
  monkeypatch: pytest.MonkeyPatch,
70
107
  ) -> None:
@@ -90,11 +127,13 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
90
127
  wiki = make_small_wiki(module)
91
128
  monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
92
129
  taskset = module.load_taskset(
93
- train_size=2,
94
- eval_size=1,
95
- min_path_length=1,
96
- max_path_length=1,
97
- eval_target_fraction=0.5,
130
+ config=module.WikispeediaTasksetConfig(
131
+ train_size=2,
132
+ eval_size=1,
133
+ min_path_length=1,
134
+ max_path_length=1,
135
+ eval_target_fraction=0.5,
136
+ )
98
137
  )
99
138
 
100
139
  train_rows = list(taskset.source())
@@ -114,8 +153,12 @@ def test_wikispeedia_efficiency_weight_uses_fresh_reward_wrapper(
114
153
  wiki = make_small_wiki(module)
115
154
  monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
116
155
 
117
- weighted = module.load_taskset(efficiency_weight=0.5)
118
- plain = module.load_taskset(efficiency_weight=0.0)
156
+ weighted = module.load_taskset(
157
+ config=module.WikispeediaTasksetConfig(efficiency_weight=0.5)
158
+ )
159
+ plain = module.load_taskset(
160
+ config=module.WikispeediaTasksetConfig(efficiency_weight=0.0)
161
+ )
119
162
 
120
163
  assert any(fn.__name__ == "path_efficiency" for fn in weighted.rewards)
121
164
  assert any(fn is module.path_efficiency for fn in plain.metrics)
@@ -127,13 +170,17 @@ def test_wikispeedia_taskset_owns_navigation_tools(
127
170
  ) -> None:
128
171
  module = load_module(monkeypatch)
129
172
 
130
- taskset = module.load_taskset(allow_go_back=True)
173
+ taskset = module.load_taskset(
174
+ config=module.WikispeediaTasksetConfig(allow_go_back=True)
175
+ )
131
176
  names = [tool.__name__ for tool in taskset.toolsets[0].tools]
132
- no_back = module.load_taskset(allow_go_back=False)
177
+ no_back = module.load_taskset(
178
+ config=module.WikispeediaTasksetConfig(allow_go_back=False)
179
+ )
133
180
 
134
181
  assert names == ["click_link", "go_back"]
135
182
  assert [tool.__name__ for tool in no_back.toolsets[0].tools] == ["click_link"]
136
- assert module.load_harness().toolsets == []
183
+ assert module.load_harness(config=module.WikispeediaHarnessConfig()).toolsets == []
137
184
 
138
185
 
139
186
  def test_wikispeedia_system_prompt_matches_available_tools(
@@ -141,8 +188,12 @@ def test_wikispeedia_system_prompt_matches_available_tools(
141
188
  ) -> None:
142
189
  module = load_module(monkeypatch)
143
190
 
144
- with_back = module.load_taskset(allow_go_back=True)
145
- without_back = module.load_taskset(allow_go_back=False)
191
+ with_back = module.load_taskset(
192
+ config=module.WikispeediaTasksetConfig(allow_go_back=True)
193
+ )
194
+ without_back = module.load_taskset(
195
+ config=module.WikispeediaTasksetConfig(allow_go_back=False)
196
+ )
146
197
 
147
198
  assert "go_back" in with_back.system_prompt[0]["content"]
148
199
  assert "go_back" not in without_back.system_prompt[0]["content"]
@@ -156,12 +207,16 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
156
207
  module = load_module(monkeypatch)
157
208
  wiki = make_small_wiki(module)
158
209
  monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
159
- env = module.load_environment(
160
- config=vf.EnvConfig(),
161
- train_size=2,
162
- eval_size=1,
163
- min_path_length=1,
164
- max_path_length=1,
210
+ env = vf.Env(
211
+ taskset=module.load_taskset(
212
+ config=module.WikispeediaTasksetConfig(
213
+ train_size=2,
214
+ eval_size=1,
215
+ min_path_length=1,
216
+ max_path_length=1,
217
+ )
218
+ ),
219
+ harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
165
220
  )
166
221
  task = module.vf.Task(list(env.taskset.source())[0]).freeze()
167
222
  state = module.vf.State.for_task(task)
@@ -26,7 +26,9 @@ def _load_mcp_search_module() -> Any:
26
26
  def test_mcp_search_env_is_v1_only() -> None:
27
27
  module = _load_mcp_search_module()
28
28
 
29
- env = module.load_environment(config=vf.EnvConfig(), max_turns=4)
29
+ env = module.load_environment(
30
+ config=module.MCPSearchEnvConfig(taskset={"max_turns": 4})
31
+ )
30
32
 
31
33
  assert isinstance(env, vf.Env)
32
34
  assert isinstance(env.taskset, vf.Taskset)
@@ -40,7 +42,7 @@ def test_mcp_search_env_is_v1_only() -> None:
40
42
  def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
41
43
  module = _load_mcp_search_module()
42
44
 
43
- rows = module.load_taskset().rows()
45
+ rows = module.load_taskset(config=module.MCPSearchTasksetConfig()).rows()
44
46
 
45
47
  assert len(rows) >= 10
46
48
  assert len({row["answer"] for row in rows}) == len(rows)
@@ -52,7 +54,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
52
54
  module = _load_mcp_search_module()
53
55
 
54
56
  env = module.load_environment(
55
- config=vf.EnvConfig(taskset={"max_turns": 3}),
57
+ config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
56
58
  )
57
59
  rows = env.taskset.rows()
58
60
 
@@ -28,7 +28,7 @@ def _load_opencode_module() -> Any:
28
28
  def test_load_environment_uses_v1_taskset_and_harness() -> None:
29
29
  module = _load_opencode_module()
30
30
 
31
- env = module.load_environment(config=vf.EnvConfig())
31
+ env = module.load_environment(config=module.OpenCodeHarborEnvConfig())
32
32
 
33
33
  assert isinstance(env, vf.Env)
34
34
  assert isinstance(env.taskset, vf.HarborTaskset)
@@ -52,7 +52,7 @@ def test_load_environment_accepts_v1_taskset_and_harness_config() -> None:
52
52
  module = _load_opencode_module()
53
53
 
54
54
  env = module.load_environment(
55
- config=vf.EnvConfig(
55
+ config=module.OpenCodeHarborEnvConfig(
56
56
  taskset={
57
57
  "task_names": ["task-a"],
58
58
  "cpu_cores": 1.5,
@@ -32,6 +32,7 @@ from verifiers.utils.save_utils import (
32
32
  make_serializable,
33
33
  save_new_outputs,
34
34
  states_to_outputs,
35
+ truncate_malformed_trailing_line,
35
36
  validate_resume_metadata,
36
37
  )
37
38
  from verifiers.utils.usage_utils import StateUsageTracker, response_usage_tokens
@@ -488,6 +489,9 @@ class TestSaveNewOutputs:
488
489
  "\n".join(lines + [malformed_trailing_line]), encoding="utf-8"
489
490
  )
490
491
 
492
+ # Caller drops the partial trailing row before appending so the new
493
+ # row lands on a valid JSONL boundary.
494
+ truncate_malformed_trailing_line(outputs_path)
491
495
  save_new_outputs(
492
496
  [{"example_id": 3, "label": "row-3"}],
493
497
  results_path,
@@ -75,12 +75,12 @@ def test_bfcl_public_loader_is_v1_only(monkeypatch: pytest.MonkeyPatch) -> None:
75
75
  seen_taskset_config: vf.TasksetConfig | None = None
76
76
  seen_harness_config: vf.HarnessConfig | None = None
77
77
 
78
- def fake_taskset(config: vf.TasksetConfig | None = None) -> vf.Taskset:
78
+ def fake_taskset(config: vf.TasksetConfig) -> vf.Taskset:
79
79
  nonlocal seen_taskset_config
80
80
  seen_taskset_config = config
81
81
  return vf.Taskset(source=[], config=config)
82
82
 
83
- def fake_harness(config: vf.HarnessConfig | None = None) -> vf.Harness:
83
+ def fake_harness(config: vf.HarnessConfig) -> vf.Harness:
84
84
  nonlocal seen_harness_config
85
85
  seen_harness_config = config
86
86
  return vf.Harness(config=config)
@@ -89,9 +89,13 @@ def test_bfcl_public_loader_is_v1_only(monkeypatch: pytest.MonkeyPatch) -> None:
89
89
  monkeypatch.setattr(bfcl, "load_harness", fake_harness)
90
90
 
91
91
  env = bfcl.load_environment(
92
- config=vf.EnvConfig(),
93
- test_category="simple_python",
94
- examples_per_category=0,
92
+ config=bfcl.BFCLEnvConfig(
93
+ taskset=bfcl.BFCLTasksetConfig(
94
+ test_category="simple_python",
95
+ examples_per_category=0,
96
+ ),
97
+ harness=bfcl.BFCLHarnessConfig(),
98
+ )
95
99
  )
96
100
 
97
101
  assert isinstance(env, vf.Env)
@@ -110,12 +114,12 @@ def test_bfcl_loader_supports_category_groups(
110
114
  seen_taskset_categories = []
111
115
  seen_harness_categories = []
112
116
 
113
- def fake_taskset(config: vf.TasksetConfig | None = None) -> vf.Taskset:
117
+ def fake_taskset(config: vf.TasksetConfig) -> vf.Taskset:
114
118
  assert isinstance(config, bfcl.BFCLTasksetConfig)
115
119
  seen_taskset_categories.append(config.test_category)
116
120
  return vf.Taskset(source=[{"question": "q", "answer": "a"}], config=config)
117
121
 
118
- def fake_harness(config: vf.HarnessConfig | None = None) -> vf.Harness:
122
+ def fake_harness(config: vf.HarnessConfig) -> vf.Harness:
119
123
  assert isinstance(config, bfcl.BFCLHarnessConfig)
120
124
  seen_harness_categories.append(config.test_category)
121
125
  return vf.Harness(config=config)
@@ -124,9 +128,13 @@ def test_bfcl_loader_supports_category_groups(
124
128
  monkeypatch.setattr(bfcl, "load_harness", fake_harness)
125
129
 
126
130
  env = bfcl.load_environment(
127
- config=vf.EnvConfig(),
128
- test_categories=["simple_python", "simple_java"],
129
- examples_per_category=0,
131
+ config=bfcl.BFCLEnvConfig(
132
+ taskset=bfcl.BFCLTasksetConfig(
133
+ test_categories=["simple_python", "simple_java"],
134
+ examples_per_category=0,
135
+ ),
136
+ harness=bfcl.BFCLHarnessConfig(),
137
+ )
130
138
  )
131
139
 
132
140
  assert isinstance(env, root_vf.EnvGroup)
@@ -1185,6 +1185,29 @@ def test_config_schema_is_visible_from_primary_types() -> None:
1185
1185
  assert "bindings" in vf.ToolsetConfig.schema_text()
1186
1186
 
1187
1187
 
1188
+ def test_config_annotation_only_nested_config_defaults_recursively() -> None:
1189
+ class LeafConfig(Config):
1190
+ value: int = 1
1191
+
1192
+ class ChildConfig(Config):
1193
+ leaf: LeafConfig
1194
+
1195
+ class ParentConfig(Config):
1196
+ child: ChildConfig
1197
+
1198
+ first = ParentConfig()
1199
+ second = ParentConfig()
1200
+ configured = ParentConfig({"child": {"leaf": {"value": 3}}})
1201
+
1202
+ assert isinstance(first.child, ChildConfig)
1203
+ assert isinstance(first.child.leaf, LeafConfig)
1204
+ assert first.child.leaf.value == 1
1205
+ assert first.child is not second.child
1206
+ assert first.child.leaf is not second.child.leaf
1207
+ assert configured.child.leaf.value == 3
1208
+ assert "child: ChildConfig = <factory>" in ParentConfig.schema_text()
1209
+
1210
+
1188
1211
  def test_env_config_normalizes_mapping_config_to_attributes() -> None:
1189
1212
  config = EnvConfig(
1190
1213
  {
@@ -1193,8 +1216,17 @@ def test_env_config_normalizes_mapping_config_to_attributes() -> None:
1193
1216
  }
1194
1217
  )
1195
1218
 
1196
- assert config.taskset == {"taskset_id": "dict"}
1197
- assert config.harness == {"model": "configured-model"}
1219
+ assert isinstance(config.taskset, TasksetConfig)
1220
+ assert isinstance(config.harness, HarnessConfig)
1221
+ assert config.taskset.taskset_id == "dict"
1222
+ assert config.harness.model == "configured-model"
1223
+
1224
+
1225
+ def test_env_config_defaults_taskset_and_harness_to_base_configs() -> None:
1226
+ config = EnvConfig()
1227
+
1228
+ assert isinstance(config.taskset, TasksetConfig)
1229
+ assert isinstance(config.harness, HarnessConfig)
1198
1230
 
1199
1231
 
1200
1232
  def test_env_config_rejects_unknown_top_level_sections() -> None:
@@ -1205,6 +1237,34 @@ def test_env_config_rejects_unknown_top_level_sections() -> None:
1205
1237
  def test_env_config_requires_child_sections_to_be_configs() -> None:
1206
1238
  with pytest.raises(ValueError):
1207
1239
  EnvConfig({"taskset": 1})
1240
+ with pytest.raises(ValueError, match="EnvConfig.taskset cannot be None"):
1241
+ EnvConfig({"taskset": None})
1242
+ with pytest.raises(ValueError, match="EnvConfig.harness cannot be None"):
1243
+ EnvConfig(harness=None)
1244
+
1245
+
1246
+ def test_env_config_child_config_objects_must_match_domain() -> None:
1247
+ class LocalTasksetConfig(TasksetConfig):
1248
+ split: str = "train"
1249
+
1250
+ class LocalHarnessConfig(HarnessConfig):
1251
+ mode: str = "default"
1252
+
1253
+ config = EnvConfig(
1254
+ taskset=LocalTasksetConfig(split="test"),
1255
+ harness=LocalHarnessConfig(mode="custom"),
1256
+ )
1257
+
1258
+ assert isinstance(config.taskset, LocalTasksetConfig)
1259
+ assert isinstance(config.harness, LocalHarnessConfig)
1260
+
1261
+ class LocalConfig(Config):
1262
+ split: str = "train"
1263
+
1264
+ with pytest.raises(ValueError):
1265
+ EnvConfig(taskset=LocalConfig())
1266
+ with pytest.raises(ValueError):
1267
+ EnvConfig(harness=LocalConfig())
1208
1268
 
1209
1269
 
1210
1270
  def test_env_config_merges_child_config_defaults_with_nested_sections() -> None:
@@ -1234,25 +1294,53 @@ def test_env_config_merges_child_config_defaults_with_nested_sections() -> None:
1234
1294
  assert default_config.taskset.split == "kwarg"
1235
1295
 
1236
1296
 
1237
- def test_env_config_args_supplies_typed_top_level_args() -> None:
1238
- class LocalArgsConfig(Config):
1239
- split: str = "train"
1240
- max_turns: int = 4
1241
-
1242
- config = EnvConfig(
1243
- {"args": {"max_turns": 7}},
1244
- args=LocalArgsConfig(split="args"),
1297
+ def test_config_object_merge_omits_nested_none_values() -> None:
1298
+ base = HarnessConfig(
1299
+ sampling_args={
1300
+ "temperature": 0.7,
1301
+ "extra_body": {
1302
+ "top_k": 40,
1303
+ "top_p": 0.9,
1304
+ },
1305
+ }
1245
1306
  )
1307
+ override = HarnessConfig(
1308
+ sampling_args={
1309
+ "extra_body": {
1310
+ "top_p": None,
1311
+ "min_p": 0.05,
1312
+ },
1313
+ "stop": [None, "DONE"],
1314
+ }
1315
+ )
1316
+ config = EnvConfig(EnvConfig(harness=override), harness=base)
1317
+
1318
+ assert config.harness.sampling_args == {
1319
+ "temperature": 0.7,
1320
+ "extra_body": {
1321
+ "top_k": 40,
1322
+ "top_p": 0.9,
1323
+ "min_p": 0.05,
1324
+ },
1325
+ "stop": [None, "DONE"],
1326
+ }
1246
1327
 
1247
- assert isinstance(config.args, LocalArgsConfig)
1248
- assert config.args.split == "args"
1249
- assert config.args.max_turns == 7
1250
1328
 
1329
+ def test_env_config_subclasses_cannot_define_root_fields() -> None:
1330
+ with pytest.raises(TypeError, match="unsupported root env config fields"):
1251
1331
 
1252
- def test_env_config_args_accepts_arbitrary_user_args() -> None:
1253
- config = EnvConfig(args={"k1": "v1", "k2": "v2"})
1332
+ class LocalEnvConfig(EnvConfig):
1333
+ split: str = "train"
1254
1334
 
1255
- assert config.args == {"k1": "v1", "k2": "v2"}
1335
+
1336
+ def test_env_config_subclasses_must_use_domain_child_configs() -> None:
1337
+ class LocalConfig(Config):
1338
+ split: str = "train"
1339
+
1340
+ with pytest.raises(TypeError, match="taskset must be typed"):
1341
+
1342
+ class LocalEnvConfig(EnvConfig):
1343
+ taskset: LocalConfig
1256
1344
 
1257
1345
 
1258
1346
  def test_env_config_harness_section_extends_imported_config() -> None:
@@ -1335,6 +1423,56 @@ def test_load_environment_coerces_typed_env_config_arg(
1335
1423
  }
1336
1424
 
1337
1425
 
1426
+ def test_load_environment_coerces_env_config_subclass_sections(
1427
+ monkeypatch: pytest.MonkeyPatch,
1428
+ ) -> None:
1429
+ module_name = "typed_env_config_subclass"
1430
+ module = types.ModuleType(module_name)
1431
+ seen: dict[str, object] = {}
1432
+
1433
+ class LocalTasksetConfig(TasksetConfig):
1434
+ split: str = "train"
1435
+
1436
+ class LocalHarnessConfig(HarnessConfig):
1437
+ mode: str = "default"
1438
+
1439
+ class LocalEnvConfig(EnvConfig):
1440
+ taskset: LocalTasksetConfig
1441
+ harness: LocalHarnessConfig
1442
+
1443
+ class LocalTaskset(Taskset):
1444
+ config_type = LocalTasksetConfig
1445
+
1446
+ class LocalHarness(Harness):
1447
+ config_type = LocalHarnessConfig
1448
+
1449
+ def load_environment(config: LocalEnvConfig) -> Env:
1450
+ seen["config"] = config
1451
+ return Env(
1452
+ taskset=LocalTaskset(source=source_loader, config=config.taskset),
1453
+ harness=LocalHarness(config=config.harness),
1454
+ )
1455
+
1456
+ module.load_environment = load_environment
1457
+ monkeypatch.setitem(sys.modules, module_name, module)
1458
+
1459
+ env = vf.load_environment(
1460
+ "typed-env-config-subclass",
1461
+ config={
1462
+ "taskset": {"taskset_id": "typed", "split": "test"},
1463
+ "harness": {"mode": "custom"},
1464
+ },
1465
+ )
1466
+ config = seen["config"]
1467
+
1468
+ assert isinstance(config, LocalEnvConfig)
1469
+ assert isinstance(config.taskset, LocalTasksetConfig)
1470
+ assert isinstance(config.harness, LocalHarnessConfig)
1471
+ assert env.taskset.config.taskset_id == "typed"
1472
+ assert env.taskset.config.split == "test"
1473
+ assert env.harness.config.mode == "custom"
1474
+
1475
+
1338
1476
  def test_load_environment_supplies_default_typed_env_config(
1339
1477
  monkeypatch: pytest.MonkeyPatch,
1340
1478
  ) -> None:
@@ -1457,9 +1595,24 @@ def test_reference_v1_harness_loaders_preserve_child_defaults() -> None:
1457
1595
  "environments.hello_self_judge_v1.hello_self_judge_v1"
1458
1596
  )
1459
1597
 
1460
- assert group_reward.load_harness().config.max_turns == 1
1461
- assert parallel_sandbox.load_harness().config.max_turns == 4
1462
- assert self_judge.load_harness().config.max_turns == 8
1598
+ assert (
1599
+ group_reward.load_harness(
1600
+ config=group_reward.GroupRewardHarnessConfig()
1601
+ ).config.max_turns
1602
+ == 1
1603
+ )
1604
+ assert (
1605
+ parallel_sandbox.load_harness(
1606
+ config=parallel_sandbox.ParallelSandboxHarnessConfig()
1607
+ ).config.max_turns
1608
+ == 4
1609
+ )
1610
+ assert (
1611
+ self_judge.load_harness(
1612
+ config=self_judge.SelfJudgeHarnessConfig()
1613
+ ).config.max_turns
1614
+ == 8
1615
+ )
1463
1616
 
1464
1617
 
1465
1618
  def test_bfcl_loader_preserves_mapping_config_sections(
@@ -1482,7 +1635,7 @@ def test_bfcl_loader_preserves_mapping_config_sections(
1482
1635
  monkeypatch.setattr(module, "load_harness", fake_harness)
1483
1636
 
1484
1637
  env = module.load_environment(
1485
- config=EnvConfig(
1638
+ config=module.BFCLEnvConfig(
1486
1639
  taskset={"taskset_id": "bfcl-env-args"},
1487
1640
  harness={"model": "bfcl-model"},
1488
1641
  )
@@ -1508,7 +1661,7 @@ def test_tau2_loader_forwards_mapping_harness_config(
1508
1661
  monkeypatch.setattr(module, "load_taskset", fake_taskset)
1509
1662
 
1510
1663
  env = module.load_environment(
1511
- config=EnvConfig(
1664
+ config=module.Tau2EnvConfig(
1512
1665
  taskset={"max_turns": 7},
1513
1666
  harness={"model": "configured-model", "max_turns": 3},
1514
1667
  )
@@ -1623,17 +1776,16 @@ def test_self_judge_loader_projects_shortcuts_to_child_configs() -> None:
1623
1776
  "environments.hello_self_judge_v1.hello_self_judge_v1"
1624
1777
  )
1625
1778
 
1626
- taskset = module.load_taskset(num_examples=2)
1627
- harness = module.load_harness(max_turns=3)
1779
+ taskset = module.load_taskset(config=module.SelfJudgeTasksetConfig(num_examples=2))
1780
+ harness = module.load_harness(config=module.SelfJudgeHarnessConfig(max_turns=3))
1628
1781
  shortcut_env = module.load_environment(
1629
- num_examples=2,
1630
- max_turns=3,
1631
- config=EnvConfig(),
1782
+ config=module.SelfJudgeEnvConfig(
1783
+ taskset={"num_examples": 2},
1784
+ harness={"max_turns": 3},
1785
+ ),
1632
1786
  )
1633
1787
  override_env = module.load_environment(
1634
- num_examples=2,
1635
- max_turns=3,
1636
- config=EnvConfig(
1788
+ config=module.SelfJudgeEnvConfig(
1637
1789
  taskset={"num_examples": 1},
1638
1790
  harness={"max_turns": 5},
1639
1791
  ),