verifiers 0.1.15.dev6__tar.gz → 0.1.15.dev8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (325) hide show
  1. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/PKG-INFO +30 -26
  2. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/README.md +27 -24
  3. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/pyproject.toml +5 -3
  4. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_client_auth_errors.py +3 -0
  5. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_envs.py +36 -6
  6. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_eval_cli.py +189 -2
  7. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_eval_display.py +16 -0
  8. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_eval_utils.py +16 -0
  9. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_mcp_search_env.py +13 -1
  10. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_openai_chat_completions_token_client.py +21 -2
  11. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_opencode_harbor.py +9 -9
  12. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_path_utils.py +14 -0
  13. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_renderer_client.py +45 -1
  14. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_save_utils.py +31 -0
  15. verifiers-0.1.15.dev8/tests/test_tui_info_formatting.py +9 -0
  16. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_bfcl.py +8 -13
  17. verifiers-0.1.15.dev8/tests/test_v1_config_extension.py +3090 -0
  18. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_group_reward_env.py +1 -4
  19. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_harbor_cli.py +78 -34
  20. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_mini_swe_agent.py +7 -2
  21. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_rlm_swe.py +36 -14
  22. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_runtime_lifecycle.py +131 -53
  23. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_taskset_bindings.py +104 -27
  24. verifiers-0.1.15.dev8/tests/test_wiki_search_v1.py +136 -0
  25. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/AGENTS.md +2 -3
  26. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/__init__.py +22 -1
  27. verifiers-0.1.15.dev8/verifiers/cli/tui.py +9 -0
  28. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/openai_chat_completions_client.py +22 -20
  29. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/openai_chat_completions_token_client.py +12 -10
  30. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/openai_completions_client.py +0 -4
  31. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/renderer_client.py +52 -23
  32. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +1 -1
  33. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +1 -1
  34. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +1 -1
  35. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +1 -1
  36. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +1 -1
  37. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +1 -1
  38. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +1 -1
  39. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/eval.py +16 -1
  40. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/init.py +83 -36
  41. verifiers-0.1.15.dev8/verifiers/scripts/tui.py +11 -0
  42. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/types.py +10 -2
  43. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/client_utils.py +30 -0
  44. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/env_utils.py +63 -13
  45. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/eval_display.py +25 -9
  46. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/eval_utils.py +137 -17
  47. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/path_utils.py +9 -3
  48. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/response_utils.py +12 -25
  49. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/save_utils.py +39 -0
  50. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +75 -37
  51. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/README.md +290 -222
  52. verifiers-0.1.15.dev8/verifiers/v1/RE_MIGRATION.md +465 -0
  53. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/__init__.py +13 -1
  54. verifiers-0.1.15.dev8/verifiers/v1/config.py +484 -0
  55. verifiers-0.1.15.dev8/verifiers/v1/env.py +351 -0
  56. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/harness.py +66 -115
  57. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/__init__.py +10 -1
  58. verifiers-0.1.15.dev8/verifiers/v1/packages/harnesses/command.py +164 -0
  59. verifiers-0.1.15.dev8/verifiers/v1/packages/harnesses/configs.py +168 -0
  60. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/mini_swe_agent.py +75 -107
  61. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/opencode.py +58 -145
  62. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/pi.py +59 -76
  63. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/rlm.py +23 -80
  64. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/harnesses/terminus_2.py +67 -96
  65. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/tasksets/harbor.py +51 -105
  66. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/runtime.py +2 -1
  67. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/taskset.py +31 -137
  68. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/toolset.py +74 -30
  69. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/types.py +2 -7
  70. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/user.py +12 -8
  71. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/binding_utils.py +4 -2
  72. verifiers-0.1.15.dev8/verifiers/v1/utils/component_utils.py +136 -0
  73. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/config_callable_utils.py +4 -0
  74. verifiers-0.1.15.dev8/verifiers/v1/utils/config_utils.py +185 -0
  75. verifiers-0.1.15.dev8/verifiers/v1/utils/object_utils.py +52 -0
  76. verifiers-0.1.15.dev8/verifiers/v1/utils/runtime_owner_utils.py +129 -0
  77. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/taskset_utils.py +36 -2
  78. verifiers-0.1.15.dev6/tests/test_tui_info_formatting.py +0 -1524
  79. verifiers-0.1.15.dev6/tests/test_v1_config_extension.py +0 -1989
  80. verifiers-0.1.15.dev6/verifiers/cli/tui.py +0 -9
  81. verifiers-0.1.15.dev6/verifiers/scripts/tui.py +0 -5928
  82. verifiers-0.1.15.dev6/verifiers/v1/RE_MIGRATION.md +0 -825
  83. verifiers-0.1.15.dev6/verifiers/v1/config.py +0 -425
  84. verifiers-0.1.15.dev6/verifiers/v1/env.py +0 -134
  85. verifiers-0.1.15.dev6/verifiers/v1/packages/harnesses/command.py +0 -116
  86. verifiers-0.1.15.dev6/verifiers/v1/packages/harnesses/configs.py +0 -102
  87. verifiers-0.1.15.dev6/verifiers/v1/utils/config_utils.py +0 -200
  88. verifiers-0.1.15.dev6/verifiers/v1/utils/object_utils.py +0 -32
  89. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/.gitignore +0 -0
  90. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/LICENSE +0 -0
  91. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/AGENTS.md +0 -0
  92. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/README.md +0 -0
  93. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/__init__.py +0 -0
  94. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/conftest.py +0 -0
  95. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_browser_env.py +0 -0
  96. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_build_script.py +0 -0
  97. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_cli_agent_env.py +0 -0
  98. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_client_config.py +0 -0
  99. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_client_multimodal_types.py +0 -0
  100. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_composable_env.py +0 -0
  101. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_context_token_metrics.py +0 -0
  102. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_decorator_ranks.py +0 -0
  103. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_endpoint_registry.py +0 -0
  104. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_env_group.py +0 -0
  105. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_env_server.py +0 -0
  106. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_environment.py +0 -0
  107. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_environment_extra.py +0 -0
  108. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_error_chain.py +0 -0
  109. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_gepa_cli.py +0 -0
  110. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_gepa_utils.py +0 -0
  111. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_gym_env.py +0 -0
  112. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_harbor_env_mcp.py +0 -0
  113. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_imports.py +0 -0
  114. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_install_utils.py +0 -0
  115. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_interception_utils.py +0 -0
  116. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  117. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_lean_task.py +0 -0
  118. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_logging.py +0 -0
  119. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_math_rubric.py +0 -0
  120. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_maybe_think_parser.py +0 -0
  121. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_message_utils.py +0 -0
  122. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_message_utils_multimodal.py +0 -0
  123. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_multiturn_env.py +0 -0
  124. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_nemorl_client.py +0 -0
  125. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_openai_responses_client.py +0 -0
  126. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_opencode_rlm_env.py +0 -0
  127. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_openenv_client.py +0 -0
  128. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_parser.py +0 -0
  129. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_per_turn_timing.py +0 -0
  130. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_pricing_utils.py +0 -0
  131. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_prime_plugin.py +0 -0
  132. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_renderer_e2e.py +0 -0
  133. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_rlm_composable_env.py +0 -0
  134. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_rlm_env.py +0 -0
  135. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_rubric.py +0 -0
  136. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_rubric_group.py +0 -0
  137. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_sandbox_env.py +0 -0
  138. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_sandbox_mixin.py +0 -0
  139. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_setup_script.py +0 -0
  140. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_singleturn_env.py +0 -0
  141. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_stateful_tool_env.py +0 -0
  142. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_think_parser.py +0 -0
  143. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_tool_env.py +0 -0
  144. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_tool_utils.py +0 -0
  145. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_trajectory_processing.py +0 -0
  146. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_types.py +0 -0
  147. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_empty_completions.py +0 -0
  148. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_endpoint_protocols.py +0 -0
  149. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_example_counts.py +0 -0
  150. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_v1_scoring_functions.py +0 -0
  151. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_wordle_env.py +0 -0
  152. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/tests/test_xml_parser.py +0 -0
  153. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/__init__.py +0 -0
  154. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/__init__.py +0 -0
  155. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/build.py +0 -0
  156. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/eval.py +0 -0
  157. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/gepa.py +0 -0
  158. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/init.py +0 -0
  159. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/install.py +0 -0
  160. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/commands/setup.py +0 -0
  161. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/plugins/__init__.py +0 -0
  162. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/cli/plugins/prime.py +0 -0
  163. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/__init__.py +0 -0
  164. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/anthropic_messages_client.py +0 -0
  165. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/client.py +0 -0
  166. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  167. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/clients/openai_responses_client.py +0 -0
  168. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/decorators.py +0 -0
  169. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/AGENTS.md +0 -0
  170. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/__init__.py +0 -0
  171. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/env_group.py +0 -0
  172. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/environment.py +0 -0
  173. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/README.md +0 -0
  174. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/__init__.py +0 -0
  175. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  176. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/README.md +0 -0
  177. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/__init__.py +0 -0
  178. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/_filter.py +0 -0
  179. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  180. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harness.py +0 -0
  181. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  182. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  183. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  184. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  185. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  186. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  187. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/task.py +0 -0
  188. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  189. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  190. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  191. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  192. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  193. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  194. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  195. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  196. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  197. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  198. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  199. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  200. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  201. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  202. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  203. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  204. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/gym_env.py +0 -0
  205. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  206. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  207. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  208. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/mcp_env.py +0 -0
  209. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/opencode_env.py +0 -0
  210. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  211. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  212. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/rlm_env.py +0 -0
  213. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  214. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/utils/__init__.py +0 -0
  215. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  216. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  217. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/README.md +0 -0
  218. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/__init__.py +0 -0
  219. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/README.md +0 -0
  220. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  221. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  222. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  223. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  224. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  225. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  226. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/openenv_env.py +0 -0
  227. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  228. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/integrations/textarena_env.py +0 -0
  229. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/multiturn_env.py +0 -0
  230. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/python_env.py +0 -0
  231. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/sandbox_env.py +0 -0
  232. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/singleturn_env.py +0 -0
  233. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/stateful_tool_env.py +0 -0
  234. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/envs/tool_env.py +0 -0
  235. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/errors.py +0 -0
  236. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/gepa/__init__.py +0 -0
  237. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/gepa/adapter.py +0 -0
  238. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/gepa/config.py +0 -0
  239. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/gepa/display.py +0 -0
  240. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/gepa/gepa_utils.py +0 -0
  241. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/parsers/__init__.py +0 -0
  242. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/parsers/maybe_think_parser.py +0 -0
  243. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/parsers/parser.py +0 -0
  244. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/parsers/think_parser.py +0 -0
  245. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/parsers/xml_parser.py +0 -0
  246. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/README.md +0 -0
  247. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/__init__.py +0 -0
  248. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/inference/__init__.py +0 -0
  249. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/inference/client.py +0 -0
  250. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/inference/server.py +0 -0
  251. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/trainer/__init__.py +0 -0
  252. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/trainer/config.py +0 -0
  253. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/trainer/orchestrator.py +0 -0
  254. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/trainer/trainer.py +0 -0
  255. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rl/trainer/utils.py +0 -0
  256. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/__init__.py +0 -0
  257. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  258. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/judge_rubric.py +0 -0
  259. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/math_rubric.py +0 -0
  260. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/rubric.py +0 -0
  261. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/rubrics/rubric_group.py +0 -0
  262. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/__init__.py +0 -0
  263. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/build.py +0 -0
  264. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/gepa.py +0 -0
  265. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/install.py +0 -0
  266. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/rl.py +0 -0
  267. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/setup.py +0 -0
  268. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/train.py +0 -0
  269. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/scripts/vllm.py +0 -0
  270. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/__init__.py +0 -0
  271. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/client/env_client.py +0 -0
  272. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/client/zmq_env_client.py +0 -0
  273. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/server/__init__.py +0 -0
  274. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/server/env_router.py +0 -0
  275. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/server/env_server.py +0 -0
  276. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/server/env_worker.py +0 -0
  277. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/server/zmq_env_server.py +0 -0
  278. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/serve/types.py +0 -0
  279. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/__init__.py +0 -0
  280. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/async_utils.py +0 -0
  281. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/config_utils.py +0 -0
  282. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/data_utils.py +0 -0
  283. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/display_utils.py +0 -0
  284. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/env_config_utils.py +0 -0
  285. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/error_utils.py +0 -0
  286. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/heartbeat.py +0 -0
  287. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/import_utils.py +0 -0
  288. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/install_utils.py +0 -0
  289. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/interception_utils.py +0 -0
  290. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/logging_utils.py +0 -0
  291. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/message_utils.py +0 -0
  292. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/metric_utils.py +0 -0
  293. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/pricing_utils.py +0 -0
  294. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/process_utils.py +0 -0
  295. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/serve_utils.py +0 -0
  296. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/thread_utils.py +0 -0
  297. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/threaded_sandbox_client.py +0 -0
  298. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/tool_utils.py +0 -0
  299. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/tunnel_utils.py +0 -0
  300. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/usage_utils.py +0 -0
  301. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/utils/version_utils.py +0 -0
  302. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/__init__.py +0 -0
  303. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  304. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/state.py +0 -0
  305. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/task.py +0 -0
  306. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/__init__.py +0 -0
  307. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/artifact_utils.py +0 -0
  308. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/endpoint_utils.py +0 -0
  309. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/json_utils.py +0 -0
  310. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/judge_utils.py +0 -0
  311. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  312. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  313. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/mcp_utils.py +0 -0
  314. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/program_utils.py +0 -0
  315. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/prompt_utils.py +0 -0
  316. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/runtime_registry.py +0 -0
  317. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  318. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/sandbox_utils.py +0 -0
  319. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/scoring_utils.py +0 -0
  320. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/serialization_utils.py +0 -0
  321. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  322. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/timing_utils.py +0 -0
  323. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/tool_utils.py +0 -0
  324. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/trajectory_utils.py +0 -0
  325. {verifiers-0.1.15.dev6 → verifiers-0.1.15.dev8}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev6
3
+ Version: 0.1.15.dev8
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -35,6 +35,7 @@ Requires-Dist: nest-asyncio>=1.6.0
35
35
  Requires-Dist: numpy
36
36
  Requires-Dist: openai-agents>=0.0.7
37
37
  Requires-Dist: openai>=1.108.1
38
+ Requires-Dist: prime-pydantic-config[toml]
38
39
  Requires-Dist: prime-sandboxes>=0.2.25
39
40
  Requires-Dist: prime-tunnel>=0.1.6
40
41
  Requires-Dist: pydantic>=2.11.9
@@ -54,7 +55,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
54
55
  Provides-Extra: openenv
55
56
  Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
56
57
  Provides-Extra: renderers
57
- Requires-Dist: renderers>=0.1.8.dev0; extra == 'renderers'
58
+ Requires-Dist: renderers>=0.1.8.dev4; extra == 'renderers'
58
59
  Provides-Extra: rg
59
60
  Requires-Dist: reasoning-gym; extra == 'rg'
60
61
  Provides-Extra: rl
@@ -218,22 +219,35 @@ custom harnesses, use the v1 Taskset/Harness path:
218
219
  # my_env.py
219
220
  import verifiers as vf
220
221
 
221
- def source():
222
- yield {
223
- "prompt": [{"role": "user", "content": "Reverse abc."}],
224
- "answer": "cba",
225
- "max_turns": 1,
226
- }
227
-
228
222
  @vf.reward(weight=1.0)
229
223
  async def contains_answer(task, state) -> float:
230
224
  return float(task["answer"] in str(state.get("completion") or ""))
231
225
 
232
- def load_taskset(config: vf.TasksetConfig):
233
- return vf.Taskset(source=source, rewards=[contains_answer], config=config)
226
+ class MyTasksetConfig(vf.TasksetConfig):
227
+ split: str = "train"
228
+
229
+
230
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
231
+ _default_rewards = (contains_answer,)
232
+
233
+ def rows(self) -> list[dict[str, object]]:
234
+ rows = [
235
+ {
236
+ "prompt": [{"role": "user", "content": "Reverse abc."}],
237
+ "answer": "cba",
238
+ "split": "train",
239
+ "max_turns": 1,
240
+ }
241
+ ]
242
+ return [row for row in rows if row["split"] == self.config.split]
243
+
244
+
245
+ class MyEnvConfig(vf.EnvConfig):
246
+ taskset: MyTasksetConfig = MyTasksetConfig()
247
+
234
248
 
235
- def load_environment(config: vf.EnvConfig) -> vf.Env:
236
- return vf.Env(taskset=load_taskset(config=config.taskset))
249
+ def load_environment(config: MyEnvConfig) -> vf.Env:
250
+ return vf.Env(taskset=MyTaskset(config=config.taskset))
237
251
  ```
238
252
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
239
253
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
@@ -244,8 +258,8 @@ harness with:
244
258
 
245
259
  ```python
246
260
  env = vf.Env(
247
- taskset=vf.HarborTaskset(),
248
- harness=vf.OpenCode(),
261
+ taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
262
+ harness=vf.OpenCode(config=vf.OpenCodeConfig()),
249
263
  )
250
264
  ```
251
265
 
@@ -282,16 +296,6 @@ prime env install my-env
282
296
 
283
297
  For self-managed training launch commands, use the `prime-rl` documentation.
284
298
 
285
- To install the environment module into your project, do:
286
- ```bash
287
- prime env install my-env # installs from ./environments/my_env
288
- ```
289
-
290
- To install an environment from the Environments Hub into your project, do:
291
- ```bash
292
- prime env install primeintellect/math-python
293
- ```
294
-
295
299
  To run a local evaluation with any OpenAI-compatible model, do:
296
300
  ```bash
297
301
  prime eval run my-env -m openai/gpt-5-nano # run and save eval results locally
@@ -300,7 +304,7 @@ Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overv
300
304
 
301
305
  View local evaluation results in the terminal UI:
302
306
  ```bash
303
- prime eval tui
307
+ prime eval view
304
308
  ```
305
309
 
306
310
  To publish the environment to the [Environments Hub](https://app.primeintellect.ai/dashboard/environments?ex_sort=most_stars), do:
@@ -143,22 +143,35 @@ custom harnesses, use the v1 Taskset/Harness path:
143
143
  # my_env.py
144
144
  import verifiers as vf
145
145
 
146
- def source():
147
- yield {
148
- "prompt": [{"role": "user", "content": "Reverse abc."}],
149
- "answer": "cba",
150
- "max_turns": 1,
151
- }
152
-
153
146
  @vf.reward(weight=1.0)
154
147
  async def contains_answer(task, state) -> float:
155
148
  return float(task["answer"] in str(state.get("completion") or ""))
156
149
 
157
- def load_taskset(config: vf.TasksetConfig):
158
- return vf.Taskset(source=source, rewards=[contains_answer], config=config)
150
+ class MyTasksetConfig(vf.TasksetConfig):
151
+ split: str = "train"
152
+
153
+
154
+ class MyTaskset(vf.Taskset[MyTasksetConfig]):
155
+ _default_rewards = (contains_answer,)
156
+
157
+ def rows(self) -> list[dict[str, object]]:
158
+ rows = [
159
+ {
160
+ "prompt": [{"role": "user", "content": "Reverse abc."}],
161
+ "answer": "cba",
162
+ "split": "train",
163
+ "max_turns": 1,
164
+ }
165
+ ]
166
+ return [row for row in rows if row["split"] == self.config.split]
167
+
168
+
169
+ class MyEnvConfig(vf.EnvConfig):
170
+ taskset: MyTasksetConfig = MyTasksetConfig()
171
+
159
172
 
160
- def load_environment(config: vf.EnvConfig) -> vf.Env:
161
- return vf.Env(taskset=load_taskset(config=config.taskset))
173
+ def load_environment(config: MyEnvConfig) -> vf.Env:
174
+ return vf.Env(taskset=MyTaskset(config=config.taskset))
162
175
  ```
163
176
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
164
177
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
@@ -169,8 +182,8 @@ harness with:
169
182
 
170
183
  ```python
171
184
  env = vf.Env(
172
- taskset=vf.HarborTaskset(),
173
- harness=vf.OpenCode(),
185
+ taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
186
+ harness=vf.OpenCode(config=vf.OpenCodeConfig()),
174
187
  )
175
188
  ```
176
189
 
@@ -207,16 +220,6 @@ prime env install my-env
207
220
 
208
221
  For self-managed training launch commands, use the `prime-rl` documentation.
209
222
 
210
- To install the environment module into your project, do:
211
- ```bash
212
- prime env install my-env # installs from ./environments/my_env
213
- ```
214
-
215
- To install an environment from the Environments Hub into your project, do:
216
- ```bash
217
- prime env install primeintellect/math-python
218
- ```
219
-
220
223
  To run a local evaluation with any OpenAI-compatible model, do:
221
224
  ```bash
222
225
  prime eval run my-env -m openai/gpt-5-nano # run and save eval results locally
@@ -225,7 +228,7 @@ Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overv
225
228
 
226
229
  View local evaluation results in the terminal UI:
227
230
  ```bash
228
- prime eval tui
231
+ prime eval view
229
232
  ```
230
233
 
231
234
  To publish the environment to the [Environments Hub](https://app.primeintellect.ai/dashboard/environments?ex_sort=most_stars), do:
@@ -53,6 +53,7 @@ dependencies = [
53
53
  "setproctitle>=1.3.0",
54
54
  "regex<2026.4.4",
55
55
  "httpx>=0.27.0",
56
+ "prime-pydantic-config[toml]",
56
57
  ]
57
58
 
58
59
  [dependency-groups]
@@ -72,7 +73,7 @@ dev = [
72
73
  "aiohttp>=3.9.0",
73
74
  "python-dotenv>=1.0.0",
74
75
  "nltk",
75
- "renderers>=0.1.8.dev0",
76
+ "renderers>=0.1.8.dev4",
76
77
  ]
77
78
  policy = [
78
79
  "semgrep>=1.150.0",
@@ -95,7 +96,7 @@ openenv = [
95
96
  "openenv-core>=0.3.0",
96
97
  ]
97
98
  renderers = [
98
- "renderers>=0.1.8.dev0",
99
+ "renderers>=0.1.8.dev4",
99
100
  ]
100
101
  rl = [
101
102
  "torch>=2.8.0,<2.9.0",
@@ -113,6 +114,7 @@ rl = [
113
114
  [tool.uv]
114
115
  preview = true
115
116
  required-version = ">=0.11.1"
117
+ exclude-newer = "7 days"
116
118
  conflicts = [
117
119
  [
118
120
  { extra = "openenv" },
@@ -123,12 +125,12 @@ conflicts = [
123
125
  name = "pypi"
124
126
  url = "https://pypi.org/simple"
125
127
  default = true
126
- exclude-newer = "7 days"
127
128
 
128
129
  [tool.uv.exclude-newer-package]
129
130
  # PrimeIntellect-published on PyPI (trusted publisher)
130
131
  prime-tunnel = false
131
132
  prime-sandboxes = false
133
+ prime-pydantic-config = false
132
134
  renderers = false
133
135
  openenv-core = false
134
136
 
@@ -130,6 +130,9 @@ class _OverlongOpenAIChatClient:
130
130
  def __init__(self, message: str) -> None:
131
131
  self.chat = self._Chat(message)
132
132
 
133
+ async def post(self, *args, **kwargs): # noqa: ANN002, ANN003
134
+ return await self.chat.completions.create(*args, **kwargs)
135
+
133
136
 
134
137
  @pytest.mark.parametrize(
135
138
  "error_message",
@@ -101,18 +101,44 @@ def test_alphabet_sort_v1_validates_parameters():
101
101
  spec.loader.exec_module(module)
102
102
 
103
103
  with pytest.raises(ValueError, match="min_turns must be at least 1"):
104
- module.load_taskset(min_turns=0)
104
+ module.AlphabetSortTaskset(config=module.AlphabetSortTasksetConfig(min_turns=0))
105
105
  with pytest.raises(
106
106
  ValueError, match="min_turns must be less than or equal to max_turns"
107
107
  ):
108
- module.load_taskset(min_turns=3, max_turns=2)
108
+ module.AlphabetSortTaskset(
109
+ config=module.AlphabetSortTasksetConfig(min_turns=3, max_turns=2)
110
+ )
109
111
  with pytest.raises(ValueError, match="min_names_per_turn must be at least 1"):
110
- module.load_taskset(min_names_per_turn=0)
112
+ module.AlphabetSortTaskset(
113
+ config=module.AlphabetSortTasksetConfig(min_names_per_turn=0)
114
+ )
111
115
  with pytest.raises(
112
116
  ValueError,
113
117
  match="min_names_per_turn must be less than or equal to max_names_per_turn",
114
118
  ):
115
- module.load_taskset(min_names_per_turn=3, max_names_per_turn=2)
119
+ module.AlphabetSortTaskset(
120
+ config=module.AlphabetSortTasksetConfig(
121
+ min_names_per_turn=3,
122
+ max_names_per_turn=2,
123
+ )
124
+ )
125
+
126
+
127
+ @pytest.mark.parametrize("env_name", ["alphabet_sort", "math_python"])
128
+ def test_v1_wrapper_rejects_unknown_kwargs(env_name: str):
129
+ module_path = Path("environments") / env_name / f"{env_name}.py"
130
+ spec = importlib.util.spec_from_file_location(
131
+ f"{env_name}_wrapper_test", module_path
132
+ )
133
+ assert spec is not None and spec.loader is not None
134
+ module = importlib.util.module_from_spec(spec)
135
+ sys.modules[spec.name] = module
136
+ spec.loader.exec_module(module)
137
+
138
+ with pytest.raises(
139
+ TypeError, match="Unsupported v1 load_environment kwargs: extra"
140
+ ):
141
+ module.load_environment(v1=True, extra=True)
116
142
 
117
143
 
118
144
  @pytest.mark.slow
@@ -127,8 +153,12 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
127
153
  repo_root = Path(__file__).parent.parent
128
154
  cmd = (
129
155
  f"cd {tmp_venv_dir} && uv venv --clear && source .venv/bin/activate && "
130
- f"uv pip install {repo_root.as_posix()} && "
131
- f"uv pip install {env_dir.absolute().as_posix()}"
156
+ "uv pip install "
157
+ "--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
158
+ f"{repo_root.as_posix()} && "
159
+ "uv pip install "
160
+ "--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
161
+ f"{env_dir.absolute().as_posix()}"
132
162
  )
133
163
  try:
134
164
  process = subprocess.run(
@@ -13,6 +13,7 @@ import verifiers.scripts.eval as vf_eval
13
13
  import verifiers.utils.eval_utils
14
14
  from verifiers.types import GenerateOutputs
15
15
  from verifiers.utils.eval_utils import load_toml_config
16
+ from verifiers.utils.path_utils import get_eval_results_path
16
17
  from verifiers.utils.save_utils import states_to_outputs
17
18
 
18
19
 
@@ -706,6 +707,34 @@ def test_load_toml_config_multi_env():
706
707
  assert result[1]["env_id"] == "env2"
707
708
 
708
709
 
710
+ def test_load_toml_config_duplicate_envs_accept_names():
711
+ """Duplicate env ids can be labeled and configured independently."""
712
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
713
+ f.write(
714
+ '[[eval]]\nid = "env1"\nname = "env1-short"\n'
715
+ "[eval.args]\n"
716
+ 'split = "short"\n\n'
717
+ '[[eval]]\nid = "env1"\nname = "env1-long"\n'
718
+ "[eval.args]\n"
719
+ 'split = "long"\n'
720
+ )
721
+ f.flush()
722
+ result = load_toml_config(Path(f.name))
723
+
724
+ assert len(result) == 2
725
+ assert [config["env_id"] for config in result] == ["env1", "env1"]
726
+ assert [config["name"] for config in result] == ["env1-short", "env1-long"]
727
+ assert [config["env_args"]["split"] for config in result] == ["short", "long"]
728
+
729
+
730
+ def test_load_toml_config_rejects_global_name():
731
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
732
+ f.write('name = "shared-name"\n\n[[eval]]\nid = "env1"\n')
733
+ f.flush()
734
+ with pytest.raises(ValueError, match="Invalid global field"):
735
+ load_toml_config(Path(f.name))
736
+
737
+
709
738
  def test_load_toml_config_with_env_args():
710
739
  """Multiple sections with env_args field loads correctly."""
711
740
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -720,6 +749,92 @@ def test_load_toml_config_with_env_args():
720
749
  assert result[0]["env_args"]["max_examples"] == 100
721
750
 
722
751
 
752
+ def test_load_toml_config_sampling_section_mirrors_chat_template_kwargs():
753
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
754
+ f.write(
755
+ "[sampling]\n"
756
+ "max_tokens = 1024\n"
757
+ 'reasoning_effort = "medium"\n'
758
+ "enable_thinking = false\n\n"
759
+ "[sampling.extra_body]\n"
760
+ 'custom = "value"\n\n'
761
+ "[sampling.extra_body.chat_template_kwargs]\n"
762
+ "clear_thinking = true\n\n"
763
+ "[[eval]]\n"
764
+ 'env_id = "env1"\n'
765
+ )
766
+ f.flush()
767
+ result = load_toml_config(Path(f.name))
768
+
769
+ assert result[0]["sampling_args"] == {
770
+ "max_tokens": 1024,
771
+ "reasoning_effort": "medium",
772
+ "enable_thinking": False,
773
+ "extra_body": {
774
+ "custom": "value",
775
+ "chat_template_kwargs": {
776
+ "clear_thinking": True,
777
+ "reasoning_effort": "medium",
778
+ "enable_thinking": False,
779
+ },
780
+ },
781
+ }
782
+
783
+
784
+ def test_load_toml_config_sampling_args_mirrors_chat_template_kwargs():
785
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
786
+ f.write(
787
+ "[[eval]]\n"
788
+ 'env_id = "env1"\n'
789
+ 'sampling_args = { max_tokens = 256, reasoning_effort = "high", enable_thinking = true }\n'
790
+ )
791
+ f.flush()
792
+ result = load_toml_config(Path(f.name))
793
+
794
+ assert result[0]["sampling_args"] == {
795
+ "max_tokens": 256,
796
+ "reasoning_effort": "high",
797
+ "enable_thinking": True,
798
+ "extra_body": {
799
+ "chat_template_kwargs": {
800
+ "reasoning_effort": "high",
801
+ "enable_thinking": True,
802
+ }
803
+ },
804
+ }
805
+
806
+
807
+ def test_cli_toml_eval_sampling_section_pipes_thinking_args(monkeypatch, run_cli):
808
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
809
+ f.write(
810
+ "[[eval]]\n"
811
+ 'env_id = "env1"\n\n'
812
+ "[eval.sampling]\n"
813
+ "max_tokens = 512\n"
814
+ 'reasoning_effort = "low"\n'
815
+ "enable_thinking = true\n"
816
+ )
817
+ f.flush()
818
+ captured = run_cli(
819
+ monkeypatch,
820
+ {
821
+ "env_id_or_config": f.name,
822
+ },
823
+ )
824
+
825
+ assert captured["sampling_args"] == {
826
+ "max_tokens": 512,
827
+ "reasoning_effort": "low",
828
+ "enable_thinking": True,
829
+ "extra_body": {
830
+ "chat_template_kwargs": {
831
+ "reasoning_effort": "low",
832
+ "enable_thinking": True,
833
+ }
834
+ },
835
+ }
836
+
837
+
723
838
  def test_load_toml_config_with_args_taskset_harness():
724
839
  """args/taskset/harness sections normalize into load_environment kwargs."""
725
840
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -728,8 +843,10 @@ def test_load_toml_config_with_args_taskset_harness():
728
843
  "[eval.args]\n"
729
844
  'split = "train"\n\n'
730
845
  "[eval.taskset]\n"
846
+ 'id = "user/taskset-package"\n'
731
847
  "num_examples = 10\n\n"
732
848
  "[eval.harness]\n"
849
+ 'id = "user/harness-package"\n'
733
850
  "max_turns = 5\n"
734
851
  )
735
852
  f.flush()
@@ -740,8 +857,8 @@ def test_load_toml_config_with_args_taskset_harness():
740
857
  assert result[0]["env_args"] == {
741
858
  "split": "train",
742
859
  "config": {
743
- "taskset": {"num_examples": 10},
744
- "harness": {"max_turns": 5},
860
+ "taskset": {"id": "user/taskset-package", "num_examples": 10},
861
+ "harness": {"id": "user/harness-package", "max_turns": 5},
745
862
  },
746
863
  }
747
864
  assert "args" not in result[0]
@@ -815,6 +932,28 @@ def test_cli_multi_env_via_toml_config(monkeypatch, run_cli):
815
932
  assert configs[1].env_id == "env2"
816
933
 
817
934
 
935
+ def test_cli_duplicate_env_names_disambiguate_result_paths(monkeypatch, run_cli):
936
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
937
+ f.write(
938
+ '[[eval]]\nid = "env1"\nname = "env1-short"\n'
939
+ "[eval.args]\n"
940
+ 'split = "short"\n\n'
941
+ '[[eval]]\nid = "env1"\nname = "env1-long"\n'
942
+ "[eval.args]\n"
943
+ 'split = "long"\n'
944
+ )
945
+ f.flush()
946
+ captured = run_cli(monkeypatch, {"env_id_or_config": f.name})
947
+
948
+ configs = captured["configs"]
949
+ assert len(configs) == 2
950
+ assert [config.env_id for config in configs] == ["env1", "env1"]
951
+ assert [config.name for config in configs] == ["env1-short", "env1-long"]
952
+ assert [config.env_args["split"] for config in configs] == ["short", "long"]
953
+ assert get_eval_results_path(configs[0]).parent.name.startswith("env1-short--")
954
+ assert get_eval_results_path(configs[1]).parent.name.startswith("env1-long--")
955
+
956
+
818
957
  def test_cli_toml_ignores_cli_args(monkeypatch, run_cli):
819
958
  """TOML config ignores CLI args, uses defaults for unspecified values."""
820
959
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
@@ -838,6 +977,16 @@ def test_cli_toml_ignores_cli_args(monkeypatch, run_cli):
838
977
  assert config.rollouts_per_example == 3 # DEFAULT_ROLLOUTS_PER_EXAMPLE
839
978
  assert config.max_concurrent == 32 # default
840
979
  assert config.sampling_args["max_tokens"] is None # default
980
+ assert config.save_results is True
981
+
982
+
983
+ def test_cli_toml_respects_save_results_false(monkeypatch, run_cli):
984
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
985
+ f.write('[[eval]]\nenv_id = "env1"\nsave_results = false\n')
986
+ f.flush()
987
+ captured = run_cli(monkeypatch, {"env_id_or_config": f.name})
988
+
989
+ assert captured["configs"][0].save_results is False
841
990
 
842
991
 
843
992
  def test_cli_toml_per_env_num_examples(monkeypatch, run_cli):
@@ -1212,6 +1361,44 @@ def test_ablation_global_defaults_apply():
1212
1361
  assert all(c["num_examples"] == 100 for c in configs)
1213
1362
 
1214
1363
 
1364
+ def test_ablation_sampling_sweep_merges_with_global_sampling_defaults():
1365
+ with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
1366
+ f.write(
1367
+ "[sampling]\n"
1368
+ "max_tokens = 1024\n"
1369
+ 'reasoning_effort = "medium"\n\n'
1370
+ '[[ablation]]\nenv_id = "my-env"\n\n'
1371
+ "[ablation.sweep]\n"
1372
+ "sampling = [{ temperature = 0.0 }, { temperature = 1.0, enable_thinking = false }]\n"
1373
+ )
1374
+ f.flush()
1375
+ configs = load_toml_config(Path(f.name))
1376
+
1377
+ assert len(configs) == 2
1378
+ assert configs[0]["sampling_args"] == {
1379
+ "max_tokens": 1024,
1380
+ "reasoning_effort": "medium",
1381
+ "temperature": 0.0,
1382
+ "extra_body": {
1383
+ "chat_template_kwargs": {
1384
+ "reasoning_effort": "medium",
1385
+ }
1386
+ },
1387
+ }
1388
+ assert configs[1]["sampling_args"] == {
1389
+ "max_tokens": 1024,
1390
+ "reasoning_effort": "medium",
1391
+ "temperature": 1.0,
1392
+ "enable_thinking": False,
1393
+ "extra_body": {
1394
+ "chat_template_kwargs": {
1395
+ "reasoning_effort": "medium",
1396
+ "enable_thinking": False,
1397
+ }
1398
+ },
1399
+ }
1400
+
1401
+
1215
1402
  def test_ablation_endpoint_id_override_removes_global_model():
1216
1403
  with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
1217
1404
  f.write(
@@ -11,9 +11,11 @@ def make_config(
11
11
  independent_scoring: bool = False,
12
12
  endpoint_id: str | None = None,
13
13
  client_config: ClientConfig | None = None,
14
+ name: str | None = None,
14
15
  ) -> EvalConfig:
15
16
  return EvalConfig(
16
17
  env_id="dummy-env",
18
+ name=name,
17
19
  env_args={},
18
20
  env_dir_path="./environments",
19
21
  endpoint_id=endpoint_id,
@@ -82,6 +84,20 @@ def test_format_client_target_uses_single_resolved_base_url() -> None:
82
84
  assert EvalDisplay._format_client_target(config) == "http://localhost:8001/v1"
83
85
 
84
86
 
87
+ def test_display_uses_eval_name_for_duplicate_env_labels() -> None:
88
+ display = EvalDisplay(
89
+ [
90
+ make_config(max_concurrent=1, name="dummy-env-short"),
91
+ make_config(max_concurrent=1, name="dummy-env-long"),
92
+ ]
93
+ )
94
+
95
+ rendered = render_plain(display._make_compact_env_row(0))
96
+
97
+ assert "dummy-env-short" in rendered
98
+ assert "dummy-env-long" not in rendered
99
+
100
+
85
101
  def render_plain(renderable) -> str:
86
102
  console = Console(width=100, record=True)
87
103
  console.print(renderable)
@@ -87,6 +87,22 @@ def test_print_results_single_rollout(capsys, make_metadata, make_state, make_in
87
87
  assert "r1: [0.1, 0.2, 0.3]" in captured.out
88
88
 
89
89
 
90
+ def test_print_results_includes_eval_name(capsys, make_metadata, make_output):
91
+ from verifiers.utils.eval_utils import print_results
92
+
93
+ metadata = make_metadata(env_id="env1")
94
+ metadata["name"] = "env1-short"
95
+ results = GenerateOutputs(
96
+ outputs=[make_output(example_id=0, reward=1.0)],
97
+ metadata=metadata,
98
+ )
99
+
100
+ print_results(results)
101
+ captured = capsys.readouterr()
102
+
103
+ assert "Environment: env1-short (env1)" in captured.out
104
+
105
+
90
106
  def test_print_results_three_rollouts(capsys, make_metadata, make_state, make_input):
91
107
  """Test print_results with three rollouts per example."""
92
108
  from verifiers.utils.eval_utils import print_results
@@ -1,5 +1,6 @@
1
1
  import importlib.util
2
2
  import inspect
3
+ import sys
3
4
  from pathlib import Path
4
5
  from typing import Any
5
6
 
@@ -19,6 +20,7 @@ def _load_mcp_search_module() -> Any:
19
20
  assert spec.loader is not None
20
21
 
21
22
  module = importlib.util.module_from_spec(spec)
23
+ sys.modules[spec.name] = module
22
24
  spec.loader.exec_module(module)
23
25
  return module
24
26
 
@@ -39,10 +41,20 @@ def test_mcp_search_env_is_v1_only() -> None:
39
41
  assert env.taskset.config.max_turns == 4
40
42
 
41
43
 
44
+ def test_mcp_search_env_preserves_harness_config() -> None:
45
+ module = _load_mcp_search_module()
46
+
47
+ env = module.load_environment(
48
+ config=module.MCPSearchEnvConfig(harness={"max_turns": 7})
49
+ )
50
+
51
+ assert env.harness.config.max_turns == 7
52
+
53
+
42
54
  def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
43
55
  module = _load_mcp_search_module()
44
56
 
45
- rows = module.load_taskset(config=module.MCPSearchTasksetConfig()).rows()
57
+ rows = module.MCPSearchTaskset(config=module.MCPSearchTasksetConfig()).rows()
46
58
 
47
59
  assert len(rows) >= 10
48
60
  assert len({row["answer"] for row in rows}) == len(rows)