verifiers 0.1.15.dev16__tar.gz → 0.1.15.dev17__tar.gz

This diff shows the differences between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (313)
  1. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/PKG-INFO +1 -1
  2. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gepa_cli.py +8 -0
  3. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_save_utils.py +23 -0
  4. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_config_extension.py +21 -4
  5. verifiers-0.1.15.dev17/tests/test_v1_taskset_utils.py +46 -0
  6. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/__init__.py +1 -1
  7. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/metric_utils.py +3 -1
  8. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/save_utils.py +13 -2
  9. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/__init__.py +1 -2
  10. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/harness.py +3 -6
  11. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/taskset.py +3 -6
  12. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/types.py +0 -1
  13. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/prompt_utils.py +13 -8
  14. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/taskset_utils.py +8 -9
  15. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/.gitignore +0 -0
  16. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/LICENSE +0 -0
  17. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/README.md +0 -0
  18. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/pyproject.toml +0 -0
  19. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/AGENTS.md +0 -0
  20. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/README.md +0 -0
  21. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/__init__.py +0 -0
  22. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/conftest.py +0 -0
  23. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_browser_env.py +0 -0
  24. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_build_script.py +0 -0
  25. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_cli_agent_env.py +0 -0
  26. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_auth_errors.py +0 -0
  27. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_config.py +0 -0
  28. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_multimodal_types.py +0 -0
  29. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_composable_env.py +0 -0
  30. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_context_token_metrics.py +0 -0
  31. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_decorator_ranks.py +0 -0
  32. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_endpoint_registry.py +0 -0
  33. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_env_group.py +0 -0
  34. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_env_server.py +0 -0
  35. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_environment.py +0 -0
  36. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_environment_extra.py +0 -0
  37. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_envs.py +0 -0
  38. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_error_chain.py +0 -0
  39. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_cli.py +0 -0
  40. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_display.py +0 -0
  41. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_utils.py +0 -0
  42. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gepa_utils.py +0 -0
  43. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gym_env.py +0 -0
  44. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_harbor_env_mcp.py +0 -0
  45. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_imports.py +0 -0
  46. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_init_script.py +0 -0
  47. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_install_utils.py +0 -0
  48. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_interception_utils.py +0 -0
  49. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  50. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_lean_task.py +0 -0
  51. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_logging.py +0 -0
  52. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_math_rubric.py +0 -0
  53. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_maybe_think_parser.py +0 -0
  54. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_mcp_search_env.py +0 -0
  55. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_message_utils.py +0 -0
  56. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_message_utils_multimodal.py +0 -0
  57. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_multiturn_env.py +0 -0
  58. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_nemorl_client.py +0 -0
  59. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openai_chat_completions_token_client.py +0 -0
  60. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openai_responses_client.py +0 -0
  61. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_opencode_harbor.py +0 -0
  62. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_opencode_rlm_env.py +0 -0
  63. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openenv_client.py +0 -0
  64. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_parser.py +0 -0
  65. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_path_utils.py +0 -0
  66. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_per_turn_timing.py +0 -0
  67. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_pricing_utils.py +0 -0
  68. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_prime_plugin.py +0 -0
  69. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_renderer_client.py +0 -0
  70. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_renderer_e2e.py +0 -0
  71. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rlm_composable_env.py +0 -0
  72. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rlm_env.py +0 -0
  73. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rubric.py +0 -0
  74. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rubric_group.py +0 -0
  75. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_sandbox_env.py +0 -0
  76. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_sandbox_mixin.py +0 -0
  77. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_setup_script.py +0 -0
  78. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_singleturn_env.py +0 -0
  79. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_stateful_tool_env.py +0 -0
  80. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_think_parser.py +0 -0
  81. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tool_env.py +0 -0
  82. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tool_utils.py +0 -0
  83. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_trajectory_processing.py +0 -0
  84. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tui_info_formatting.py +0 -0
  85. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_types.py +0 -0
  86. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_bfcl.py +0 -0
  87. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_empty_completions.py +0 -0
  88. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_endpoint_protocols.py +0 -0
  89. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_example_counts.py +0 -0
  90. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_group_reward_env.py +0 -0
  91. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_harbor_cli.py +0 -0
  92. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_mini_swe_agent.py +0 -0
  93. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_nemo_gym_harness.py +0 -0
  94. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_openenv_taskset.py +0 -0
  95. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_openreward_taskset.py +0 -0
  96. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_rlm_swe.py +0 -0
  97. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_runtime_lifecycle.py +0 -0
  98. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_scoring_functions.py +0 -0
  99. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_taskset_bindings.py +0 -0
  100. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_textarena_taskset.py +0 -0
  101. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wiki_search_v1.py +0 -0
  102. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wordle_env.py +0 -0
  103. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wordle_v1_env.py +0 -0
  104. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_xml_parser.py +0 -0
  105. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/AGENTS.md +0 -0
  106. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/__init__.py +0 -0
  107. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/__init__.py +0 -0
  108. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/build.py +0 -0
  109. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/eval.py +0 -0
  110. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/gepa.py +0 -0
  111. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/init.py +0 -0
  112. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/install.py +0 -0
  113. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/setup.py +0 -0
  114. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/__init__.py +0 -0
  115. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/prime.py +0 -0
  116. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/tui.py +0 -0
  117. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/__init__.py +0 -0
  118. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/anthropic_messages_client.py +0 -0
  119. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/client.py +0 -0
  120. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  121. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_client.py +0 -0
  122. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  123. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_completions_client.py +0 -0
  124. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_responses_client.py +0 -0
  125. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/renderer_client.py +0 -0
  126. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/decorators.py +0 -0
  127. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/AGENTS.md +0 -0
  128. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/__init__.py +0 -0
  129. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/env_group.py +0 -0
  130. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/environment.py +0 -0
  131. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/README.md +0 -0
  132. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/__init__.py +0 -0
  133. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  134. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/README.md +0 -0
  135. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/__init__.py +0 -0
  136. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/_filter.py +0 -0
  137. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  138. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harness.py +0 -0
  139. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  140. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  141. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  142. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  143. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  144. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  145. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/task.py +0 -0
  146. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  147. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  148. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  149. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  150. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  151. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  152. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  153. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  154. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  155. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  156. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  157. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  158. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  159. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  160. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  161. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  162. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  163. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  164. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  165. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  166. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  167. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  168. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  169. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/gym_env.py +0 -0
  170. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  171. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  172. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  173. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/mcp_env.py +0 -0
  174. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_env.py +0 -0
  175. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  176. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  177. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/rlm_env.py +0 -0
  178. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  179. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/__init__.py +0 -0
  180. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  181. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  182. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/README.md +0 -0
  183. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/__init__.py +0 -0
  184. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/README.md +0 -0
  185. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  186. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  187. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  188. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  189. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  190. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  191. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/openenv_env.py +0 -0
  192. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  193. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/textarena_env.py +0 -0
  194. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/multiturn_env.py +0 -0
  195. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/python_env.py +0 -0
  196. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/sandbox_env.py +0 -0
  197. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/singleturn_env.py +0 -0
  198. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/stateful_tool_env.py +0 -0
  199. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/tool_env.py +0 -0
  200. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/errors.py +0 -0
  201. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/__init__.py +0 -0
  202. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/adapter.py +0 -0
  203. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/config.py +0 -0
  204. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/display.py +0 -0
  205. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/gepa_utils.py +0 -0
  206. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/__init__.py +0 -0
  207. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/maybe_think_parser.py +0 -0
  208. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/parser.py +0 -0
  209. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/think_parser.py +0 -0
  210. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/xml_parser.py +0 -0
  211. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/README.md +0 -0
  212. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/__init__.py +0 -0
  213. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/__init__.py +0 -0
  214. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/client.py +0 -0
  215. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/server.py +0 -0
  216. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/__init__.py +0 -0
  217. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/config.py +0 -0
  218. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/orchestrator.py +0 -0
  219. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/trainer.py +0 -0
  220. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/utils.py +0 -0
  221. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/__init__.py +0 -0
  222. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  223. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/judge_rubric.py +0 -0
  224. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/math_rubric.py +0 -0
  225. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric.py +0 -0
  226. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric_group.py +0 -0
  227. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/__init__.py +0 -0
  228. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/build.py +0 -0
  229. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/eval.py +0 -0
  230. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/gepa.py +0 -0
  231. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/init.py +0 -0
  232. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/install.py +0 -0
  233. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/rl.py +0 -0
  234. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/setup.py +0 -0
  235. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/train.py +0 -0
  236. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/tui.py +0 -0
  237. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/vllm.py +0 -0
  238. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/__init__.py +0 -0
  239. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/client/env_client.py +0 -0
  240. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/client/zmq_env_client.py +0 -0
  241. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/__init__.py +0 -0
  242. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_router.py +0 -0
  243. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_server.py +0 -0
  244. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_worker.py +0 -0
  245. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/zmq_env_server.py +0 -0
  246. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/types.py +0 -0
  247. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/types.py +0 -0
  248. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/__init__.py +0 -0
  249. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/async_utils.py +0 -0
  250. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/client_utils.py +0 -0
  251. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/config_utils.py +0 -0
  252. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/data_utils.py +0 -0
  253. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/display_utils.py +0 -0
  254. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/env_config_utils.py +0 -0
  255. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/env_utils.py +0 -0
  256. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/error_utils.py +0 -0
  257. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/eval_display.py +0 -0
  258. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/eval_utils.py +0 -0
  259. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/heartbeat.py +0 -0
  260. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/import_utils.py +0 -0
  261. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/install_utils.py +0 -0
  262. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/interception_utils.py +0 -0
  263. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/logging_utils.py +0 -0
  264. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/message_utils.py +0 -0
  265. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/path_utils.py +0 -0
  266. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/pricing_utils.py +0 -0
  267. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/process_utils.py +0 -0
  268. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/response_utils.py +0 -0
  269. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/serve_utils.py +0 -0
  270. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/thread_utils.py +0 -0
  271. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/threaded_sandbox_client.py +0 -0
  272. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/tool_utils.py +0 -0
  273. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/usage_utils.py +0 -0
  274. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/version_utils.py +0 -0
  275. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
  276. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/README.md +0 -0
  277. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/RE_MIGRATION.md +0 -0
  278. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/artifact.py +0 -0
  279. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/config.py +0 -0
  280. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/env.py +0 -0
  281. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/model.py +0 -0
  282. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/program.py +0 -0
  283. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/runtime.py +0 -0
  284. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/runtime_handles.py +0 -0
  285. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/sandbox.py +0 -0
  286. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/state.py +0 -0
  287. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/task.py +0 -0
  288. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/toolset.py +0 -0
  289. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/user.py +0 -0
  290. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/__init__.py +0 -0
  291. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/binding_utils.py +0 -0
  292. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_callable_utils.py +0 -0
  293. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_utils.py +0 -0
  294. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/endpoint_utils.py +0 -0
  295. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/json_utils.py +0 -0
  296. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/judge_utils.py +0 -0
  297. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  298. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  299. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_utils.py +0 -0
  300. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/object_utils.py +0 -0
  301. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/program_utils.py +0 -0
  302. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  303. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_registry.py +0 -0
  304. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  305. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
  306. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_utils.py +0 -0
  307. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/scoring_utils.py +0 -0
  308. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/serialization_utils.py +0 -0
  309. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  310. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/tool_utils.py +0 -0
  311. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/toolset_utils.py +0 -0
  312. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/trajectory_utils.py +0 -0
  313. {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev16
3
+ Version: 0.1.15.dev17
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -189,6 +189,14 @@ def test_load_gepa_toml_config_requires_env_table(tmp_path: Path):
189
189
  load_gepa_toml_config(config_path)
190
190
 
191
191
 
192
+ def test_repo_gepa_example_configs_are_valid():
193
+ config_paths = sorted(Path("configs/gepa").glob("*.toml"))
194
+ assert config_paths
195
+ for config_path in config_paths:
196
+ loaded = load_gepa_toml_config(config_path)
197
+ assert loaded["envs"], f"{config_path} should contain at least one [[env]]"
198
+
199
+
192
200
  def test_resolve_gepa_config_args_supports_plain_env_id():
193
201
  args = argparse.Namespace(env_id_or_config="primeintellect/wordle")
194
202
 
@@ -258,6 +258,13 @@ class TestSavingResults:
258
258
  assert result[0].get("foo") == "bar" # custom field from make_state fixture
259
259
  assert result[0]["reward"] == 1.0
260
260
 
261
+ def test_states_to_outputs_requires_example_id(self, make_state):
262
+ state = make_state()
263
+ del state["example_id"]
264
+
265
+ with pytest.raises(KeyError):
266
+ states_to_outputs([state], state_columns=[])
267
+
261
268
  def test_states_to_outputs_completion_keeps_messages(self, make_state):
262
269
  states = [
263
270
  make_state(
@@ -647,6 +654,22 @@ class TestBuilderPassAtK:
647
654
  # 1 of 4 correct at threshold=0.7: pass^1 = C(1,1)/C(4,1) = 0.25
648
655
  assert metadata["pass_all_k"]["1"] == pytest.approx(0.25)
649
656
 
657
+ def test_builder_requires_example_id(self):
658
+ builder = GenerateOutputsBuilder(
659
+ env_id="test-env",
660
+ env_args={},
661
+ model="test-model",
662
+ client=ClientConfig(api_base_url="http://localhost:8000/v1"),
663
+ num_examples=1,
664
+ rollouts_per_example=1,
665
+ state_columns=[],
666
+ sampling_args={},
667
+ results_path=Path("/tmp/test-results"),
668
+ )
669
+
670
+ with pytest.raises(KeyError):
671
+ builder.add_outputs([{"reward": 1.0, "metrics": {}}])
672
+
650
673
 
651
674
  class TestMetricProtocol:
652
675
  def test_all_metrics_satisfy_protocol(self):
@@ -2310,16 +2310,14 @@ def test_taskset_subclasses_inherit_registered_config_type() -> None:
2310
2310
 
2311
2311
  def test_taskset_class_loader_owns_split_loading() -> None:
2312
2312
  class LoaderTasksetConfig(TasksetConfig):
2313
- system_prompt: vf.SystemPrompt | None = "class prompt"
2313
+ system_prompt: vf.SystemPrompt = "class prompt"
2314
2314
 
2315
2315
  class LoaderTaskset(Taskset[LoaderTasksetConfig]):
2316
2316
  def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
2317
2317
  answer = "class eval" if split == "eval" else "class tasks"
2318
2318
  return [{"prompt": [], "answer": answer}]
2319
2319
 
2320
- def load_system_prompt(
2321
- self, config: LoaderTasksetConfig
2322
- ) -> vf.SystemPrompt | None:
2320
+ def load_system_prompt(self, config: LoaderTasksetConfig) -> vf.SystemPrompt:
2323
2321
  return config.system_prompt
2324
2322
 
2325
2323
  defaulted = LoaderTaskset(config=LoaderTasksetConfig())
@@ -2341,6 +2339,25 @@ def test_taskset_class_loader_owns_split_loading() -> None:
2341
2339
  assert disabled_prompt.system_prompt == []
2342
2340
 
2343
2341
 
2342
+ def test_system_prompt_alias_accepts_config_data(tmp_path) -> None:
2343
+ prompt_path = tmp_path / "system_prompt.txt"
2344
+ prompt_path.write_text("alias path system prompt", encoding="utf-8")
2345
+
2346
+ class PromptTasksetConfig(TasksetConfig):
2347
+ system_prompt: vf.SystemPrompt = None
2348
+
2349
+ config = PromptTasksetConfig.model_validate(
2350
+ {"system_prompt": {"path": str(prompt_path)}}
2351
+ )
2352
+ assert isinstance(config.system_prompt, vf.SystemPromptConfig)
2353
+
2354
+ taskset = Taskset(config=config)
2355
+
2356
+ assert taskset.system_prompt == [
2357
+ {"role": "system", "content": "alias path system prompt"}
2358
+ ]
2359
+
2360
+
2344
2361
  def test_taskset_load_tasks_can_return_empty_dataset() -> None:
2345
2362
  class LocalTasksetConfig(TasksetConfig):
2346
2363
  enabled: bool = True
@@ -0,0 +1,46 @@
1
+ import json
2
+
3
+ from datasets import Dataset
4
+
5
+ from verifiers.v1.utils.taskset_utils import dataset_from_result
6
+
7
+
8
+ def task_payload(row: dict) -> dict:
9
+ return json.loads(row["info"]["task"])
10
+
11
+
12
+ def test_dataset_from_result_assigns_example_id_to_iterable_records():
13
+ dataset = dataset_from_result(
14
+ [
15
+ {"question": "Reverse abc.", "answer": "cba"},
16
+ {"question": "Reverse xyz.", "answer": "zyx"},
17
+ ],
18
+ "ReverseTextTaskset",
19
+ )
20
+
21
+ rows = list(dataset)
22
+ payloads = [task_payload(row) for row in rows]
23
+
24
+ assert [row["example_id"] for row in rows] == [0, 1]
25
+ assert [payload["example_id"] for payload in payloads] == [0, 1]
26
+ assert all(len(payload["task_id"]) == 32 for payload in payloads)
27
+ assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1"})
28
+
29
+
30
+ def test_dataset_from_result_overwrites_existing_example_id_column():
31
+ raw_dataset = Dataset.from_list(
32
+ [
33
+ {"question": "Reverse abc.", "answer": "cba", "example_id": None},
34
+ {"question": "Reverse xyz.", "answer": "zyx", "example_id": 99},
35
+ ]
36
+ )
37
+
38
+ dataset = dataset_from_result(raw_dataset, "ReverseTextTaskset")
39
+
40
+ rows = list(dataset)
41
+ payloads = [task_payload(row) for row in rows]
42
+
43
+ assert [row["example_id"] for row in rows] == [0, 1]
44
+ assert [payload["example_id"] for payload in payloads] == [0, 1]
45
+ assert all(len(payload["task_id"]) == 32 for payload in payloads)
46
+ assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1", "99"})
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.15.dev16"
1
+ __version__ = "0.1.15.dev17"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -162,10 +162,12 @@ class PassAtKMetric:
162
162
  self.reset()
163
163
 
164
164
  def add_output(self, output: RolloutOutput) -> None:
165
+ example_id = output["example_id"]
166
+ if example_id is None:
167
+ raise ValueError("output['example_id'] is required.")
165
168
  if not self._k_values:
166
169
  return
167
170
 
168
- example_id = output.get("example_id", 0)
169
171
  self._example_counts[example_id] += 1
170
172
  if output.get("reward", 0.0) >= self.threshold:
171
173
  self._example_correct[example_id] += 1
@@ -218,8 +218,12 @@ def state_to_output(
218
218
  else:
219
219
  raise TypeError("state['timing'] must be a RolloutTiming or mapping.")
220
220
 
221
+ example_id = state["example_id"]
222
+ if example_id is None:
223
+ raise ValueError("state['example_id'] is required.")
224
+
221
225
  output = RolloutOutput(
222
- example_id=state.get("example_id", 0),
226
+ example_id=example_id,
223
227
  prompt=state.get("prompt"),
224
228
  completion=state.get("completion"),
225
229
  answer=state.get("answer", ""),
@@ -671,9 +675,16 @@ class GenerateOutputsBuilder:
671
675
  def build_outputs(self, sort_by_example_id: bool = False) -> list[RolloutOutput]:
672
676
  """Return (sorted) accumulated outputs"""
673
677
  if sort_by_example_id:
674
- return sorted(self.outputs, key=lambda o: o.get("example_id", 0))
678
+ return sorted(self.outputs, key=self.output_example_id)
675
679
  return self.outputs
676
680
 
681
+ @staticmethod
682
+ def output_example_id(output: RolloutOutput) -> int:
683
+ example_id = output["example_id"]
684
+ if example_id is None:
685
+ raise ValueError("output['example_id'] is required.")
686
+ return example_id
687
+
677
688
  def build(self, sort_by_example_id: bool = False) -> GenerateOutputs:
678
689
  """Build GenerateOutputs from accumulated outputs."""
679
690
  return GenerateOutputs(
@@ -59,14 +59,13 @@ from .toolset import (
59
59
  )
60
60
  from .utils.endpoint_utils import Endpoint
61
61
  from .utils.binding_utils import BindingsConfig, ObjectsConfig
62
- from .utils.prompt_utils import SystemPromptConfig, SystemPromptStrategy
62
+ from .utils.prompt_utils import SystemPrompt, SystemPromptConfig, SystemPromptStrategy
63
63
  from .types import (
64
64
  ConfigData,
65
65
  Handler,
66
66
  JsonData,
67
67
  Objects,
68
68
  PromptInput,
69
- SystemPrompt,
70
69
  TaskSplit,
71
70
  Tasks,
72
71
  )
@@ -72,8 +72,8 @@ from .utils.sandbox_program_utils import (
72
72
  run_sandbox_python_program,
73
73
  )
74
74
  from .utils.prompt_utils import (
75
+ SystemPrompt,
75
76
  SystemPromptStrategy,
76
- SystemPromptConfig,
77
77
  normalize_prompt,
78
78
  normalize_system_prompt,
79
79
  resolve_system_prompt,
@@ -88,7 +88,6 @@ from .types import (
88
88
  ConfigData,
89
89
  JsonData,
90
90
  Objects,
91
- PromptInput,
92
91
  )
93
92
 
94
93
  if TYPE_CHECKING:
@@ -106,7 +105,7 @@ class HarnessConfig(LifecycleConfig):
106
105
  )
107
106
  program: ProgramConfig = ProgramConfig()
108
107
  model: ModelConfig = ModelConfig()
109
- system_prompt: PromptInput | SystemPromptConfig | None = None
108
+ system_prompt: SystemPrompt = None
110
109
  system_prompt_strategy: SystemPromptStrategy = "HT"
111
110
  sandbox: SandboxConfig | None = None
112
111
  user: UserConfig | None = None
@@ -217,9 +216,7 @@ class Harness(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
217
216
  self.endpoint = self.load_endpoint()
218
217
  self.program = self.compile_program(self.program_config)
219
218
 
220
- def load_system_prompt(
221
- self, config: ConfigT
222
- ) -> PromptInput | SystemPromptConfig | None:
219
+ def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
223
220
  return config.system_prompt
224
221
 
225
222
  def load_sandbox(self, config: SandboxConfig | None) -> SandboxConfig | None:
@@ -18,7 +18,7 @@ from .utils.binding_utils import (
18
18
  BindingsConfig,
19
19
  ObjectsConfig,
20
20
  )
21
- from .utils.prompt_utils import SystemPromptConfig, normalize_system_prompt
21
+ from .utils.prompt_utils import SystemPrompt, normalize_system_prompt
22
22
  from .utils.config_utils import (
23
23
  coerce_config,
24
24
  config_ref_context,
@@ -36,7 +36,6 @@ from .utils.taskset_utils import (
36
36
  from .types import (
37
37
  JsonData,
38
38
  Objects,
39
- PromptInput,
40
39
  TaskSplit,
41
40
  Tasks,
42
41
  )
@@ -48,7 +47,7 @@ class TasksetConfig(LifecycleConfig):
48
47
  default=None,
49
48
  validation_alias=AliasChoices("taskset_id", "id"),
50
49
  )
51
- system_prompt: PromptInput | SystemPromptConfig | None = None
50
+ system_prompt: SystemPrompt = None
52
51
  user: UserConfig | None = None
53
52
  bindings: BindingsConfig = BindingsConfig()
54
53
  objects: ObjectsConfig = ObjectsConfig()
@@ -152,7 +151,5 @@ class Taskset(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
152
151
  def __len__(self) -> int:
153
152
  return len(self.get_dataset())
154
153
 
155
- def load_system_prompt(
156
- self, config: ConfigT
157
- ) -> PromptInput | SystemPromptConfig | None:
154
+ def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
158
155
  return config.system_prompt
@@ -41,7 +41,6 @@ Tasks: TypeAlias = Dataset | Iterable[JsonData] | Iterable["Task"]
41
41
 
42
42
  PromptMessage: TypeAlias = Message | JsonData
43
43
  PromptInput: TypeAlias = str | Sequence[PromptMessage]
44
- SystemPrompt: TypeAlias = PromptInput
45
44
 
46
45
  ModelClient: TypeAlias = Client | ClientConfig
47
46
  RuntimeObject: TypeAlias = object
@@ -1,7 +1,7 @@
1
1
  import importlib.util
2
2
  from dataclasses import dataclass
3
3
  from pathlib import Path
4
- from typing import TYPE_CHECKING, Literal, cast
4
+ from typing import TYPE_CHECKING, Literal, TypeAlias, cast
5
5
 
6
6
  from pydantic import model_validator
7
7
  from typing_extensions import Self
@@ -9,7 +9,7 @@ from verifiers.types import Messages, SystemMessage
9
9
  from verifiers.utils.message_utils import normalize_messages
10
10
 
11
11
  from ..config import Config
12
- from ..types import JsonData, PromptInput, SystemPrompt
12
+ from ..types import JsonData, PromptInput
13
13
  from .config_utils import current_config_ref_module
14
14
 
15
15
  if TYPE_CHECKING:
@@ -64,13 +64,15 @@ class SystemPromptConfig(Config):
64
64
  messages: list[JsonData] = []
65
65
 
66
66
  @model_validator(mode="after")
67
- def validate_one_source(self) -> Self:
68
- sources = [
67
+ def validate_one_input(self) -> Self:
68
+ inputs = [
69
69
  self.path is not None,
70
70
  bool(self.messages),
71
71
  ]
72
- if sum(sources) != 1:
73
- raise ValueError("SystemPromptConfig requires exactly one source.")
72
+ if sum(inputs) != 1:
73
+ raise ValueError(
74
+ "SystemPromptConfig requires exactly one of path or messages."
75
+ )
74
76
  return self
75
77
 
76
78
  def load(self, field_name: str) -> PromptInput | None:
@@ -81,6 +83,9 @@ class SystemPromptConfig(Config):
81
83
  return self.messages
82
84
 
83
85
 
86
+ SystemPrompt: TypeAlias = PromptInput | SystemPromptConfig | None
87
+
88
+
84
89
  def normalize_prompt(
85
90
  value: PromptInput | None, field_name: str = "prompt"
86
91
  ) -> list[JsonData]:
@@ -95,7 +100,7 @@ def normalize_prompt(
95
100
 
96
101
 
97
102
  def normalize_system_prompt(
98
- value: SystemPrompt | SystemPromptConfig | None,
103
+ value: SystemPrompt,
99
104
  field_name: str = "system_prompt",
100
105
  ) -> list[JsonData]:
101
106
  value = resolve_system_prompt_input(value, field_name=field_name)
@@ -111,7 +116,7 @@ def normalize_system_prompt(
111
116
 
112
117
 
113
118
  def resolve_system_prompt_input(
114
- value: PromptInput | SystemPromptConfig | None,
119
+ value: SystemPrompt,
115
120
  *,
116
121
  field_name: str,
117
122
  ) -> PromptInput | None:
@@ -38,10 +38,8 @@ def prepare_task(task: Task, taskset_id: str) -> Task:
38
38
  raise TypeError("v1 task loaders must return Task objects.")
39
39
  prepared = Task(cast(JsonData, dict(task)))
40
40
  prepared["taskset_id"] = taskset_id
41
- if "task_id" in prepared:
41
+ if prepared.get("task_id") is not None:
42
42
  prepared["task_id"] = str(prepared["task_id"])
43
- elif "example_id" in prepared:
44
- prepared["task_id"] = str(prepared["example_id"])
45
43
  else:
46
44
  prepared["task_id"] = uuid.uuid4().hex
47
45
  return prepared.freeze()
@@ -51,13 +49,13 @@ def dataset_record_from_task(
51
49
  task: Task,
52
50
  taskset_id: str,
53
51
  index: int,
54
- source: JsonData | None = None,
52
+ record: JsonData | None = None,
55
53
  ) -> JsonData:
56
54
  data = Task(cast(JsonData, dict(task)))
57
- data.setdefault("example_id", source.get("example_id") if source else index)
55
+ data["example_id"] = index
58
56
  normalized = prepare_task(data, taskset_id)
59
57
  task_payload = dict(normalized)
60
- dataset_record = deepcopy(dict(source or {}))
58
+ dataset_record = deepcopy(dict(record or {}))
61
59
  dataset_record["prompt"] = task_payload["prompt"]
62
60
  dataset_record["example_id"] = task_payload["example_id"]
63
61
  info = dataset_record.get("info")
@@ -82,9 +80,10 @@ def dataset_from_result(result: Tasks, taskset_id: str) -> Dataset:
82
80
  if isinstance(result, Dataset):
83
81
  records: list[JsonData] = []
84
82
  for index, record in enumerate(result):
85
- source = cast(JsonData, dict(record))
86
- task = task_from_dataset_record(source, taskset_id)
87
- records.append(dataset_record_from_task(task, taskset_id, index, source))
83
+ row = cast(JsonData, dict(record))
84
+ row["example_id"] = index
85
+ task = task_from_dataset_record(row, taskset_id)
86
+ records.append(dataset_record_from_task(task, taskset_id, index, row))
88
87
  return Dataset.from_list(records)
89
88
  tasks = tasks_from_result(result, taskset_id)
90
89
  return Dataset.from_list(dataset_records_from_tasks(tasks, taskset_id))