verifiers 0.1.15.dev167__tar.gz → 0.1.15.dev169__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/PKG-INFO +5 -1
  2. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/pyproject.toml +4 -0
  3. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_environment.py +24 -0
  4. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_trajectory_processing.py +43 -0
  5. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_runtime_lifecycle.py +180 -3
  6. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/anthropic_messages_client.py +6 -6
  7. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/openai_responses_client.py +2 -2
  8. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/README.md +35 -0
  9. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/__init__.py +5 -0
  10. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/README.md +52 -0
  11. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/__init__.py +5 -0
  12. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/__init__.py +17 -0
  13. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/__init__.py +5 -0
  14. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/tool_pdf.py +275 -0
  15. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/eval_toolkit.py +1119 -0
  16. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/evaluator.py +1271 -0
  17. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/__init__.py +5 -0
  18. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/base_client.py +15 -0
  19. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/__init__.py +4 -0
  20. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/cache_prompts.py +15 -0
  21. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/__init__.py +7 -0
  22. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/cache_filesys.py +45 -0
  23. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/load_eval_script.py +107 -0
  24. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/misc.py +106 -0
  25. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/tool_visit.py +69 -0
  26. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/url_tools.py +27 -0
  27. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/verification_tree.py +153 -0
  28. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/quest/taskset.py +667 -0
  29. verifiers-0.1.15.dev169/verifiers/envs/experimental/composable/tasksets/search/search_tasksets.py +26 -0
  30. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/types.py +35 -1
  31. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/async_utils.py +14 -15
  32. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/interception_utils.py +38 -0
  33. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/harness.py +9 -2
  34. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/endpoint_utils.py +131 -25
  35. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/sandbox_program_utils.py +40 -240
  36. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/sandbox_utils.py +58 -17
  37. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/.gitignore +0 -0
  38. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/LICENSE +0 -0
  39. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/README.md +0 -0
  40. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/AGENTS.md +0 -0
  41. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/README.md +0 -0
  42. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/__init__.py +0 -0
  43. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/conftest.py +0 -0
  44. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_browser_env.py +0 -0
  45. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_build_script.py +0 -0
  46. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_cli_agent_env.py +0 -0
  47. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_client_auth_errors.py +0 -0
  48. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_client_config.py +0 -0
  49. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_client_multimodal_types.py +0 -0
  50. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_composable_env.py +0 -0
  51. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_context_token_metrics.py +0 -0
  52. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_decorator_ranks.py +0 -0
  53. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_endpoint_registry.py +0 -0
  54. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_env_group.py +0 -0
  55. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_env_server.py +0 -0
  56. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_environment_extra.py +0 -0
  57. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_envs.py +0 -0
  58. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_error_chain.py +0 -0
  59. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_eval_cli.py +0 -0
  60. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_eval_display.py +0 -0
  61. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_eval_utils.py +0 -0
  62. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_gepa_cli.py +0 -0
  63. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_gepa_utils.py +0 -0
  64. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_gym_env.py +0 -0
  65. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_harbor_env_mcp.py +0 -0
  66. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_imports.py +0 -0
  67. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_init_script.py +0 -0
  68. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_install_utils.py +0 -0
  69. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_interception_utils.py +0 -0
  70. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  71. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_lean_task.py +0 -0
  72. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_logging.py +0 -0
  73. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_math_rubric.py +0 -0
  74. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_maybe_think_parser.py +0 -0
  75. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_mcp_search_env.py +0 -0
  76. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_message_utils.py +0 -0
  77. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_message_utils_multimodal.py +0 -0
  78. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_multiturn_env.py +0 -0
  79. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_nemorl_client.py +0 -0
  80. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_openai_chat_completions_token_client.py +0 -0
  81. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_openai_responses_client.py +0 -0
  82. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_opencode_harbor.py +0 -0
  83. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_opencode_rlm_env.py +0 -0
  84. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_openenv_client.py +0 -0
  85. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_parser.py +0 -0
  86. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_path_utils.py +0 -0
  87. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_per_turn_timing.py +0 -0
  88. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_pricing_utils.py +0 -0
  89. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_prime_plugin.py +0 -0
  90. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_renderer_client.py +0 -0
  91. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_renderer_e2e.py +0 -0
  92. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_rlm_composable_env.py +0 -0
  93. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_rubric.py +0 -0
  94. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_rubric_group.py +0 -0
  95. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_sandbox_env.py +0 -0
  96. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_sandbox_mixin.py +0 -0
  97. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_save_utils.py +0 -0
  98. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_setup_script.py +0 -0
  99. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_singleturn_env.py +0 -0
  100. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_stateful_tool_env.py +0 -0
  101. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_think_parser.py +0 -0
  102. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_tool_env.py +0 -0
  103. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_tool_utils.py +0 -0
  104. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_tui_info_formatting.py +0 -0
  105. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_types.py +0 -0
  106. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_bfcl.py +0 -0
  107. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_config_extension.py +0 -0
  108. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_empty_completions.py +0 -0
  109. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_endpoint_protocols.py +0 -0
  110. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_example_counts.py +0 -0
  111. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_group_reward_env.py +0 -0
  112. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_harbor_cli.py +0 -0
  113. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_mini_swe_agent.py +0 -0
  114. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_nemo_gym_harness.py +0 -0
  115. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_openenv_taskset.py +0 -0
  116. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_openreward_taskset.py +0 -0
  117. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_replay_harness.py +0 -0
  118. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_rlm_swe.py +0 -0
  119. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_scoring_functions.py +0 -0
  120. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_taskset_bindings.py +0 -0
  121. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_taskset_utils.py +0 -0
  122. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_v1_textarena_taskset.py +0 -0
  123. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_wiki_search_v1.py +0 -0
  124. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_wordle_env.py +0 -0
  125. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_wordle_v1_env.py +0 -0
  126. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/tests/test_xml_parser.py +0 -0
  127. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/AGENTS.md +0 -0
  128. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/__init__.py +0 -0
  129. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/__init__.py +0 -0
  130. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/__init__.py +0 -0
  131. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/build.py +0 -0
  132. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/eval.py +0 -0
  133. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/gepa.py +0 -0
  134. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/init.py +0 -0
  135. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/install.py +0 -0
  136. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/commands/setup.py +0 -0
  137. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/plugins/__init__.py +0 -0
  138. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/plugins/prime.py +0 -0
  139. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/cli/tui.py +0 -0
  140. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/__init__.py +0 -0
  141. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/client.py +0 -0
  142. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  143. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/openai_chat_completions_client.py +0 -0
  144. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  145. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/openai_completions_client.py +0 -0
  146. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/clients/renderer_client.py +0 -0
  147. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/decorators.py +0 -0
  148. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/AGENTS.md +0 -0
  149. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/__init__.py +0 -0
  150. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/env_group.py +0 -0
  151. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/environment.py +0 -0
  152. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/README.md +0 -0
  153. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/__init__.py +0 -0
  154. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  155. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/README.md +0 -0
  156. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/__init__.py +0 -0
  157. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/_filter.py +0 -0
  158. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  159. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harness.py +0 -0
  160. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  161. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  162. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  163. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  164. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  165. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  166. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/task.py +0 -0
  167. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  168. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  169. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  170. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  171. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  172. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  173. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  174. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  175. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  176. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  177. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/README.md +0 -0
  178. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  179. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/__init__.py +0 -0
  180. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/extract_fix_patch.sh +0 -0
  181. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/taskset.py +0 -0
  182. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/openswe/__init__.py +0 -0
  183. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/openswe/taskset.py +0 -0
  184. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/__init__.py +0 -0
  185. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/log_parser.py +0 -0
  186. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/taskset.py +0 -0
  187. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/__init__.py +0 -0
  188. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/taskset.py +0 -0
  189. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/shared/__init__.py +0 -0
  190. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/shared/test_patch.py +0 -0
  191. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/__init__.py +0 -0
  192. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/taskset.py +0 -0
  193. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/__init__.py +0 -0
  194. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/taskset.py +0 -0
  195. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/__init__.py +0 -0
  196. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/log_parsers.py +0 -0
  197. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/taskset.py +0 -0
  198. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/__init__.py +0 -0
  199. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/taskset.py +0 -0
  200. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  201. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/gym_env.py +0 -0
  202. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  203. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  204. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  205. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/mcp_env.py +0 -0
  206. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/opencode_env.py +0 -0
  207. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  208. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  209. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  210. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/utils/__init__.py +0 -0
  211. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  212. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  213. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/README.md +0 -0
  214. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/__init__.py +0 -0
  215. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/README.md +0 -0
  216. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  217. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  218. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  219. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  220. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  221. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  222. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/openenv_env.py +0 -0
  223. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  224. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/integrations/textarena_env.py +0 -0
  225. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/multiturn_env.py +0 -0
  226. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/python_env.py +0 -0
  227. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/sandbox_env.py +0 -0
  228. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/singleturn_env.py +0 -0
  229. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/stateful_tool_env.py +0 -0
  230. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/envs/tool_env.py +0 -0
  231. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/errors.py +0 -0
  232. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/gepa/__init__.py +0 -0
  233. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/gepa/adapter.py +0 -0
  234. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/gepa/config.py +0 -0
  235. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/gepa/display.py +0 -0
  236. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/gepa/gepa_utils.py +0 -0
  237. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/parsers/__init__.py +0 -0
  238. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/parsers/maybe_think_parser.py +0 -0
  239. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/parsers/parser.py +0 -0
  240. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/parsers/think_parser.py +0 -0
  241. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/parsers/xml_parser.py +0 -0
  242. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/README.md +0 -0
  243. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/__init__.py +0 -0
  244. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/inference/__init__.py +0 -0
  245. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/inference/client.py +0 -0
  246. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/inference/server.py +0 -0
  247. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/trainer/__init__.py +0 -0
  248. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/trainer/config.py +0 -0
  249. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/trainer/orchestrator.py +0 -0
  250. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/trainer/trainer.py +0 -0
  251. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rl/trainer/utils.py +0 -0
  252. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/__init__.py +0 -0
  253. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  254. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/judge_rubric.py +0 -0
  255. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/math_rubric.py +0 -0
  256. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/rubric.py +0 -0
  257. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/rubrics/rubric_group.py +0 -0
  258. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/__init__.py +0 -0
  259. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/build.py +0 -0
  260. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/eval.py +0 -0
  261. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/gepa.py +0 -0
  262. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/init.py +0 -0
  263. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/install.py +0 -0
  264. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/rl.py +0 -0
  265. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/setup.py +0 -0
  266. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/train.py +0 -0
  267. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/tui.py +0 -0
  268. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/scripts/vllm.py +0 -0
  269. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/__init__.py +0 -0
  270. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/client/env_client.py +0 -0
  271. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/client/zmq_env_client.py +0 -0
  272. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/server/__init__.py +0 -0
  273. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/server/env_router.py +0 -0
  274. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/server/env_server.py +0 -0
  275. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/server/env_worker.py +0 -0
  276. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/server/zmq_env_server.py +0 -0
  277. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/serve/types.py +0 -0
  278. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/__init__.py +0 -0
  279. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/client_utils.py +0 -0
  280. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/config_utils.py +0 -0
  281. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/data_utils.py +0 -0
  282. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/display_utils.py +0 -0
  283. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/env_config_utils.py +0 -0
  284. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/env_utils.py +0 -0
  285. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/error_utils.py +0 -0
  286. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/eval_display.py +0 -0
  287. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/eval_utils.py +0 -0
  288. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/heartbeat.py +0 -0
  289. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/import_utils.py +0 -0
  290. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/install_utils.py +0 -0
  291. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/logging_utils.py +0 -0
  292. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/message_utils.py +0 -0
  293. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/metric_utils.py +0 -0
  294. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/path_utils.py +0 -0
  295. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/pricing_utils.py +0 -0
  296. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/process_utils.py +0 -0
  297. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/response_utils.py +0 -0
  298. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/save_utils.py +0 -0
  299. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/serve_utils.py +0 -0
  300. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/thread_utils.py +0 -0
  301. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/threaded_sandbox_client.py +0 -0
  302. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/tool_utils.py +0 -0
  303. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/usage_utils.py +0 -0
  304. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/utils/version_utils.py +0 -0
  305. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
  306. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/README.md +0 -0
  307. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/RE_MIGRATION.md +0 -0
  308. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/__init__.py +0 -0
  309. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/artifact.py +0 -0
  310. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/config.py +0 -0
  311. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/env.py +0 -0
  312. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/model.py +0 -0
  313. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/program.py +0 -0
  314. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/runtime.py +0 -0
  315. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/runtime_handles.py +0 -0
  316. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/sandbox.py +0 -0
  317. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/state.py +0 -0
  318. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/task.py +0 -0
  319. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/taskset.py +0 -0
  320. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/toolset.py +0 -0
  321. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/types.py +0 -0
  322. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/user.py +0 -0
  323. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/__init__.py +0 -0
  324. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/binding_utils.py +0 -0
  325. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/config_callable_utils.py +0 -0
  326. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/config_utils.py +0 -0
  327. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/json_utils.py +0 -0
  328. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/judge_utils.py +0 -0
  329. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  330. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/logging_utils.py +0 -0
  331. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  332. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/mcp_utils.py +0 -0
  333. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/object_utils.py +0 -0
  334. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/program_utils.py +0 -0
  335. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/prompt_utils.py +0 -0
  336. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  337. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/runtime_registry.py +0 -0
  338. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
  339. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/scoring_utils.py +0 -0
  340. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/serialization_utils.py +0 -0
  341. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  342. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/taskset_utils.py +0 -0
  343. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/tool_utils.py +0 -0
  344. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/toolset_utils.py +0 -0
  345. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/trajectory_utils.py +0 -0
  346. {verifiers-0.1.15.dev167 → verifiers-0.1.15.dev169}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev167
3
+ Version: 0.1.15.dev169
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -22,8 +22,10 @@ Classifier: Programming Language :: Python :: 3.13
22
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
24
  Requires-Python: <3.14,>=3.10
25
+ Requires-Dist: aiohttp>=3.9.0
25
26
  Requires-Dist: aiolimiter>=1.2.1
26
27
  Requires-Dist: anthropic>=0.78.0
28
+ Requires-Dist: certifi
27
29
  Requires-Dist: datasets<4.7.0,>=3.0.0
28
30
  Requires-Dist: gepa
29
31
  Requires-Dist: httpx>=0.27.0
@@ -35,10 +37,12 @@ Requires-Dist: nest-asyncio>=1.6.0
35
37
  Requires-Dist: numpy
36
38
  Requires-Dist: openai-agents>=0.0.7
37
39
  Requires-Dist: openai>=1.108.1
40
+ Requires-Dist: pillow
38
41
  Requires-Dist: prime-pydantic-config[toml]
39
42
  Requires-Dist: prime-sandboxes>=0.2.25
40
43
  Requires-Dist: prime-tunnel>=0.1.6
41
44
  Requires-Dist: pydantic>=2.11.9
45
+ Requires-Dist: pymupdf
42
46
  Requires-Dist: pyzmq>=27.1.0
43
47
  Requires-Dist: regex<2026.4.4
44
48
  Requires-Dist: requests
@@ -53,6 +53,10 @@ dependencies = [
53
53
  "setproctitle>=1.3.0",
54
54
  "regex<2026.4.4",
55
55
  "httpx>=0.27.0",
56
+ "aiohttp>=3.9.0",
57
+ "pymupdf",
58
+ "pillow",
59
+ "certifi",
56
60
  "prime-pydantic-config[toml]",
57
61
  "uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'",
58
62
  ]
@@ -697,6 +697,30 @@ class TestMaybeRetry:
697
697
  error_data = rollout_outputs[0]["error"]
698
698
  assert "InfraError" == error_data["error"]
699
699
 
700
+ @pytest.mark.asyncio
701
+ async def test_retries_serialized_infra_error_subclass(self):
702
+ """A serialized InfraError subclass (e.g. SandboxError) in returned state
703
+ must trigger retry.
704
+
705
+ The v1 harness serializes state["error"] to ErrorData before maybe_retry
706
+ inspects it, so matching must be subclass-aware (rebuild concrete error +
707
+ isinstance) — base-name substring matching missed SandboxError, which is
708
+ an InfraError and should be retried.
709
+ """
710
+ from verifiers.utils.async_utils import maybe_retry
711
+ from verifiers.utils.error_utils import error_data
712
+
713
+ serialized = error_data(vf.SandboxError("Program file upload failed"))
714
+ calls = {"n": 0}
715
+
716
+ async def attempt():
717
+ calls["n"] += 1
718
+ return {"error": serialized}
719
+
720
+ result = await maybe_retry(attempt, max_retries=2, initial=0.0, max_wait=0.0)()
721
+ assert calls["n"] == 3 # 1 initial + 2 retries (InfraError is retryable)
722
+ assert result["error"] == serialized # last result returned after exhaustion
723
+
700
724
 
701
725
  class TestEmptyModelResponseErrors:
702
726
  """Test cases for empty and invalid model response error handling."""
@@ -282,6 +282,49 @@ async def test_parsed_prompt_attribution_survives_v1_assert_serializable():
282
282
  State({"trajectory": [step]}).assert_serializable()
283
283
 
284
284
 
285
+ def test_assert_serializable_accepts_msgpack_sidecars_rejects_unknown():
286
+ """The ``assert_serializable`` json.dumps gate must accept exactly what the
287
+ trainer transport (msgpack) accepts, while staying strict otherwise.
288
+
289
+ Trajectory token steps carry sidecars that are non-JSON by design and reach
290
+ the trainer via msgpack, not JSON: the renderer ``MultiModalData`` (a
291
+ dataclass holding numpy pixel arrays) and ``routed_experts`` (a raw
292
+ ``memoryview`` buffer). Both must clear the gate; any other
293
+ non-serializable object must still raise.
294
+ """
295
+ import dataclasses
296
+
297
+ import numpy as np
298
+
299
+ @dataclasses.dataclass
300
+ class _FakeMultiModalData:
301
+ mm_hashes: dict
302
+ mm_items: dict
303
+ mm_placeholders: dict
304
+
305
+ mm = _FakeMultiModalData(
306
+ mm_hashes={"image": ["h1"]},
307
+ mm_items={"image": [np.zeros((2, 2), dtype=np.uint8)]},
308
+ mm_placeholders={"image": [{"offset": 0, "length": 4}]},
309
+ )
310
+ step = {
311
+ "tokens": {
312
+ "prompt_ids": [1, 2],
313
+ "multi_modal_data": mm,
314
+ "routed_experts": {"data": memoryview(b"abc"), "shape": [3], "start": 0},
315
+ }
316
+ }
317
+ # Must not raise: both sidecars are msgpack-transported, not JSON.
318
+ State({"trajectory": [step]}).assert_serializable()
319
+
320
+ # A genuinely non-serializable object must still be rejected.
321
+ class _Unknown:
322
+ pass
323
+
324
+ with pytest.raises(TypeError):
325
+ State({"trajectory": [{"tokens": _Unknown()}]}).assert_serializable()
326
+
327
+
285
328
  def test_process_trajectory_steps_for_training(make_input):
286
329
  """Test processing trajectory steps into training examples."""
287
330
  state1 = State(
@@ -110,6 +110,15 @@ class BlockingModelClient(CapturingModelClient):
110
110
  return await super().get_response(**kwargs)
111
111
 
112
112
 
113
+ class RaisingModelClient:
114
+ def __init__(self, error: vf.Error):
115
+ self.error = error
116
+
117
+ async def get_response(self, **kwargs: object) -> Response:
118
+ _ = kwargs
119
+ raise self.error
120
+
121
+
113
122
  class FakeCreateSandboxRequest:
114
123
  def __init__(self, **kwargs: object):
115
124
  self.kwargs = kwargs
@@ -517,6 +526,31 @@ async def endpoint_program(task, state):
517
526
  }
518
527
 
519
528
 
529
+ async def endpoint_model_error_program(task, state):
530
+ _ = task
531
+ root = state["endpoint_root_url"].rstrip("/")
532
+ endpoint_client = cast(OpenAI, state.get_client(api="chat", sync=True))
533
+ auth_headers = {"Authorization": f"Bearer {endpoint_client.api_key}"}
534
+ endpoint_client.close()
535
+
536
+ def post_model() -> None:
537
+ request = urllib.request.Request(
538
+ f"{root}/vf/model",
539
+ data=json.dumps(
540
+ {"messages": [{"role": "user", "content": "too long"}]}
541
+ ).encode(),
542
+ headers={"content-type": "application/json", **auth_headers},
543
+ )
544
+ with urllib.request.urlopen(request):
545
+ pass
546
+
547
+ try:
548
+ await asyncio.to_thread(post_model)
549
+ except Exception as exc:
550
+ raise vf.SandboxError("Sandbox command failed") from exc
551
+ raise AssertionError("Expected /vf/model to fail")
552
+
553
+
520
554
  async def endpoint_trajectory_program(task, state):
521
555
  _ = task
522
556
  root = state["endpoint_root_url"].rstrip("/")
@@ -725,6 +759,7 @@ for _name, _value in {
725
759
  "initialize_from_taskset": initialize_from_taskset,
726
760
  "child_reads_program_sandbox": child_reads_program_sandbox,
727
761
  "endpoint_program": endpoint_program,
762
+ "endpoint_model_error_program": endpoint_model_error_program,
728
763
  "endpoint_trajectory_program": endpoint_trajectory_program,
729
764
  "concurrent_endpoint_program": concurrent_endpoint_program,
730
765
  "mcp_proxy_program": mcp_proxy_program,
@@ -827,6 +862,41 @@ async def test_endpoint_exposes_tool_user_and_stop_surfaces() -> None:
827
862
  assert "endpoint_root_url" not in state
828
863
 
829
864
 
865
+ @pytest.mark.asyncio
866
+ async def test_vf_model_bridge_preserves_overlong_prompt_error() -> None:
867
+ harness = make_harness(
868
+ program={"fn": program_ref("endpoint_model_error_program")},
869
+ model="test-model",
870
+ client=RaisingModelClient(vf.OverlongPromptError("too long")),
871
+ )
872
+ task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
873
+
874
+ state = await harness.run(task)
875
+ await harness.teardown()
876
+
877
+ assert state["prompt_too_long"] is True
878
+ assert state["is_truncated"] is True
879
+ assert state["stop_condition"] == "prompt_too_long"
880
+ assert state.get("error") is None
881
+
882
+
883
+ @pytest.mark.asyncio
884
+ async def test_vf_model_bridge_preserves_model_error() -> None:
885
+ harness = make_harness(
886
+ program={"fn": program_ref("endpoint_model_error_program")},
887
+ model="test-model",
888
+ client=RaisingModelClient(vf.ModelError("model failed")),
889
+ )
890
+ task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
891
+
892
+ state = await harness.run(task)
893
+ await harness.teardown()
894
+
895
+ assert state["stop_condition"] == "has_error"
896
+ assert state["error"]["error"] == "ModelError"
897
+ assert "SandboxError" not in state["error"]["error_chain_str"]
898
+
899
+
830
900
  @pytest.mark.asyncio
831
901
  async def test_endpoint_request_can_hide_internal_model_call_from_trajectory() -> None:
832
902
  client = FakeModelClient([fake_response("hidden"), fake_response("shown")])
@@ -1462,6 +1532,70 @@ async def test_create_sandbox_cleans_up_wait_failure_with_retry(
1462
1532
  assert client.delete_calls == 2
1463
1533
 
1464
1534
 
1535
+ @pytest.mark.asyncio
1536
+ async def test_upload_program_files_retries_transient_transfer_error(
1537
+ monkeypatch: pytest.MonkeyPatch,
1538
+ ) -> None:
1539
+ install_fake_sandboxes(monkeypatch)
1540
+ disable_sandbox_retry_sleep(monkeypatch)
1541
+
1542
+ class FlakyUploadClient:
1543
+ calls = 0
1544
+
1545
+ async def upload_bytes(self, *args: object, **kwargs: object) -> None:
1546
+ _ = args, kwargs
1547
+ self.calls += 1
1548
+ if self.calls == 1:
1549
+ raise FakeAPIError("Upload failed: ")
1550
+
1551
+ client = FlakyUploadClient()
1552
+ task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
1553
+ state = vf.State.for_task(task)
1554
+
1555
+ await sandbox_utils.upload_program_files(
1556
+ cast(sandbox_utils.SandboxClient, client),
1557
+ "sbx-upload",
1558
+ {"files": {"/tmp/file.txt": "content"}},
1559
+ task,
1560
+ state,
1561
+ Runtime(),
1562
+ )
1563
+
1564
+ assert client.calls == 2
1565
+
1566
+
1567
+ @pytest.mark.asyncio
1568
+ async def test_upload_program_files_does_not_retry_non_transient_api_error(
1569
+ monkeypatch: pytest.MonkeyPatch,
1570
+ ) -> None:
1571
+ install_fake_sandboxes(monkeypatch)
1572
+ disable_sandbox_retry_sleep(monkeypatch)
1573
+
1574
+ class FailingUploadClient:
1575
+ calls = 0
1576
+
1577
+ async def upload_bytes(self, *args: object, **kwargs: object) -> None:
1578
+ _ = args, kwargs
1579
+ self.calls += 1
1580
+ raise FakeAPIError("Upload failed: HTTP 400: bad request")
1581
+
1582
+ client = FailingUploadClient()
1583
+ task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
1584
+ state = vf.State.for_task(task)
1585
+
1586
+ with pytest.raises(vf.SandboxError, match="HTTP 400"):
1587
+ await sandbox_utils.upload_program_files(
1588
+ cast(sandbox_utils.SandboxClient, client),
1589
+ "sbx-upload",
1590
+ {"files": {"/tmp/file.txt": "content"}},
1591
+ task,
1592
+ state,
1593
+ Runtime(),
1594
+ )
1595
+
1596
+ assert client.calls == 1
1597
+
1598
+
1465
1599
  @pytest.mark.asyncio
1466
1600
  async def test_create_sandbox_cancellation_deletes_late_provider_result(
1467
1601
  monkeypatch: pytest.MonkeyPatch,
@@ -1603,8 +1737,8 @@ async def test_sandbox_base_program_max_turns_zero_is_unbounded(
1603
1737
  config_path.write_text(json.dumps({"max_turns": 0}))
1604
1738
  namespace["RUNNER_CONFIG_PATH"] = str(config_path)
1605
1739
 
1606
- async def create_model_message(state, messages, client):
1607
- _ = state, messages, client
1740
+ async def create_model_message(state, messages):
1741
+ _ = state, messages
1608
1742
  return {"role": "assistant", "content": "done"}
1609
1743
 
1610
1744
  async def call_user(state, messages):
@@ -1621,12 +1755,55 @@ async def test_sandbox_base_program_max_turns_zero_is_unbounded(
1621
1755
 
1622
1756
  state = {"prompt": [{"role": "user", "content": "hi"}], "runtime": {}}
1623
1757
  run_base = cast(Any, namespace["run_base"])
1624
- result = await run_base({}, state, object())
1758
+ result = await run_base({}, state)
1625
1759
 
1626
1760
  assert result["completion"] == [{"role": "assistant", "content": "done"}]
1627
1761
  assert result["stop_condition"] == "no_tools"
1628
1762
 
1629
1763
 
1764
+ @pytest.mark.asyncio
1765
+ async def test_sandbox_base_program_model_call_uses_vf_model_bridge() -> None:
1766
+ namespace: dict[str, object] = {}
1767
+ source = runner_source().rsplit("asyncio.run(main())", 1)[0]
1768
+ exec(source, namespace)
1769
+
1770
+ posted: list[tuple[str, Any, object]] = []
1771
+
1772
+ async def vf_post(state, path, payload, timeout=None):
1773
+ _ = state
1774
+ posted.append((path, payload, timeout))
1775
+ return {"message": {"role": "assistant", "content": "ok"}}
1776
+
1777
+ namespace["vf_post"] = vf_post
1778
+ create_model_message = cast(Any, namespace["create_model_message"])
1779
+
1780
+ # Canonical Messages (incl. an image content part) are sent unchanged over the
1781
+ # /vf/model bridge; the host owns client resolution + tokenization and returns
1782
+ # the assistant message.
1783
+ messages = [
1784
+ {"role": "user", "content": "hi"},
1785
+ {
1786
+ "role": "tool",
1787
+ "tool_call_id": "call_1",
1788
+ "content": [
1789
+ {"type": "text", "text": "shot"},
1790
+ {
1791
+ "type": "image_url",
1792
+ "image_url": {"url": "data:image/png;base64,AAA"},
1793
+ },
1794
+ ],
1795
+ },
1796
+ ]
1797
+ message = await create_model_message({"runtime": {}}, messages)
1798
+
1799
+ assert message == {"role": "assistant", "content": "ok"}
1800
+ assert len(posted) == 1
1801
+ path, payload, timeout = posted[0]
1802
+ assert path == "model"
1803
+ assert payload["messages"] == messages # image part preserved verbatim
1804
+ assert timeout is None
1805
+
1806
+
1630
1807
  def test_sandbox_program_patch_cannot_set_lifecycle_fields() -> None:
1631
1808
  state = vf.State.for_task(vf.Task({"prompt": []}).freeze())
1632
1809
 
@@ -214,15 +214,15 @@ class AnthropicMessagesClient(
214
214
  return {}
215
215
 
216
216
  def build_tool_result_block(message: ToolMessage) -> ToolResultBlockParam:
217
+ if isinstance(message.content, str):
218
+ result_content: Any = message.content
219
+ else:
220
+ # Keep images: image_url parts -> Anthropic image blocks (not "[image]" text).
221
+ result_content = normalize_anthropic_content(message.content)
217
222
  return ToolResultBlockParam(
218
223
  type="tool_result",
219
224
  tool_use_id=message.tool_call_id,
220
- content=cast(
221
- Any,
222
- message.content
223
- if isinstance(message.content, str)
224
- else " ".join(content_to_text_chunks(message.content)),
225
- ),
225
+ content=cast(Any, result_content),
226
226
  )
227
227
 
228
228
  def from_chat_message(message: Message) -> AnthropicMessageParam | None:
@@ -156,8 +156,8 @@ class OpenAIResponsesClient(
156
156
  if isinstance(message, ToolMessage):
157
157
  output = message.content
158
158
  if not isinstance(output, str):
159
- text = content_to_text(output)
160
- output = text if text else str(output)
159
+ # Keep images: image_url parts -> Responses input_image (not text).
160
+ output = normalize_message_content(output)
161
161
  return [
162
162
  {
163
163
  "type": "function_call_output",
@@ -0,0 +1,35 @@
1
+ # Search Tasksets
2
+
3
+ Composable search/research tasksets for agents that solve live information-seeking tasks in a sandbox.
4
+
5
+ The search family is intentionally backend-oriented, mirroring the SWE taskset pattern while keeping the task contract research-centric: each task expects a single final answer rather than a code patch. Agents may use web/search tools, browser helpers, or other sandbox resources provided by the paired environment.
6
+
7
+ ## Backends
8
+
9
+ | Backend | Source | Default dataset | Status |
10
+ |---|---|---|---|
11
+ | `quest` | [OSU-NLP-Group/QUEST](https://github.com/OSU-NLP-Group/QUEST) | [`osunlp/QUEST-RL-Data`](https://huggingface.co/datasets/osunlp/QUEST-RL-Data) | Objective tasks supported |
12
+
13
+ ## Usage
14
+
15
+ ```python
16
+ from verifiers.envs.experimental.composable.tasksets.search import make_search_taskset
17
+
18
+ taskset = make_search_taskset(backend="quest", category="objective")
19
+ ```
20
+
21
+ `make_search_taskset()` dispatches by backend name. Unknown backends raise `ValueError` with the available backend list.
22
+
23
+ ## Output Contract
24
+
25
+ Search tasksets should define their own output contract. The initial `quest` backend expects the agent to write one final researched response to `/task/answer.txt`, including supporting URLs/citations when available. Scratch reasoning, tool traces, and logs should not be written as the final answer.
26
+
27
+ ## Error Handling
28
+
29
+ Search tasksets should use the framework error taxonomy for infrastructure failures:
30
+
31
+ - `vf.SandboxError` for sandbox setup, command, or lifecycle failures.
32
+ - `vf.ModelError` for judge/model provider failures.
33
+ - `vf.InfraError` for dataset, evaluator, or external runtime failures.
34
+
35
+ Incorrect answers should not set `state["error"]`; they should score normally, often as `0.0`.
@@ -0,0 +1,5 @@
1
+ """Composable search/research tasksets."""
2
+
3
+ from .search_tasksets import make_quest_taskset, make_search_taskset
4
+
5
+ __all__ = ["make_quest_taskset", "make_search_taskset"]
@@ -0,0 +1,52 @@
1
+ # QUEST Search Taskset
2
+
3
+ Objective QUEST tasks ported into the composable search taskset framework.
4
+
5
+ ## Source
6
+
7
+ - Dataset: [`osunlp/QUEST-RL-Data`](https://huggingface.co/datasets/osunlp/QUEST-RL-Data)
8
+ - Upstream project: [`OSU-NLP-Group/QUEST`](https://github.com/OSU-NLP-Group/QUEST)
9
+
10
+ The taskset loads the Hugging Face dataset, filters to `rl_task_category == "objective"` by default, and uses the dataset-provided generated evaluation scripts under `eval_scripts/*.py`.
11
+
12
+ ## Task Contract
13
+
14
+ Each example is a live research question. The agent should produce one final answer in `/task/answer.txt`.
15
+
16
+ The paired `rlm_search` environment prompts RLM to write this file and provides web search/open-page skills. The rubric can fall back to the final assistant text if the answer file is empty, but agents should still write the file directly.
17
+
18
+ ## Scoring
19
+
20
+ `QuestRubric` loads the generated eval script for the example's `task_id` and calls its async `evaluate_answer(...)` entrypoint using the vendored minimal `obj_task_eval` runtime. The rollout reward is `summary["final_score"]`, clipped to `[0.0, 1.0]`.
21
+
22
+ Generated scripts may request URL-backed verification. PDF URLs are detected and parsed with the upstream QUEST PDF parser path before falling back to generic webpage retrieval.
23
+
24
+ This port intentionally preserves upstream QUEST's URL-backed verification semantics. The upstream verifier generally treats invalid, irrelevant, or inaccessible cited webpages as unsupported claims, so the affected verification node can receive `0.0` even when the immediate cause is a source-access problem such as a bot challenge, rate limit, timeout, or parser failure. Future work should consider a finer-grained source-access taxonomy so that verifier infrastructure limitations can be distinguished from model-provided bad URLs or unsupported claims.
25
+
26
+ A reward of `0.0` with no `state["error"]` means the QUEST evaluator ran and judged the answer incorrect under the upstream-compatible scoring path. Infrastructure and evaluator failures outside normal QUEST source verification are represented with `vf.Error` subclasses instead of ad hoc success metrics.
27
+
28
+ ## Error Handling
29
+
30
+ QUEST uses Verifiers' framework-managed error field for non-answer failures when the failure comes from external runtime systems:
31
+
32
+ - Missing live sandbox or answer-file read failure: `vf.SandboxError`.
33
+ - Transient judge provider/network/rate-limit/server failures: retryable `vf.InfraError`.
34
+ - Empty or invalid judge responses: retryable `vf.InvalidModelResponseError` / `vf.EmptyModelResponseError`.
35
+ - Judge auth, model-not-found, content-filter, or invalid request failures: non-retryable `vf.ModelError`.
36
+ - QUEST eval-script download/cache resolution failure: `vf.InfraError`.
37
+
38
+ Wrong answers, empty answers, and inaccessible or irrelevant cited sources remain ordinary scored outcomes and return `0.0` without setting `state["error"]`. Generated eval-script source errors, missing task metadata, missing eval-script files, import/load failures, and unexpected evaluator runtime bugs are not converted to `vf.Error`; they raise normally so broken evaluator code fails hard.
39
+
40
+ ## Common Arguments
41
+
42
+ | Argument | Default | Description |
43
+ |---|---:|---|
44
+ | `dataset_name` | `osunlp/QUEST-RL-Data` | Hugging Face dataset name. |
45
+ | `split` | `train` | Dataset split. |
46
+ | `category` | `objective` | Initial implementation supports objective tasks only. |
47
+ | `answer_file` | `/task/answer.txt` | Final answer path in the sandbox. |
48
+ | `judge_model` | `openai/gpt-5.4-mini` | OpenAI-compatible model for QUEST verifier calls. |
49
+ | `judge_base_url` | `https://api.pinference.ai/api/v1` | Judge API base URL. |
50
+ | `judge_api_key_var` | `PRIME_API_KEY` | Env var containing the judge API key. |
51
+ | `quest_eval_scripts_dir` | HF cache | Optional local directory containing `eval_scripts/*.py`. |
52
+ | `quest_cache_dir` | `~/.cache/verifiers/quest` | Host cache for QUEST verifier state. |
@@ -0,0 +1,5 @@
1
+ """QUEST search taskset."""
2
+
3
+ from .taskset import QuestRubric, QuestTaskSet
4
+
5
+ __all__ = ["QuestRubric", "QuestTaskSet"]
@@ -0,0 +1,17 @@
1
+ """Vendored QUEST objective evaluation runtime."""
2
+
3
+ from .eval_toolkit import BinaryEvalResult, Extractor, Verifier, create_evaluator
4
+ from .evaluator import Evaluator
5
+ from .utils import CacheFileSys
6
+ from .verification_tree import AggregationStrategy, VerificationNode
7
+
8
+ __all__ = [
9
+ "AggregationStrategy",
10
+ "BinaryEvalResult",
11
+ "CacheFileSys",
12
+ "Evaluator",
13
+ "Extractor",
14
+ "Verifier",
15
+ "VerificationNode",
16
+ "create_evaluator",
17
+ ]
@@ -0,0 +1,5 @@
1
+ """Vendored QUEST API tool shims."""
2
+
3
+ from .tool_pdf import PDFParser
4
+
5
+ __all__ = ["PDFParser"]