verifiers 0.1.15.dev173__tar.gz → 0.1.15.dev176__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/PKG-INFO +2 -2
  2. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/pyproject.toml +1 -1
  3. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/README.md +4 -1
  4. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/openseeker/README.md +16 -3
  5. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/openseeker/taskset.py +1 -40
  6. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/redsearcher/README.md +2 -2
  7. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/redsearcher/taskset.py +70 -32
  8. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/harness.py +3 -1
  9. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/endpoint_utils.py +6 -1
  10. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/.gitignore +0 -0
  11. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/LICENSE +0 -0
  12. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/README.md +0 -0
  13. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/AGENTS.md +0 -0
  14. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/README.md +0 -0
  15. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/__init__.py +0 -0
  16. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/conftest.py +0 -0
  17. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_browser_env.py +0 -0
  18. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_build_script.py +0 -0
  19. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_cli_agent_env.py +0 -0
  20. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_client_auth_errors.py +0 -0
  21. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_client_config.py +0 -0
  22. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_client_multimodal_types.py +0 -0
  23. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_composable_env.py +0 -0
  24. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_context_token_metrics.py +0 -0
  25. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_decorator_ranks.py +0 -0
  26. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_endpoint_registry.py +0 -0
  27. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_env_group.py +0 -0
  28. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_env_server.py +0 -0
  29. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_environment.py +0 -0
  30. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_environment_extra.py +0 -0
  31. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_envs.py +0 -0
  32. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_error_chain.py +0 -0
  33. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_eval_cli.py +0 -0
  34. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_eval_display.py +0 -0
  35. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_eval_utils.py +0 -0
  36. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_gepa_cli.py +0 -0
  37. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_gepa_utils.py +0 -0
  38. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_gym_env.py +0 -0
  39. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_harbor_env_mcp.py +0 -0
  40. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_imports.py +0 -0
  41. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_init_script.py +0 -0
  42. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_install_utils.py +0 -0
  43. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_interception_utils.py +0 -0
  44. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  45. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_lean_task.py +0 -0
  46. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_logging.py +0 -0
  47. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_math_rubric.py +0 -0
  48. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_maybe_think_parser.py +0 -0
  49. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_mcp_search_env.py +0 -0
  50. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_message_utils.py +0 -0
  51. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_message_utils_multimodal.py +0 -0
  52. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_multiturn_env.py +0 -0
  53. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_nemorl_client.py +0 -0
  54. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_openai_chat_completions_token_client.py +0 -0
  55. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_openai_responses_client.py +0 -0
  56. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_opencode_harbor.py +0 -0
  57. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_opencode_rlm_env.py +0 -0
  58. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_openenv_client.py +0 -0
  59. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_parser.py +0 -0
  60. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_path_utils.py +0 -0
  61. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_per_turn_timing.py +0 -0
  62. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_pricing_utils.py +0 -0
  63. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_prime_plugin.py +0 -0
  64. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_renderer_client.py +0 -0
  65. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_renderer_e2e.py +0 -0
  66. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_rlm_composable_env.py +0 -0
  67. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_rubric.py +0 -0
  68. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_rubric_group.py +0 -0
  69. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_sandbox_env.py +0 -0
  70. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_sandbox_mixin.py +0 -0
  71. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_save_utils.py +0 -0
  72. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_setup_script.py +0 -0
  73. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_singleturn_env.py +0 -0
  74. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_stateful_tool_env.py +0 -0
  75. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_think_parser.py +0 -0
  76. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_tool_env.py +0 -0
  77. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_tool_utils.py +0 -0
  78. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_trajectory_processing.py +0 -0
  79. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_tui_info_formatting.py +0 -0
  80. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_types.py +0 -0
  81. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_bfcl.py +0 -0
  82. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_config_extension.py +0 -0
  83. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_empty_completions.py +0 -0
  84. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_endpoint_protocols.py +0 -0
  85. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_example_counts.py +0 -0
  86. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_group_reward_env.py +0 -0
  87. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_harbor_cli.py +0 -0
  88. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_mini_swe_agent.py +0 -0
  89. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_nemo_gym_harness.py +0 -0
  90. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_openenv_taskset.py +0 -0
  91. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_openreward_taskset.py +0 -0
  92. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_replay_harness.py +0 -0
  93. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_rlm_swe.py +0 -0
  94. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_runtime_lifecycle.py +0 -0
  95. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_scoring_functions.py +0 -0
  96. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_taskset_bindings.py +0 -0
  97. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_taskset_utils.py +0 -0
  98. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_v1_textarena_taskset.py +0 -0
  99. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_wiki_search_v1.py +0 -0
  100. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_wordle_env.py +0 -0
  101. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_wordle_v1_env.py +0 -0
  102. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/tests/test_xml_parser.py +0 -0
  103. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/AGENTS.md +0 -0
  104. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/__init__.py +0 -0
  105. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/__init__.py +0 -0
  106. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/__init__.py +0 -0
  107. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/build.py +0 -0
  108. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/eval.py +0 -0
  109. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/gepa.py +0 -0
  110. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/init.py +0 -0
  111. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/install.py +0 -0
  112. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/commands/setup.py +0 -0
  113. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/plugins/__init__.py +0 -0
  114. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/plugins/prime.py +0 -0
  115. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/cli/tui.py +0 -0
  116. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/__init__.py +0 -0
  117. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/anthropic_messages_client.py +0 -0
  118. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/client.py +0 -0
  119. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  120. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/openai_chat_completions_client.py +0 -0
  121. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  122. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/openai_completions_client.py +0 -0
  123. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/openai_responses_client.py +0 -0
  124. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/clients/renderer_client.py +0 -0
  125. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/decorators.py +0 -0
  126. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/AGENTS.md +0 -0
  127. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/__init__.py +0 -0
  128. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/env_group.py +0 -0
  129. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/environment.py +0 -0
  130. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/README.md +0 -0
  131. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/__init__.py +0 -0
  132. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  133. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/README.md +0 -0
  134. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/__init__.py +0 -0
  135. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/_filter.py +0 -0
  136. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  137. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harness.py +0 -0
  138. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  139. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  140. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  141. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  142. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  143. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  144. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/task.py +0 -0
  145. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  146. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  147. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  148. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  149. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  150. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  151. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  152. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  153. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  154. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  155. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/__init__.py +0 -0
  156. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/openseeker/__init__.py +0 -0
  157. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/README.md +0 -0
  158. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/__init__.py +0 -0
  159. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/__init__.py +0 -0
  160. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/__init__.py +0 -0
  161. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/tool_pdf.py +0 -0
  162. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/eval_toolkit.py +0 -0
  163. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/evaluator.py +0 -0
  164. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/__init__.py +0 -0
  165. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/base_client.py +0 -0
  166. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/__init__.py +0 -0
  167. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/cache_prompts.py +0 -0
  168. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/__init__.py +0 -0
  169. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/cache_filesys.py +0 -0
  170. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/load_eval_script.py +0 -0
  171. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/misc.py +0 -0
  172. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/tool_visit.py +0 -0
  173. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/url_tools.py +0 -0
  174. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/verification_tree.py +0 -0
  175. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/quest/taskset.py +0 -0
  176. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/redsearcher/__init__.py +0 -0
  177. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/search/search_tasksets.py +0 -0
  178. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/README.md +0 -0
  179. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  180. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/__init__.py +0 -0
  181. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/extract_fix_patch.sh +0 -0
  182. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/taskset.py +0 -0
  183. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/openswe/__init__.py +0 -0
  184. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/openswe/taskset.py +0 -0
  185. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/__init__.py +0 -0
  186. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/log_parser.py +0 -0
  187. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/taskset.py +0 -0
  188. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/__init__.py +0 -0
  189. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/taskset.py +0 -0
  190. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/shared/__init__.py +0 -0
  191. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/shared/test_patch.py +0 -0
  192. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/__init__.py +0 -0
  193. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/taskset.py +0 -0
  194. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/__init__.py +0 -0
  195. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/taskset.py +0 -0
  196. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/__init__.py +0 -0
  197. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/log_parsers.py +0 -0
  198. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/taskset.py +0 -0
  199. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/__init__.py +0 -0
  200. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/taskset.py +0 -0
  201. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  202. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/gym_env.py +0 -0
  203. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  204. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  205. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  206. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/mcp_env.py +0 -0
  207. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/opencode_env.py +0 -0
  208. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  209. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  210. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  211. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/utils/__init__.py +0 -0
  212. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  213. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  214. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/README.md +0 -0
  215. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/__init__.py +0 -0
  216. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/README.md +0 -0
  217. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  218. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  219. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  220. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  221. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  222. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  223. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/openenv_env.py +0 -0
  224. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  225. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/integrations/textarena_env.py +0 -0
  226. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/multiturn_env.py +0 -0
  227. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/python_env.py +0 -0
  228. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/sandbox_env.py +0 -0
  229. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/singleturn_env.py +0 -0
  230. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/stateful_tool_env.py +0 -0
  231. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/envs/tool_env.py +0 -0
  232. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/errors.py +0 -0
  233. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/gepa/__init__.py +0 -0
  234. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/gepa/adapter.py +0 -0
  235. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/gepa/config.py +0 -0
  236. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/gepa/display.py +0 -0
  237. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/gepa/gepa_utils.py +0 -0
  238. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/parsers/__init__.py +0 -0
  239. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/parsers/maybe_think_parser.py +0 -0
  240. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/parsers/parser.py +0 -0
  241. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/parsers/think_parser.py +0 -0
  242. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/parsers/xml_parser.py +0 -0
  243. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/README.md +0 -0
  244. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/__init__.py +0 -0
  245. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/inference/__init__.py +0 -0
  246. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/inference/client.py +0 -0
  247. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/inference/server.py +0 -0
  248. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/trainer/__init__.py +0 -0
  249. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/trainer/config.py +0 -0
  250. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/trainer/orchestrator.py +0 -0
  251. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/trainer/trainer.py +0 -0
  252. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rl/trainer/utils.py +0 -0
  253. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/__init__.py +0 -0
  254. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  255. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/judge_rubric.py +0 -0
  256. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/math_rubric.py +0 -0
  257. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/rubric.py +0 -0
  258. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/rubrics/rubric_group.py +0 -0
  259. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/__init__.py +0 -0
  260. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/build.py +0 -0
  261. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/eval.py +0 -0
  262. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/gepa.py +0 -0
  263. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/init.py +0 -0
  264. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/install.py +0 -0
  265. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/rl.py +0 -0
  266. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/setup.py +0 -0
  267. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/train.py +0 -0
  268. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/tui.py +0 -0
  269. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/scripts/vllm.py +0 -0
  270. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/__init__.py +0 -0
  271. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/client/env_client.py +0 -0
  272. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/client/zmq_env_client.py +0 -0
  273. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/server/__init__.py +0 -0
  274. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/server/env_router.py +0 -0
  275. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/server/env_server.py +0 -0
  276. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/server/env_worker.py +0 -0
  277. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/server/zmq_env_server.py +0 -0
  278. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/serve/types.py +0 -0
  279. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/types.py +0 -0
  280. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/__init__.py +0 -0
  281. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/async_utils.py +0 -0
  282. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/client_utils.py +0 -0
  283. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/config_utils.py +0 -0
  284. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/data_utils.py +0 -0
  285. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/display_utils.py +0 -0
  286. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/env_config_utils.py +0 -0
  287. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/env_utils.py +0 -0
  288. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/error_utils.py +0 -0
  289. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/eval_display.py +0 -0
  290. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/eval_utils.py +0 -0
  291. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/heartbeat.py +0 -0
  292. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/import_utils.py +0 -0
  293. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/install_utils.py +0 -0
  294. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/interception_utils.py +0 -0
  295. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/logging_utils.py +0 -0
  296. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/message_utils.py +0 -0
  297. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/metric_utils.py +0 -0
  298. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/path_utils.py +0 -0
  299. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/pricing_utils.py +0 -0
  300. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/process_utils.py +0 -0
  301. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/response_utils.py +0 -0
  302. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/save_utils.py +0 -0
  303. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/serve_utils.py +0 -0
  304. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/thread_utils.py +0 -0
  305. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/threaded_sandbox_client.py +0 -0
  306. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/tool_utils.py +0 -0
  307. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/usage_utils.py +0 -0
  308. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/utils/version_utils.py +0 -0
  309. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
  310. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/README.md +0 -0
  311. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/RE_MIGRATION.md +0 -0
  312. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/__init__.py +0 -0
  313. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/artifact.py +0 -0
  314. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/config.py +0 -0
  315. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/env.py +0 -0
  316. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/model.py +0 -0
  317. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/program.py +0 -0
  318. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/runtime.py +0 -0
  319. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/runtime_handles.py +0 -0
  320. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/sandbox.py +0 -0
  321. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/state.py +0 -0
  322. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/task.py +0 -0
  323. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/taskset.py +0 -0
  324. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/toolset.py +0 -0
  325. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/types.py +0 -0
  326. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/user.py +0 -0
  327. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/__init__.py +0 -0
  328. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/binding_utils.py +0 -0
  329. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/config_callable_utils.py +0 -0
  330. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/config_utils.py +0 -0
  331. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/json_utils.py +0 -0
  332. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/judge_utils.py +0 -0
  333. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  334. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/logging_utils.py +0 -0
  335. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  336. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/mcp_utils.py +0 -0
  337. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/object_utils.py +0 -0
  338. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/program_utils.py +0 -0
  339. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/prompt_utils.py +0 -0
  340. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  341. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/runtime_registry.py +0 -0
  342. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  343. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
  344. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/sandbox_utils.py +0 -0
  345. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/scoring_utils.py +0 -0
  346. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/serialization_utils.py +0 -0
  347. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  348. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/taskset_utils.py +0 -0
  349. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/tool_utils.py +0 -0
  350. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/toolset_utils.py +0 -0
  351. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/trajectory_utils.py +0 -0
  352. {verifiers-0.1.15.dev173 → verifiers-0.1.15.dev176}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev173
3
+ Version: 0.1.15.dev176
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -40,7 +40,7 @@ Requires-Dist: openai>=1.108.1
40
40
  Requires-Dist: pillow
41
41
  Requires-Dist: prime-pydantic-config[toml]
42
42
  Requires-Dist: prime-sandboxes>=0.2.25
43
- Requires-Dist: prime-tunnel>=0.1.6
43
+ Requires-Dist: prime-tunnel>=0.1.8
44
44
  Requires-Dist: pydantic>=2.11.9
45
45
  Requires-Dist: pymupdf
46
46
  Requires-Dist: pyzmq>=27.1.0
@@ -37,7 +37,7 @@ dependencies = [
37
37
  "nest-asyncio>=1.6.0", # for jupyter notebooks
38
38
  "openai>=1.108.1",
39
39
  "openai-agents>=0.0.7",
40
- "prime-tunnel>=0.1.6",
40
+ "prime-tunnel>=0.1.8",
41
41
  "prime-sandboxes>=0.2.25",
42
42
  "pydantic>=2.11.9",
43
43
  "requests",
@@ -19,7 +19,10 @@ from verifiers.envs.experimental.composable.tasksets.search import make_search_t
19
19
 
20
20
  taskset = make_search_taskset(backend="openseeker")
21
21
  taskset = make_search_taskset(backend="quest", category="objective")
22
- redsearcher = make_search_taskset(backend="redsearcher", difficulty="easy")
22
+ redsearcher = make_search_taskset(
23
+ backend="redsearcher",
24
+ filter_fn="lambda x: x['info']['difficulty'] == 'easy'",
25
+ )
23
26
  ```
24
27
 
25
28
  `make_search_taskset()` dispatches by backend name. Unknown backends raise `ValueError` with the available backend list.
@@ -4,12 +4,27 @@ Composable search taskset for [`PolarSeeker/OpenSeeker-v1-Data`](https://hugging
4
4
 
5
5
  OpenSeeker v1 data contains synthesized deep-search QA pairs plus trajectories generated with `search` and `visit` tools. The public OpenSeeker evaluator scores only the final answer: it sends the question, gold answer, and model response to an LLM judge and expects `A` for correct or `B` for incorrect. This backend preserves that binary semantic answer-judge contract.
6
6
 
7
+ By default, the taskset uses the full dataset. Use the shared `filter_fn`
8
+ argument for row subsets such as source trajectory quality or tool-call count.
9
+ The `trajectory_correctness` metadata describes the stored OpenSeeker source
10
+ trajectory, not the validity of the question or gold answer.
11
+
7
12
  ## Usage
8
13
 
9
14
  ```python
10
15
  from verifiers.envs.experimental.composable.tasksets.search import make_search_taskset
11
16
 
12
17
  taskset = make_search_taskset(backend="openseeker")
18
+
19
+ correct_source_trajectories = make_search_taskset(
20
+ backend="openseeker",
21
+ filter_fn="lambda x: x['info']['trajectory_correctness'] == 'Correct'",
22
+ )
23
+
24
+ shorter_source_trajectories = make_search_taskset(
25
+ backend="openseeker",
26
+ filter_fn="lambda x: (x['info']['number_of_tool_calls'] or 0) <= 20",
27
+ )
13
28
  ```
14
29
 
15
30
  ## Arguments
@@ -18,9 +33,7 @@ taskset = make_search_taskset(backend="openseeker")
18
33
  |---|---:|---|
19
34
  | `dataset_name` | `PolarSeeker/OpenSeeker-v1-Data` | Hugging Face dataset name. |
20
35
  | `split` | `train` | Dataset split. |
21
- | `trajectory_correctness` | `Correct` | Keep rows with this trajectory label. Use `None` or `all` for all rows. |
22
- | `min_tool_calls` | `None` | Optional lower bound for `number of tool calls`. |
23
- | `max_tool_calls` | `None` | Optional upper bound for `number of tool calls`. |
36
+ | `filter_fn` | `None` | Optional composable taskset filter over normalized rows. |
24
37
  | `include_trajectory` | `False` | Include the large source trajectory in task metadata. |
25
38
  | `answer_file` | `/task/answer.txt` | Final answer path in the sandbox. |
26
39
  | `judge_model` | `openai/gpt-5.4-mini` | OpenAI-compatible model used for binary answer judging. |
@@ -40,7 +40,6 @@ logger = logging.getLogger(__name__)
40
40
 
41
41
  DEFAULT_DATASET_NAME = "PolarSeeker/OpenSeeker-v1-Data"
42
42
  DEFAULT_SPLIT = "train"
43
- DEFAULT_TRAJECTORY_CORRECTNESS = "Correct"
44
43
  DEFAULT_ANSWER_FILE = "/task/answer.txt"
45
44
  DEFAULT_WORKDIR = "/workspace"
46
45
  DEFAULT_JUDGE_BASE_URL = "https://api.pinference.ai/api/v1"
@@ -281,9 +280,6 @@ class OpenSeekerTaskSet(SandboxTaskSet):
281
280
  self,
282
281
  dataset_name: str = DEFAULT_DATASET_NAME,
283
282
  split: str = DEFAULT_SPLIT,
284
- trajectory_correctness: str | None = DEFAULT_TRAJECTORY_CORRECTNESS,
285
- min_tool_calls: int | None = None,
286
- max_tool_calls: int | None = None,
287
283
  include_trajectory: bool = False,
288
284
  filter_fn: str | None = None,
289
285
  ds_keep_in_memory: bool | None = False,
@@ -299,27 +295,8 @@ class OpenSeekerTaskSet(SandboxTaskSet):
299
295
  judge_api_key_var: str = DEFAULT_JUDGE_API_KEY_VAR,
300
296
  judge_sampling_args: dict[str, Any] | None = None,
301
297
  ) -> None:
302
- if trajectory_correctness == "all":
303
- trajectory_correctness = None
304
- if trajectory_correctness not in {"Correct", "Incorrect", None}:
305
- raise ValueError(
306
- "trajectory_correctness must be 'Correct', 'Incorrect', 'all', or None"
307
- )
308
- if min_tool_calls is not None and min_tool_calls < 0:
309
- raise ValueError("min_tool_calls must be non-negative")
310
- if max_tool_calls is not None and max_tool_calls < 0:
311
- raise ValueError("max_tool_calls must be non-negative")
312
- if (
313
- min_tool_calls is not None
314
- and max_tool_calls is not None
315
- and min_tool_calls > max_tool_calls
316
- ):
317
- raise ValueError("min_tool_calls cannot exceed max_tool_calls")
318
298
  self.dataset_name = dataset_name
319
299
  self.split = split
320
- self.trajectory_correctness = trajectory_correctness
321
- self.min_tool_calls = min_tool_calls
322
- self.max_tool_calls = max_tool_calls
323
300
  self.include_trajectory = include_trajectory
324
301
  self.ds_keep_in_memory = ds_keep_in_memory
325
302
  self.ds_num_proc = ds_num_proc
@@ -335,12 +312,9 @@ class OpenSeekerTaskSet(SandboxTaskSet):
335
312
  self._judge_base_url = judge_base_url
336
313
  self._judge_api_key_var = judge_api_key_var
337
314
  self._judge_sampling_args = dict(judge_sampling_args or {})
338
- name_parts = ["search", "openseeker"]
339
- if trajectory_correctness is not None:
340
- name_parts.append(trajectory_correctness.lower())
341
315
  super().__init__(
342
316
  dataset=self._build_dataset,
343
- name="/".join(name_parts),
317
+ name="search/openseeker",
344
318
  filter_fn=filter_fn,
345
319
  )
346
320
 
@@ -364,22 +338,9 @@ class OpenSeekerTaskSet(SandboxTaskSet):
364
338
  rows: list[dict[str, Any]] = []
365
339
  for row_index, row in enumerate(raw):
366
340
  correctness = row.get("trajectory correctness")
367
- if (
368
- self.trajectory_correctness is not None
369
- and correctness != self.trajectory_correctness
370
- ):
371
- continue
372
341
  tool_calls = row.get("number of tool calls")
373
342
  if not isinstance(tool_calls, int):
374
343
  tool_calls = None
375
- if self.min_tool_calls is not None and (
376
- tool_calls is None or tool_calls < self.min_tool_calls
377
- ):
378
- continue
379
- if self.max_tool_calls is not None and (
380
- tool_calls is None or tool_calls > self.max_tool_calls
381
- ):
382
- continue
383
344
  question = str(row.get("question") or "").strip()
384
345
  answer = str(row.get("answer") or "").strip()
385
346
  if not question or not answer:
@@ -19,7 +19,7 @@ The paired `rlm_search` environment prompts RLM to write this file and provides
19
19
 
20
20
  ## Scoring
21
21
 
22
- `RedSearcherRubric` compares the final response against the released `answer` label. It first applies a strict normalized exact-answer shortcut for unambiguous matches. Otherwise it uses an OpenAI-compatible LLM-as-judge prompt that follows the answer-matching convention in REDSearcher's DeepTraceHub evaluation code: judge whether the predicted final answer is equivalent to the ground truth and return binary accuracy.
22
+ `RedSearcherRubric` compares the final response against the released `answer` label. It first applies a strict normalized exact-answer shortcut for unambiguous matches. Otherwise it uses an OpenAI-compatible LLM-as-judge prompt that matches REDSearcher's released DeepTraceHub BROWSECOMP evaluator prompt and returns binary accuracy.
23
23
 
24
24
  A reward of `1.0` means the final response matched the ground-truth answer; `0.0` means it did not, or no final answer was produced. Judge provider failures are preserved as `vf.Error` values on `state["error"]`.
25
25
 
@@ -29,7 +29,7 @@ A reward of `1.0` means the final response matched the ground-truth answer; `0.0
29
29
  |---|---:|---|
30
30
  | `dataset_name` | `Zchu/REDSearcher_RL_1K` | Hugging Face dataset name. |
31
31
  | `split` | `train` | Dataset split. |
32
- | `difficulty` | `None` | Optional difficulty filter: `easy`, `medium`, `hard`, or `all`. |
32
+ | `filter_fn` | `None` | Optional composable taskset filter over normalized rows, for example `lambda x: x['info']['difficulty'] == 'easy'`. |
33
33
  | `answer_file` | `/task/answer.txt` | Final answer path in the sandbox. |
34
34
  | `judge_model` | `openai/gpt-5.4-mini` | OpenAI-compatible model for answer-match judging. |
35
35
  | `judge_base_url` | `https://api.pinference.ai/api/v1` | Judge API base URL. |
@@ -47,28 +47,75 @@ DEFAULT_JUDGE_API_KEY_VAR = "PRIME_API_KEY"
47
47
  DEFAULT_JUDGE_MODEL = "openai/gpt-5.4-mini"
48
48
  DEFAULT_SANDBOX_IMAGE = "python:3.11-slim"
49
49
 
50
+ # Matches DeepTraceHub's released BROWSECOMP judge prompt, the closest public
51
+ # reference for REDSearcher's RL reward while the RL trainer remains unreleased.
50
52
  _JUDGE_PROMPT = """\
51
- You are grading a deep-search question answering response.
52
-
53
- Decide whether the predicted response gives the same final answer as the
54
- ground-truth answer. Ignore citations, formatting, capitalization, and extra
55
- explanation unless they contradict the final answer. For numeric answers,
56
- allow insignificant formatting differences but not a different value. If the
57
- response gives multiple incompatible answers, is evasive, or merely repeats
58
- the question, mark it incorrect.
59
-
60
- Question:
61
- {question}
62
-
63
- Ground-truth answer:
64
- {answer}
65
-
66
- Predicted response:
67
- {response}
68
-
69
- Return only one letter:
70
- A. CORRECT
71
- B. INCORRECT
53
+ Based on the given question, standard answer, and model-predicted answer, evaluate whether the model's response is correct. Your task is to classify the result as: [CORRECT] or [INCORRECT].
54
+
55
+ First, we'll list examples for each category, then you'll evaluate a new question's predicted answer.
56
+ Here are examples of [CORRECT] responses:
57
+ ```
58
+ Question: What are the names of Barack Obama's children?
59
+ Standard Answer: Malia Obama and Sasha Obama
60
+ Model Prediction 1: Malia Obama and Sasha Obama
61
+ Model Prediction 2: Malia and Sasha
62
+ Model Prediction 3: Most would say Malia and Sasha, but I'm not sure, I should verify
63
+ Model Prediction 4: Barack Obama has two daughters, Malia Ann and Natasha Marian, commonly known as Malia Obama and Sasha Obama.
64
+ ```
65
+ These responses are all [CORRECT] because they:
66
+ - Fully include the important information from the standard answer.
67
+ - Don't contain any information that contradicts the standard answer.
68
+ - Focus only on semantic content; language, capitalization, punctuation, grammar, and order aren't important.
69
+ - Vague statements or guesses are acceptable as long as they include the standard answer and don't contain incorrect information or contradictions.
70
+
71
+ Here are examples of [INCORRECT] responses:
72
+ ```
73
+ Question: What are the names of Barack Obama's children?
74
+ Standard Answer: Malia Obama and Sasha Obama
75
+ Model Prediction 1: Malia
76
+ Model Prediction 2: Malia, Sasha and Susan or Sasha Obama or Malia Obama, or Natasha Marian, or Einstein
77
+ Model Prediction 3: While I don't know their exact names, I can tell you Barack Obama has two children.
78
+ Model Prediction 4: You might be thinking of Betsy and Olivia. But you should verify the details with the latest references. Is that the correct answer?
79
+ Model Prediction 5: Barack Obama's children
80
+ ```
81
+ These responses are all [INCORRECT] because they:
82
+ - Contain factual statements that contradict the standard answer.
83
+ - Are empty or merely repeat the question.
84
+ - Enumerate multiple answers or repeat the answer.
85
+
86
+ Pay special attention to the following:
87
+ - The standard answer may contain responses to multiple aspects of the question, and within the same aspect, there might be different descriptions, all of which are correct and are given in the same bracket, connected by commas. For example, for the question "What is the name of ByteDance's AI model?", the standard answer is "[[Doubao, Skylark]]":
88
+ - Predicted answers "Doubao", "Doubao, Skylark", "Skylark", etc. are all [CORRECT].
89
+ - For standard answers containing responses to different aspects, the model needs to provide answers to all aspects to be considered correct; otherwise, it's directly judged as [INCORRECT]. There is no [PARTIALLY CORRECT] output option. These answers will be given in different brackets. For example, for the question "Who are the members of TFBOYS?", the standard answer is "[[Wang Junkai][Wang Yuan][Yi Yangqianxi]]":
90
+ - Predicted answers like "Wang Junkai, Wang Yuan, Yi Yangqianxi" that include all answers are [CORRECT].
91
+ - Predicted answers like "Wang Junkai, Yi Yangqianxi" that don't include all answers are [INCORRECT].
92
+
93
+ Also note the following points:
94
+ - For questions with numerical standard answers, the predicted answer should match the standard answer. For example, for the question "What is the total length in meters of the Huangpu River Bridge on the Jinshan Railway?", the standard answer is "3518.17":
95
+ - Predicted answers "3518", "3518.1", "3518.17" are all [CORRECT].
96
+ - Predicted answers "3520" and "3600" are [INCORRECT].
97
+ - If the model prediction doesn't directly answer the question, attempts to circumvent or fails to directly provide the standard answer, it's considered an [INCORRECT] answer.
98
+ - For example, for the question "Who is JJ Lin's wife?", with the standard answer "Ding Wenqi", model predictions like "JJ Lin's wife", "JJ Lin's wife should be excellent", "JJ Lin's wife might be a public figure" are all [INCORRECT].
99
+ - If the standard answer contains more information than the question asks for, the predicted answer only needs to include the information mentioned in the question.
100
+ - For example, for the question "What is the main chemical component of magnesite?", with the standard answer "Magnesium carbonate (MgCO3)", "Magnesium carbonate" or "MgCO3" are both considered [CORRECT] answers.
101
+ - If information omitted in the predicted answer can be clearly inferred from the question, it's considered correct.
102
+ - For example, for the question "The Nuragic ruins of Barumini were listed as a World Cultural Heritage by UNESCO in 1997, so where is this site located?", with the standard answer "Sardinia, Italy", the predicted answer "Sardinia" is considered [CORRECT].
103
+ - If it's clear that different translations of a name refer to the same person, it's considered correct.
104
+ - For example, if the standard answer is "Robinson", answers like "Lubinson" or "Lubinsun" are both correct.
105
+ - You should focus more on the match between the standard answer and the model prediction, rather than whether the standard answer itself is correct.
106
+
107
+ Below is a new question example. Please reply with only [CORRECT] or [INCORRECT], without apologies or corrections to your own errors, just evaluate the answer.
108
+ ```
109
+ Question: {question}
110
+ Standard Answer: {correct_answer}
111
+ Predicted Answer: {response}
112
+ ```
113
+
114
+ Evaluate this new question's predicted answer as one of the following:
115
+ A. [CORRECT]
116
+ B. [INCORRECT]
117
+
118
+ Return only the option representing [CORRECT] or [INCORRECT], i.e. just return A or B, without adding any other text.
72
119
  """
73
120
 
74
121
  _CONTEXT_LENGTH_ERROR_PHRASES = (
@@ -225,7 +272,6 @@ class RedSearcherTaskSet(SandboxTaskSet):
225
272
  self,
226
273
  dataset_name: str = DEFAULT_DATASET_NAME,
227
274
  split: str = DEFAULT_SPLIT,
228
- difficulty: str | None = None,
229
275
  filter_fn: str | None = None,
230
276
  ds_keep_in_memory: bool | None = True,
231
277
  ds_num_proc: int | None = None,
@@ -242,13 +288,8 @@ class RedSearcherTaskSet(SandboxTaskSet):
242
288
  judge_max_retries: int = 5,
243
289
  use_exact_match_shortcut: bool = True,
244
290
  ) -> None:
245
- if difficulty not in {None, "all", "easy", "medium", "hard"}:
246
- raise ValueError(
247
- "difficulty must be one of None, 'all', 'easy', 'medium', or 'hard'"
248
- )
249
291
  self.dataset_name = dataset_name
250
292
  self.split = split
251
- self.difficulty = difficulty
252
293
  self.ds_keep_in_memory = ds_keep_in_memory
253
294
  self.ds_num_proc = ds_num_proc
254
295
  self.answer_file = answer_file
@@ -265,10 +306,9 @@ class RedSearcherTaskSet(SandboxTaskSet):
265
306
  self._judge_sampling_args = dict(judge_sampling_args or {})
266
307
  self._judge_max_retries = judge_max_retries
267
308
  self._use_exact_match_shortcut = use_exact_match_shortcut
268
- label = difficulty or "all"
269
309
  super().__init__(
270
310
  dataset=self._build_dataset,
271
- name=f"search/redsearcher/{label}",
311
+ name="search/redsearcher",
272
312
  filter_fn=filter_fn,
273
313
  )
274
314
 
@@ -282,8 +322,6 @@ class RedSearcherTaskSet(SandboxTaskSet):
282
322
  rows: list[dict[str, Any]] = []
283
323
  for idx, row in enumerate(raw):
284
324
  difficulty = str(row.get("difficulty") or "")
285
- if self.difficulty not in {None, "all"} and difficulty != self.difficulty:
286
- continue
287
325
  question = str(row.get("problem") or "").strip()
288
326
  answer = str(row.get("answer") or "").strip()
289
327
  if not question or not answer:
@@ -475,7 +513,7 @@ class RedSearcherRubric(vf.Rubric):
475
513
  prompt = _JUDGE_PROMPT.format(
476
514
  question=question,
477
515
  response=response,
478
- answer=answer,
516
+ correct_answer=answer,
479
517
  )
480
518
  client = self._get_client()
481
519
  request_kwargs = dict(self.judge_sampling_args)
@@ -236,8 +236,10 @@ class Harness(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
236
236
  return config
237
237
 
238
238
  def load_endpoint(self) -> Endpoint:
239
+ sandbox_config = self.program_sandbox_config(self.program_config)
239
240
  return Endpoint(
240
- use_tunnel=self.program_sandbox_config(self.program_config) is not None
241
+ use_tunnel=sandbox_config is not None,
242
+ tunnel_labels=sandbox_config.labels if sandbox_config is not None else None,
241
243
  )
242
244
 
243
245
  def rebuild_runtime(self) -> None:
@@ -146,6 +146,7 @@ class Endpoint:
146
146
  secret: str | None = None,
147
147
  use_tunnel: bool = False,
148
148
  logger: logging.Logger | None = None,
149
+ tunnel_labels: list[str] | None = None,
149
150
  ):
150
151
  self.use_tunnel = use_tunnel
151
152
  self.logger = logger or logging.getLogger(__name__)
@@ -154,6 +155,7 @@ class Endpoint:
154
155
  secret=secret or os.environ.get("ENDPOINT_SECRET"),
155
156
  )
156
157
  self.secret = self.server.secret
158
+ self.tunnel_labels = list(tunnel_labels) if tunnel_labels else []
157
159
  self._tunnel: TunnelHandle | None = None
158
160
  self._tunnel_lock = asyncio.Lock()
159
161
  self._tunnel_last_checked = 0.0
@@ -295,7 +297,10 @@ class Endpoint:
295
297
  self._tunnel = None
296
298
 
297
299
  if self._tunnel is None:
298
- tunnel = cast(TunnelHandle, Tunnel(local_port=self.server.port))
300
+ tunnel = cast(
301
+ TunnelHandle,
302
+ Tunnel(local_port=self.server.port, labels=self.tunnel_labels),
303
+ )
299
304
  url = await tunnel.start()
300
305
  self._tunnel = tunnel
301
306
  self._tunnel_last_checked = time.time()