verifiers 0.1.15.dev170__tar.gz → 0.1.15.dev172__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (353) hide show
  1. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/PKG-INFO +1 -1
  2. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_environment_extra.py +24 -1
  3. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_path_utils.py +2 -1
  4. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_save_utils.py +42 -27
  5. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/environment.py +36 -8
  6. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/README.md +3 -1
  7. verifiers-0.1.15.dev172/verifiers/envs/experimental/composable/tasksets/search/__init__.py +15 -0
  8. verifiers-0.1.15.dev172/verifiers/envs/experimental/composable/tasksets/search/redsearcher/README.md +38 -0
  9. verifiers-0.1.15.dev172/verifiers/envs/experimental/composable/tasksets/search/redsearcher/__init__.py +5 -0
  10. verifiers-0.1.15.dev172/verifiers/envs/experimental/composable/tasksets/search/redsearcher/taskset.py +556 -0
  11. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/search_tasksets.py +10 -0
  12. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/path_utils.py +16 -10
  13. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/save_utils.py +74 -93
  14. verifiers-0.1.15.dev170/verifiers/envs/experimental/composable/tasksets/search/__init__.py +0 -9
  15. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/.gitignore +0 -0
  16. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/LICENSE +0 -0
  17. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/README.md +0 -0
  18. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/pyproject.toml +0 -0
  19. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/AGENTS.md +0 -0
  20. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/README.md +0 -0
  21. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/__init__.py +0 -0
  22. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/conftest.py +0 -0
  23. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_browser_env.py +0 -0
  24. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_build_script.py +0 -0
  25. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_cli_agent_env.py +0 -0
  26. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_client_auth_errors.py +0 -0
  27. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_client_config.py +0 -0
  28. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_client_multimodal_types.py +0 -0
  29. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_composable_env.py +0 -0
  30. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_context_token_metrics.py +0 -0
  31. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_decorator_ranks.py +0 -0
  32. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_endpoint_registry.py +0 -0
  33. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_env_group.py +0 -0
  34. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_env_server.py +0 -0
  35. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_environment.py +0 -0
  36. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_envs.py +0 -0
  37. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_error_chain.py +0 -0
  38. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_eval_cli.py +0 -0
  39. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_eval_display.py +0 -0
  40. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_eval_utils.py +0 -0
  41. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_gepa_cli.py +0 -0
  42. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_gepa_utils.py +0 -0
  43. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_gym_env.py +0 -0
  44. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_harbor_env_mcp.py +0 -0
  45. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_imports.py +0 -0
  46. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_init_script.py +0 -0
  47. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_install_utils.py +0 -0
  48. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_interception_utils.py +0 -0
  49. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  50. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_lean_task.py +0 -0
  51. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_logging.py +0 -0
  52. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_math_rubric.py +0 -0
  53. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_maybe_think_parser.py +0 -0
  54. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_mcp_search_env.py +0 -0
  55. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_message_utils.py +0 -0
  56. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_message_utils_multimodal.py +0 -0
  57. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_multiturn_env.py +0 -0
  58. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_nemorl_client.py +0 -0
  59. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_openai_chat_completions_token_client.py +0 -0
  60. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_openai_responses_client.py +0 -0
  61. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_opencode_harbor.py +0 -0
  62. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_opencode_rlm_env.py +0 -0
  63. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_openenv_client.py +0 -0
  64. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_parser.py +0 -0
  65. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_per_turn_timing.py +0 -0
  66. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_pricing_utils.py +0 -0
  67. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_prime_plugin.py +0 -0
  68. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_renderer_client.py +0 -0
  69. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_renderer_e2e.py +0 -0
  70. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_rlm_composable_env.py +0 -0
  71. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_rubric.py +0 -0
  72. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_rubric_group.py +0 -0
  73. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_sandbox_env.py +0 -0
  74. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_sandbox_mixin.py +0 -0
  75. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_setup_script.py +0 -0
  76. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_singleturn_env.py +0 -0
  77. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_stateful_tool_env.py +0 -0
  78. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_think_parser.py +0 -0
  79. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_tool_env.py +0 -0
  80. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_tool_utils.py +0 -0
  81. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_trajectory_processing.py +0 -0
  82. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_tui_info_formatting.py +0 -0
  83. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_types.py +0 -0
  84. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_bfcl.py +0 -0
  85. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_config_extension.py +0 -0
  86. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_empty_completions.py +0 -0
  87. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_endpoint_protocols.py +0 -0
  88. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_example_counts.py +0 -0
  89. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_group_reward_env.py +0 -0
  90. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_harbor_cli.py +0 -0
  91. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_mini_swe_agent.py +0 -0
  92. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_nemo_gym_harness.py +0 -0
  93. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_openenv_taskset.py +0 -0
  94. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_openreward_taskset.py +0 -0
  95. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_replay_harness.py +0 -0
  96. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_rlm_swe.py +0 -0
  97. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_runtime_lifecycle.py +0 -0
  98. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_scoring_functions.py +0 -0
  99. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_taskset_bindings.py +0 -0
  100. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_taskset_utils.py +0 -0
  101. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_v1_textarena_taskset.py +0 -0
  102. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_wiki_search_v1.py +0 -0
  103. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_wordle_env.py +0 -0
  104. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_wordle_v1_env.py +0 -0
  105. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/tests/test_xml_parser.py +0 -0
  106. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/AGENTS.md +0 -0
  107. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/__init__.py +0 -0
  108. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/__init__.py +0 -0
  109. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/__init__.py +0 -0
  110. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/build.py +0 -0
  111. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/eval.py +0 -0
  112. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/gepa.py +0 -0
  113. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/init.py +0 -0
  114. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/install.py +0 -0
  115. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/commands/setup.py +0 -0
  116. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/plugins/__init__.py +0 -0
  117. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/plugins/prime.py +0 -0
  118. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/cli/tui.py +0 -0
  119. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/__init__.py +0 -0
  120. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/anthropic_messages_client.py +0 -0
  121. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/client.py +0 -0
  122. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  123. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/openai_chat_completions_client.py +0 -0
  124. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  125. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/openai_completions_client.py +0 -0
  126. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/openai_responses_client.py +0 -0
  127. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/clients/renderer_client.py +0 -0
  128. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/decorators.py +0 -0
  129. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/AGENTS.md +0 -0
  130. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/__init__.py +0 -0
  131. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/env_group.py +0 -0
  132. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/README.md +0 -0
  133. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/__init__.py +0 -0
  134. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  135. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/README.md +0 -0
  136. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/__init__.py +0 -0
  137. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/_filter.py +0 -0
  138. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  139. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harness.py +0 -0
  140. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  141. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  142. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  143. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  144. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  145. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  146. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/task.py +0 -0
  147. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  148. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  149. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  150. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  151. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  152. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  153. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  154. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  155. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  156. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  157. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/openseeker/README.md +0 -0
  158. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/openseeker/__init__.py +0 -0
  159. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/openseeker/taskset.py +0 -0
  160. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/README.md +0 -0
  161. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/__init__.py +0 -0
  162. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/__init__.py +0 -0
  163. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/__init__.py +0 -0
  164. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/api_tools/tool_pdf.py +0 -0
  165. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/eval_toolkit.py +0 -0
  166. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/evaluator.py +0 -0
  167. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/__init__.py +0 -0
  168. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/llm_client/base_client.py +0 -0
  169. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/__init__.py +0 -0
  170. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/prompts/cache_prompts.py +0 -0
  171. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/__init__.py +0 -0
  172. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/cache_filesys.py +0 -0
  173. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/load_eval_script.py +0 -0
  174. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/misc.py +0 -0
  175. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/tool_visit.py +0 -0
  176. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/utils/url_tools.py +0 -0
  177. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/obj_task_eval/verification_tree.py +0 -0
  178. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/search/quest/taskset.py +0 -0
  179. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/README.md +0 -0
  180. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  181. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/__init__.py +0 -0
  182. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/extract_fix_patch.sh +0 -0
  183. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe/taskset.py +0 -0
  184. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/openswe/__init__.py +0 -0
  185. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/openswe/taskset.py +0 -0
  186. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/__init__.py +0 -0
  187. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/log_parser.py +0 -0
  188. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym/taskset.py +0 -0
  189. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/__init__.py +0 -0
  190. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/scale_swe/taskset.py +0 -0
  191. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/shared/__init__.py +0 -0
  192. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/shared/test_patch.py +0 -0
  193. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/__init__.py +0 -0
  194. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench/taskset.py +0 -0
  195. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/__init__.py +0 -0
  196. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego/taskset.py +0 -0
  197. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/__init__.py +0 -0
  198. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/log_parsers.py +0 -0
  199. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2/taskset.py +0 -0
  200. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/__init__.py +0 -0
  201. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith/taskset.py +0 -0
  202. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  203. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/gym_env.py +0 -0
  204. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  205. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  206. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  207. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/mcp_env.py +0 -0
  208. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/opencode_env.py +0 -0
  209. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  210. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  211. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  212. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/utils/__init__.py +0 -0
  213. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  214. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  215. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/README.md +0 -0
  216. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/__init__.py +0 -0
  217. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/README.md +0 -0
  218. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  219. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  220. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  221. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  222. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  223. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  224. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/openenv_env.py +0 -0
  225. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  226. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/integrations/textarena_env.py +0 -0
  227. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/multiturn_env.py +0 -0
  228. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/python_env.py +0 -0
  229. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/sandbox_env.py +0 -0
  230. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/singleturn_env.py +0 -0
  231. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/stateful_tool_env.py +0 -0
  232. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/envs/tool_env.py +0 -0
  233. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/errors.py +0 -0
  234. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/gepa/__init__.py +0 -0
  235. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/gepa/adapter.py +0 -0
  236. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/gepa/config.py +0 -0
  237. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/gepa/display.py +0 -0
  238. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/gepa/gepa_utils.py +0 -0
  239. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/parsers/__init__.py +0 -0
  240. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/parsers/maybe_think_parser.py +0 -0
  241. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/parsers/parser.py +0 -0
  242. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/parsers/think_parser.py +0 -0
  243. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/parsers/xml_parser.py +0 -0
  244. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/README.md +0 -0
  245. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/__init__.py +0 -0
  246. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/inference/__init__.py +0 -0
  247. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/inference/client.py +0 -0
  248. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/inference/server.py +0 -0
  249. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/trainer/__init__.py +0 -0
  250. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/trainer/config.py +0 -0
  251. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/trainer/orchestrator.py +0 -0
  252. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/trainer/trainer.py +0 -0
  253. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rl/trainer/utils.py +0 -0
  254. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/__init__.py +0 -0
  255. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  256. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/judge_rubric.py +0 -0
  257. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/math_rubric.py +0 -0
  258. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/rubric.py +0 -0
  259. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/rubrics/rubric_group.py +0 -0
  260. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/__init__.py +0 -0
  261. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/build.py +0 -0
  262. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/eval.py +0 -0
  263. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/gepa.py +0 -0
  264. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/init.py +0 -0
  265. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/install.py +0 -0
  266. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/rl.py +0 -0
  267. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/setup.py +0 -0
  268. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/train.py +0 -0
  269. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/tui.py +0 -0
  270. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/scripts/vllm.py +0 -0
  271. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/__init__.py +0 -0
  272. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/client/env_client.py +0 -0
  273. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/client/zmq_env_client.py +0 -0
  274. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/server/__init__.py +0 -0
  275. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/server/env_router.py +0 -0
  276. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/server/env_server.py +0 -0
  277. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/server/env_worker.py +0 -0
  278. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/server/zmq_env_server.py +0 -0
  279. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/serve/types.py +0 -0
  280. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/types.py +0 -0
  281. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/__init__.py +0 -0
  282. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/async_utils.py +0 -0
  283. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/client_utils.py +0 -0
  284. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/config_utils.py +0 -0
  285. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/data_utils.py +0 -0
  286. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/display_utils.py +0 -0
  287. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/env_config_utils.py +0 -0
  288. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/env_utils.py +0 -0
  289. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/error_utils.py +0 -0
  290. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/eval_display.py +0 -0
  291. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/eval_utils.py +0 -0
  292. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/heartbeat.py +0 -0
  293. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/import_utils.py +0 -0
  294. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/install_utils.py +0 -0
  295. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/interception_utils.py +0 -0
  296. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/logging_utils.py +0 -0
  297. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/message_utils.py +0 -0
  298. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/metric_utils.py +0 -0
  299. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/pricing_utils.py +0 -0
  300. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/process_utils.py +0 -0
  301. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/response_utils.py +0 -0
  302. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/serve_utils.py +0 -0
  303. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/thread_utils.py +0 -0
  304. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/threaded_sandbox_client.py +0 -0
  305. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/tool_utils.py +0 -0
  306. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/usage_utils.py +0 -0
  307. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/utils/version_utils.py +0 -0
  308. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
  309. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/README.md +0 -0
  310. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/RE_MIGRATION.md +0 -0
  311. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/__init__.py +0 -0
  312. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/artifact.py +0 -0
  313. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/config.py +0 -0
  314. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/env.py +0 -0
  315. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/harness.py +0 -0
  316. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/model.py +0 -0
  317. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/program.py +0 -0
  318. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/runtime.py +0 -0
  319. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/runtime_handles.py +0 -0
  320. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/sandbox.py +0 -0
  321. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/state.py +0 -0
  322. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/task.py +0 -0
  323. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/taskset.py +0 -0
  324. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/toolset.py +0 -0
  325. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/types.py +0 -0
  326. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/user.py +0 -0
  327. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/__init__.py +0 -0
  328. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/binding_utils.py +0 -0
  329. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/config_callable_utils.py +0 -0
  330. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/config_utils.py +0 -0
  331. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/endpoint_utils.py +0 -0
  332. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/json_utils.py +0 -0
  333. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/judge_utils.py +0 -0
  334. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  335. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/logging_utils.py +0 -0
  336. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  337. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/mcp_utils.py +0 -0
  338. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/object_utils.py +0 -0
  339. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/program_utils.py +0 -0
  340. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/prompt_utils.py +0 -0
  341. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
  342. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/runtime_registry.py +0 -0
  343. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  344. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
  345. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/sandbox_utils.py +0 -0
  346. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/scoring_utils.py +0 -0
  347. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/serialization_utils.py +0 -0
  348. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  349. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/taskset_utils.py +0 -0
  350. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/tool_utils.py +0 -0
  351. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/toolset_utils.py +0 -0
  352. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/trajectory_utils.py +0 -0
  353. {verifiers-0.1.15.dev170 → verifiers-0.1.15.dev172}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev170
3
+ Version: 0.1.15.dev172
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -557,7 +557,12 @@ async def test_generate_resume_closes_local_endpoint_clients(
557
557
  results_path = tmp_path / "resume-complete"
558
558
  results_path.mkdir()
559
559
  (results_path / "results.jsonl").write_text(
560
- json.dumps(make_output(example_id=0)) + "\n",
560
+ (
561
+ json.dumps(make_output(example_id=0, reward=0.0))
562
+ + "\n"
563
+ + json.dumps(make_output(example_id=0, reward=1.0))
564
+ + "\n"
565
+ ),
561
566
  encoding="utf-8",
562
567
  )
563
568
  (results_path / "metadata.json").write_text(
@@ -583,9 +588,12 @@ async def test_generate_resume_closes_local_endpoint_clients(
583
588
  ),
584
589
  model="test-model",
585
590
  results_path=results_path,
591
+ save_results=True,
586
592
  )
587
593
 
588
594
  assert len(outputs["outputs"]) == 1
595
+ saved_metadata = json.loads((results_path / "metadata.json").read_text())
596
+ assert saved_metadata["avg_reward"] == 0.0
589
597
  assert len(created_clients) == 2
590
598
  assert all(client.closed for client in created_clients)
591
599
 
@@ -669,6 +677,21 @@ async def test_generate_resume_raises_on_metadata_mismatch(
669
677
  ):
670
678
  env = make_dummy_env(mock_client)
671
679
 
680
+ invalid_results_path = tmp_path / "missing-metadata"
681
+ invalid_results_path.mkdir()
682
+ (invalid_results_path / "results.jsonl").write_text(
683
+ json.dumps({"example_id": 99, "label": "existing"}) + "\n",
684
+ encoding="utf-8",
685
+ )
686
+ with pytest.raises(ValueError, match="already exists without valid metadata"):
687
+ await env.generate(
688
+ inputs=[make_input(example_id=0)],
689
+ client=mock_client,
690
+ model="test-model",
691
+ results_path=invalid_results_path,
692
+ save_results=True,
693
+ )
694
+
672
695
  results_path = tmp_path / "resume"
673
696
  results_path.mkdir()
674
697
  (results_path / "results.jsonl").write_text("", encoding="utf-8")
@@ -30,7 +30,8 @@ def test_find_latest_incomplete_eval_results_path_picks_newest_matching(
30
30
 
31
31
  (old_run / "results.jsonl").write_text('{"example_id":0}\n', encoding="utf-8")
32
32
  (new_run / "results.jsonl").write_text(
33
- '{"example_id":0}\n{"example_id":1}\n', encoding="utf-8"
33
+ '{"example_id":0}\n{"example_id":1}\n{"example_id":2',
34
+ encoding="utf-8",
34
35
  )
35
36
  (complete_run / "results.jsonl").write_text(
36
37
  '{"example_id":0}\n{"example_id":1}\n{"example_id":2}\n{"example_id":3}\n',
@@ -33,7 +33,6 @@ from verifiers.utils.save_utils import (
33
33
  save_new_outputs,
34
34
  save_metadata,
35
35
  states_to_outputs,
36
- truncate_malformed_trailing_line,
37
36
  validate_resume_metadata,
38
37
  )
39
38
  from verifiers.utils.usage_utils import StateUsageTracker, response_usage_tokens
@@ -475,7 +474,7 @@ class TestSavingResults:
475
474
 
476
475
 
477
476
  class TestLoadOutputs:
478
- def test_ignores_malformed_trailing_line(self, tmp_path: Path):
477
+ def test_ignores_malformed_trailing_line(self, tmp_path: Path, monkeypatch):
479
478
  results_path = tmp_path / "results"
480
479
  results_path.mkdir()
481
480
  outputs_path = results_path / "results.jsonl"
@@ -489,50 +488,68 @@ class TestLoadOutputs:
489
488
  outputs_path.write_text(
490
489
  "\n".join(lines + [partial_trailing_line]) + "\n", encoding="utf-8"
491
490
  )
491
+ warnings = []
492
+ monkeypatch.setattr(
493
+ "verifiers.utils.save_utils.logger.warning",
494
+ lambda *args, **kwargs: warnings.append(args),
495
+ )
492
496
 
493
497
  outputs = load_outputs(results_path)
494
498
 
495
- assert len(outputs) == 2
496
- assert outputs[0]["example_id"] == 0
497
- assert outputs[1]["example_id"] == 1
499
+ assert [output["example_id"] for output in outputs] == [0, 1]
500
+ assert warnings
498
501
 
499
- def test_raises_for_malformed_non_trailing_line(self, tmp_path: Path):
502
+ def test_ignores_malformed_non_trailing_line(self, tmp_path: Path, monkeypatch):
500
503
  results_path = tmp_path / "results"
501
504
  results_path.mkdir()
502
505
  outputs_path = results_path / "results.jsonl"
503
506
 
504
507
  malformed_non_trailing_line = '{"example_id": 0, "label": "broken"'
508
+ missing_example_id_line = json.dumps({"label": "missing"})
505
509
  valid_line = json.dumps({"example_id": 1, "label": "row-1"})
506
510
  outputs_path.write_text(
507
- "\n".join([malformed_non_trailing_line, valid_line]) + "\n",
511
+ "\n".join(
512
+ [malformed_non_trailing_line, missing_example_id_line, valid_line]
513
+ )
514
+ + "\n",
508
515
  encoding="utf-8",
509
516
  )
517
+ warnings = []
518
+ monkeypatch.setattr(
519
+ "verifiers.utils.save_utils.logger.warning",
520
+ lambda *args, **kwargs: warnings.append(args),
521
+ )
510
522
 
511
- with pytest.raises(json.JSONDecodeError):
512
- load_outputs(results_path)
523
+ outputs = load_outputs(results_path)
524
+
525
+ assert [output["example_id"] for output in outputs] == [1]
526
+ assert warnings
513
527
 
514
528
 
515
529
  class TestSaveNewOutputs:
516
- def test_truncates_malformed_trailing_line_before_append(self, tmp_path: Path):
530
+ def test_appends_after_malformed_rows_without_rewriting(self, tmp_path: Path):
517
531
  results_path = tmp_path / "results"
518
532
  results_path.mkdir()
519
533
  outputs_path = results_path / "results.jsonl"
520
534
 
521
- existing_outputs = [
522
- {"example_id": 0, "label": "row-0"},
523
- {"example_id": 1, "label": "row-1"},
524
- ]
535
+ malformed_middle_line = '{"example_id": 99, "label": "broken"'
525
536
  malformed_trailing_line = '{"example_id": 2, "label": "row-2"'
526
- lines = [json.dumps(output) for output in existing_outputs]
527
537
  outputs_path.write_text(
528
- "\n".join(lines + [malformed_trailing_line]), encoding="utf-8"
538
+ "\n".join(
539
+ [
540
+ json.dumps({"example_id": 0, "label": "row-0"}),
541
+ malformed_middle_line,
542
+ json.dumps({"example_id": 1, "label": "row-1"}),
543
+ malformed_trailing_line,
544
+ ]
545
+ ),
546
+ encoding="utf-8",
529
547
  )
530
548
 
531
- # Caller drops the partial trailing row before appending so the new
532
- # row lands on a valid JSONL boundary.
533
- truncate_malformed_trailing_line(outputs_path)
549
+ circular_output = {"example_id": 4}
550
+ circular_output["self"] = circular_output
534
551
  save_new_outputs(
535
- [{"example_id": 3, "label": "row-3"}],
552
+ [circular_output, {"example_id": 3, "label": "row-3"}],
536
553
  results_path,
537
554
  )
538
555
 
@@ -541,14 +558,12 @@ class TestSaveNewOutputs:
541
558
  for line in outputs_path.read_text(encoding="utf-8").splitlines()
542
559
  if line
543
560
  ]
544
- parsed_outputs = [json.loads(line) for line in persisted_lines]
545
561
 
546
- assert [output["example_id"] for output in parsed_outputs] == [0, 1, 3]
547
- assert [output["example_id"] for output in load_outputs(results_path)] == [
548
- 0,
549
- 1,
550
- 3,
551
- ]
562
+ assert persisted_lines[1] == malformed_middle_line
563
+ assert persisted_lines[3] == malformed_trailing_line
564
+ assert [
565
+ json.loads(persisted_lines[idx])["example_id"] for idx in [0, 2, 4]
566
+ ] == [0, 1, 3]
552
567
 
553
568
 
554
569
  class TestResumeMetadataValidation:
@@ -82,7 +82,6 @@ from verifiers.utils.save_utils import (
82
82
  save_new_outputs,
83
83
  save_outputs,
84
84
  state_to_output,
85
- truncate_malformed_trailing_line,
86
85
  validate_resume_metadata,
87
86
  )
88
87
  from verifiers.utils.usage_utils import StateUsageTracker
@@ -1003,9 +1002,21 @@ class Environment(ABC):
1003
1002
  )
1004
1003
  on_log(f"Resuming evaluation from {results_path}")
1005
1004
  outputs = load_outputs(results_path)
1006
- # Drop any partial trailing row left by a crashed prior write
1007
- # so subsequent appends start from a valid JSONL boundary.
1008
- truncate_malformed_trailing_line(results_path / "results.jsonl")
1005
+ rollout_counts_by_example_id: dict[object, int] = {}
1006
+ capped_outputs: list[RolloutOutput] = []
1007
+ for output in outputs:
1008
+ example_id = output["example_id"]
1009
+ rollout_count = rollout_counts_by_example_id.get(example_id, 0)
1010
+ if rollout_count >= rollouts_per_example:
1011
+ continue
1012
+ rollout_counts_by_example_id[example_id] = rollout_count + 1
1013
+ capped_outputs.append(output)
1014
+ if len(capped_outputs) != len(outputs):
1015
+ on_log(
1016
+ f"Ignoring {len(outputs) - len(capped_outputs)} saved duplicate rollout(s) "
1017
+ "beyond rollouts_per_example"
1018
+ )
1019
+ outputs = capped_outputs
1009
1020
  builder.add_outputs(outputs)
1010
1021
  filtered_inputs = filter_inputs(
1011
1022
  raw_inputs, outputs, rollouts_per_example
@@ -1014,7 +1025,12 @@ class Environment(ABC):
1014
1025
  on_log(
1015
1026
  "No remaining rollouts to evaluate, returning completed outputs"
1016
1027
  )
1017
- return builder.build(sort_by_example_id=True)
1028
+ results = builder.build(sort_by_example_id=True)
1029
+ if save_results:
1030
+ await asyncio.to_thread(
1031
+ save_metadata, results["metadata"], builder.results_path
1032
+ )
1033
+ return results
1018
1034
  on_log(
1019
1035
  f"Found {len(outputs)} completed rollout(s), {len(filtered_inputs)} remaining rollout(s)"
1020
1036
  )
@@ -1023,6 +1039,21 @@ class Environment(ABC):
1023
1039
 
1024
1040
  if save_results:
1025
1041
  on_log(f"Saving results to {builder.results_path}")
1042
+ if results_path is None or not is_valid_eval_results_path(results_path):
1043
+ outputs_path = builder.results_path / "results.jsonl"
1044
+ if (
1045
+ results_path is not None
1046
+ and outputs_path.is_file()
1047
+ and outputs_path.stat().st_size > 0
1048
+ ):
1049
+ raise ValueError(
1050
+ f"Cannot save to invalid results path {builder.results_path}: "
1051
+ "results.jsonl already exists without valid metadata"
1052
+ )
1053
+ await asyncio.to_thread(save_outputs, [], builder.results_path, "a")
1054
+ await asyncio.to_thread(
1055
+ save_metadata, builder.build_metadata(), builder.results_path
1056
+ )
1026
1057
 
1027
1058
  tasks: dict[asyncio.Task, int] = {}
1028
1059
  try:
@@ -1104,9 +1135,6 @@ class Environment(ABC):
1104
1135
 
1105
1136
  # save if requested
1106
1137
  if save_results:
1107
- await asyncio.to_thread(
1108
- save_outputs, results["outputs"], builder.results_path
1109
- )
1110
1138
  await asyncio.to_thread(
1111
1139
  save_metadata, results["metadata"], builder.results_path
1112
1140
  )
@@ -10,6 +10,7 @@ The search family is intentionally backend-oriented, mirroring the SWE taskset p
10
10
  |---|---|---|---|
11
11
  | `openseeker` | [PolarSeeker/OpenSeeker](https://github.com/PolarSeeker/OpenSeeker) | [`PolarSeeker/OpenSeeker-v1-Data`](https://huggingface.co/datasets/PolarSeeker/OpenSeeker-v1-Data) | Binary semantic answer judge |
12
12
  | `quest` | [OSU-NLP-Group/QUEST](https://github.com/OSU-NLP-Group/QUEST) | [`osunlp/QUEST-RL-Data`](https://huggingface.co/datasets/osunlp/QUEST-RL-Data) | Objective tasks supported |
13
+ | `redsearcher` | [RedSearchAgent/REDSearcher](https://github.com/RedSearchAgent/REDSearcher) | [`Zchu/REDSearcher_RL_1K`](https://huggingface.co/datasets/Zchu/REDSearcher_RL_1K) | Text RL query set supported |
13
14
 
14
15
  ## Usage
15
16
 
@@ -18,13 +19,14 @@ from verifiers.envs.experimental.composable.tasksets.search import make_search_t
18
19
 
19
20
  taskset = make_search_taskset(backend="openseeker")
20
21
  taskset = make_search_taskset(backend="quest", category="objective")
22
+ redsearcher = make_search_taskset(backend="redsearcher", difficulty="easy")
21
23
  ```
22
24
 
23
25
  `make_search_taskset()` dispatches by backend name. Unknown backends raise `ValueError` with the available backend list.
24
26
 
25
27
  ## Output Contract
26
28
 
27
- Search tasksets should define their own output contract. The `quest` and `openseeker` backends expect the agent to write one final researched response to `/task/answer.txt`, including supporting URLs/citations when available. Scratch reasoning, tool traces, and logs should not be written as the final answer.
29
+ Search tasksets should define their own output contract. The `quest`, `openseeker`, and `redsearcher` backends expect the agent to write one final researched response to `/task/answer.txt`, including supporting URLs/citations when available. Scratch reasoning, tool traces, and logs should not be written as the final answer.
28
30
 
29
31
  ## Error Handling
30
32
 
@@ -0,0 +1,15 @@
1
+ """Composable search/research tasksets."""
2
+
3
+ from .search_tasksets import (
4
+ make_openseeker_taskset,
5
+ make_quest_taskset,
6
+ make_redsearcher_taskset,
7
+ make_search_taskset,
8
+ )
9
+
10
+ __all__ = [
11
+ "make_openseeker_taskset",
12
+ "make_quest_taskset",
13
+ "make_redsearcher_taskset",
14
+ "make_search_taskset",
15
+ ]
@@ -0,0 +1,38 @@
1
+ # REDSearcher Search Taskset
2
+
3
+ Text RL queries from REDSearcher ported into the composable search taskset framework.
4
+
5
+ ## Source
6
+
7
+ - Dataset: [`Zchu/REDSearcher_RL_1K`](https://huggingface.co/datasets/Zchu/REDSearcher_RL_1K)
8
+ - Collection: [`Zchu/redsearcher`](https://huggingface.co/collections/Zchu/redsearcher)
9
+ - Upstream project: [`RedSearchAgent/REDSearcher`](https://github.com/RedSearchAgent/REDSearcher)
10
+ - Paper: [`arXiv:2602.14234`](https://arxiv.org/abs/2602.14234)
11
+
12
+ The released text RL dataset contains 1,000 rows with `problem`, `answer`, and `difficulty` columns. The upstream REDSearcher repo describes converting each row into a Slime-style `prompt` plus `label`; this taskset keeps the same problem/answer boundary while adapting it to Verifiers' taskset format.
13
+
14
+ ## Task Contract
15
+
16
+ Each example is a long-horizon web-search question. The agent should research across sources and produce one final answer in `/task/answer.txt`, with supporting URLs/citations when available.
17
+
18
+ The paired `rlm_search` environment prompts RLM to write this file and provides web search/open-page skills. The rubric can fall back to the final assistant text if the answer file is empty, but agents should still write the file directly.
19
+
20
+ ## Scoring
21
+
22
+ `RedSearcherRubric` compares the final response against the released `answer` label. It first applies a strict normalized exact-answer shortcut for unambiguous matches. Otherwise it uses an OpenAI-compatible LLM-as-judge prompt that follows the answer-matching convention in REDSearcher's DeepTraceHub evaluation code: judge whether the predicted final answer is equivalent to the ground truth and return binary accuracy.
23
+
24
+ A reward of `1.0` means the final response matched the ground-truth answer; `0.0` means it did not, or no final answer was produced. Judge provider failures are preserved as `vf.Error` values on `state["error"]`.
25
+
26
+ ## Common Arguments
27
+
28
+ | Argument | Default | Description |
29
+ |---|---:|---|
30
+ | `dataset_name` | `Zchu/REDSearcher_RL_1K` | Hugging Face dataset name. |
31
+ | `split` | `train` | Dataset split. |
32
+ | `difficulty` | `None` | Optional difficulty filter: `easy`, `medium`, `hard`, or `all`. |
33
+ | `answer_file` | `/task/answer.txt` | Final answer path in the sandbox. |
34
+ | `judge_model` | `openai/gpt-5.4-mini` | OpenAI-compatible model for answer-match judging. |
35
+ | `judge_base_url` | `https://api.pinference.ai/api/v1` | Judge API base URL. |
36
+ | `judge_api_key_var` | `PRIME_API_KEY` | Env var containing the judge API key. |
37
+ | `judge_max_retries` | `5` | Number of parse retries for the A/B judge response. |
38
+ | `use_exact_match_shortcut` | `True` | Return `1.0` without an LLM call when the normalized final response exactly equals the normalized ground-truth answer. |
@@ -0,0 +1,5 @@
1
+ """REDSearcher search taskset."""
2
+
3
+ from .taskset import RedSearcherRubric, RedSearcherTaskSet
4
+
5
+ __all__ = ["RedSearcherRubric", "RedSearcherTaskSet"]