verifiers 0.1.13.dev2__tar.gz → 0.1.13.dev3__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (222)
  1. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/PKG-INFO +1 -1
  2. verifiers-0.1.13.dev3/tests/test_interception_utils.py +133 -0
  3. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/__init__.py +1 -1
  4. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/cli_agent_env.py +5 -2
  5. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/harnesses/rlm.py +13 -10
  6. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/interception_utils.py +34 -1
  7. verifiers-0.1.13.dev2/tests/test_interception_utils.py +0 -63
  8. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/.gitignore +0 -0
  9. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/LICENSE +0 -0
  10. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/README.md +0 -0
  11. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/pyproject.toml +0 -0
  12. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/AGENTS.md +0 -0
  13. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/README.md +0 -0
  14. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/__init__.py +0 -0
  15. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/conftest.py +0 -0
  16. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_browser_env.py +0 -0
  17. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_build_script.py +0 -0
  18. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_cli_agent_env.py +0 -0
  19. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_client_auth_errors.py +0 -0
  20. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_client_config.py +0 -0
  21. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_client_multimodal_types.py +0 -0
  22. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_composable_env.py +0 -0
  23. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_context_token_metrics.py +0 -0
  24. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_decorator_ranks.py +0 -0
  25. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_endpoint_registry.py +0 -0
  26. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_env_group.py +0 -0
  27. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_env_server.py +0 -0
  28. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_environment.py +0 -0
  29. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_environment_extra.py +0 -0
  30. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_envs.py +0 -0
  31. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_error_chain.py +0 -0
  32. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_eval_cli.py +0 -0
  33. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_eval_display.py +0 -0
  34. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_eval_utils.py +0 -0
  35. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_gepa_cli.py +0 -0
  36. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_gym_env.py +0 -0
  37. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_imports.py +0 -0
  38. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_install_utils.py +0 -0
  39. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_logging.py +0 -0
  40. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_math_rubric.py +0 -0
  41. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_maybe_think_parser.py +0 -0
  42. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_message_utils.py +0 -0
  43. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_message_utils_multimodal.py +0 -0
  44. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_multiturn_env.py +0 -0
  45. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_nemorl_client.py +0 -0
  46. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_openai_chat_completions_token_client.py +0 -0
  47. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_opencode_harbor.py +0 -0
  48. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_opencode_rlm_env.py +0 -0
  49. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_parser.py +0 -0
  50. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_path_utils.py +0 -0
  51. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_prime_plugin.py +0 -0
  52. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_rlm_composable_env.py +0 -0
  53. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_rlm_env.py +0 -0
  54. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_rubric.py +0 -0
  55. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_rubric_group.py +0 -0
  56. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_sandbox_env.py +0 -0
  57. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_sandbox_mixin.py +0 -0
  58. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_save_utils.py +0 -0
  59. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_setup_script.py +0 -0
  60. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_singleturn_env.py +0 -0
  61. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_stateful_tool_env.py +0 -0
  62. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_think_parser.py +0 -0
  63. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_tool_env.py +0 -0
  64. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_tool_utils.py +0 -0
  65. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_trajectory_processing.py +0 -0
  66. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_tui_info_formatting.py +0 -0
  67. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/tests/test_xml_parser.py +0 -0
  68. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/AGENTS.md +0 -0
  69. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/__init__.py +0 -0
  70. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/__init__.py +0 -0
  71. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/build.py +0 -0
  72. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/eval.py +0 -0
  73. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/gepa.py +0 -0
  74. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/init.py +0 -0
  75. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/install.py +0 -0
  76. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/commands/setup.py +0 -0
  77. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/plugins/__init__.py +0 -0
  78. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/plugins/prime.py +0 -0
  79. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/cli/tui.py +0 -0
  80. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/__init__.py +0 -0
  81. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/anthropic_messages_client.py +0 -0
  82. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/client.py +0 -0
  83. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  84. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/openai_chat_completions_client.py +0 -0
  85. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  86. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/clients/openai_completions_client.py +0 -0
  87. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/decorators.py +0 -0
  88. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/AGENTS.md +0 -0
  89. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/__init__.py +0 -0
  90. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/env_group.py +0 -0
  91. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/environment.py +0 -0
  92. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/README.md +0 -0
  93. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/__init__.py +0 -0
  94. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/README.md +0 -0
  95. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/__init__.py +0 -0
  96. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  97. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/harness.py +0 -0
  98. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  99. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  100. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  101. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/task.py +0 -0
  102. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  103. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  104. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  105. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  106. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  107. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  108. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  109. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  110. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  111. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  112. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  113. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  114. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  115. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  116. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  117. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  118. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  119. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  120. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  121. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
  122. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/harbor_env.py +0 -0
  123. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/mcp_env.py +0 -0
  124. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/opencode_env.py +0 -0
  125. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  126. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  127. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/rlm_env.py +0 -0
  128. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  129. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/README.md +0 -0
  130. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/__init__.py +0 -0
  131. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/README.md +0 -0
  132. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  133. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  134. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  135. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  136. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  137. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  138. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/openenv_env.py +0 -0
  139. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  140. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/integrations/textarena_env.py +0 -0
  141. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/multiturn_env.py +0 -0
  142. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/python_env.py +0 -0
  143. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/sandbox_env.py +0 -0
  144. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/singleturn_env.py +0 -0
  145. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
  146. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/envs/tool_env.py +0 -0
  147. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/errors.py +0 -0
  148. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/gepa/__init__.py +0 -0
  149. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/gepa/adapter.py +0 -0
  150. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/gepa/config.py +0 -0
  151. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/gepa/display.py +0 -0
  152. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/gepa/gepa_utils.py +0 -0
  153. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/parsers/__init__.py +0 -0
  154. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
  155. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/parsers/parser.py +0 -0
  156. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/parsers/think_parser.py +0 -0
  157. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/parsers/xml_parser.py +0 -0
  158. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/README.md +0 -0
  159. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/__init__.py +0 -0
  160. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/inference/__init__.py +0 -0
  161. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/inference/client.py +0 -0
  162. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/inference/server.py +0 -0
  163. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/trainer/__init__.py +0 -0
  164. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/trainer/config.py +0 -0
  165. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/trainer/orchestrator.py +0 -0
  166. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/trainer/trainer.py +0 -0
  167. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rl/trainer/utils.py +0 -0
  168. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/__init__.py +0 -0
  169. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  170. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
  171. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/math_rubric.py +0 -0
  172. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/rubric.py +0 -0
  173. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/rubrics/rubric_group.py +0 -0
  174. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/__init__.py +0 -0
  175. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/build.py +0 -0
  176. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/eval.py +0 -0
  177. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/gepa.py +0 -0
  178. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/init.py +0 -0
  179. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/install.py +0 -0
  180. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/prime_rl.py +0 -0
  181. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/rl.py +0 -0
  182. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/setup.py +0 -0
  183. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/train.py +0 -0
  184. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/tui.py +0 -0
  185. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/scripts/vllm.py +0 -0
  186. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/__init__.py +0 -0
  187. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/client/env_client.py +0 -0
  188. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/client/zmq_env_client.py +0 -0
  189. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/server/__init__.py +0 -0
  190. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/server/env_router.py +0 -0
  191. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/server/env_server.py +0 -0
  192. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/server/env_worker.py +0 -0
  193. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/server/zmq_env_server.py +0 -0
  194. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/serve/types.py +0 -0
  195. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/types.py +0 -0
  196. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/__init__.py +0 -0
  197. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/async_utils.py +0 -0
  198. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/client_utils.py +0 -0
  199. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/config_utils.py +0 -0
  200. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/data_utils.py +0 -0
  201. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/display_utils.py +0 -0
  202. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/env_utils.py +0 -0
  203. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/error_utils.py +0 -0
  204. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/eval_display.py +0 -0
  205. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/eval_utils.py +0 -0
  206. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/heartbeat.py +0 -0
  207. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/import_utils.py +0 -0
  208. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/install_utils.py +0 -0
  209. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/logging_utils.py +0 -0
  210. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/message_utils.py +0 -0
  211. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/metric_utils.py +0 -0
  212. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/path_utils.py +0 -0
  213. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/process_utils.py +0 -0
  214. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/response_utils.py +0 -0
  215. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/save_utils.py +0 -0
  216. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/serve_utils.py +0 -0
  217. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/thread_utils.py +0 -0
  218. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/threaded_sandbox_client.py +0 -0
  219. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/tool_utils.py +0 -0
  220. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/tunnel_utils.py +0 -0
  221. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/usage_utils.py +0 -0
  222. {verifiers-0.1.13.dev2 → verifiers-0.1.13.dev3}/verifiers/utils/version_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.13.dev2
3
+ Version: 0.1.13.dev3
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -0,0 +1,133 @@
1
+ import asyncio
2
+ from unittest.mock import AsyncMock, MagicMock
3
+
4
+ from verifiers.errors import InfraError
5
+ from verifiers.types import (
6
+ Response,
7
+ ResponseMessage,
8
+ TextContentPart,
9
+ ToolCall,
10
+ Usage,
11
+ )
12
+ from verifiers.utils import interception_utils
13
+ from verifiers.utils.interception_utils import (
14
+ InterceptionServer,
15
+ StreamInterrupted,
16
+ create_empty_completion,
17
+ serialize_intercept_response,
18
+ )
19
+
20
+
21
+ def test_serialize_intercept_response_from_vf_response_uses_chat_completion_shape():
22
+ response = Response(
23
+ id="resp_1",
24
+ created=123,
25
+ model="test-model",
26
+ usage=Usage(
27
+ prompt_tokens=10,
28
+ reasoning_tokens=0,
29
+ completion_tokens=5,
30
+ total_tokens=15,
31
+ ),
32
+ message=ResponseMessage(
33
+ content=[TextContentPart(text="hello "), {"type": "text", "text": "world"}],
34
+ reasoning_content=None,
35
+ tool_calls=[
36
+ ToolCall(id="call_1", name="echo", arguments='{"x": 1}'),
37
+ ],
38
+ finish_reason="tool_calls",
39
+ is_truncated=False,
40
+ tokens=None,
41
+ ),
42
+ )
43
+
44
+ payload = serialize_intercept_response(response)
45
+
46
+ assert payload["id"] == "resp_1"
47
+ assert payload["object"] == "chat.completion"
48
+ assert payload["model"] == "test-model"
49
+ assert payload["choices"][0]["message"]["role"] == "assistant"
50
+ assert payload["choices"][0]["message"]["content"] == "hello world"
51
+ assert payload["choices"][0]["message"]["tool_calls"] == [
52
+ {
53
+ "id": "call_1",
54
+ "type": "function",
55
+ "function": {"name": "echo", "arguments": '{"x": 1}'},
56
+ }
57
+ ]
58
+ assert payload["choices"][0]["finish_reason"] == "tool_calls"
59
+ assert payload["usage"]["prompt_tokens"] == 10
60
+ assert payload["usage"]["completion_tokens"] == 5
61
+ assert payload["usage"]["total_tokens"] == 15
62
+
63
+
64
+ def test_serialize_intercept_response_passthrough_native_chat_completion():
65
+ native = create_empty_completion("native-model")
66
+ payload = serialize_intercept_response(native)
67
+
68
+ assert payload["object"] == "chat.completion"
69
+ assert payload["model"] == "native-model"
70
+ assert len(payload["choices"]) == 1
71
+
72
+
73
+ def test_set_rollout_error_attaches_stream_interrupted_to_state():
74
+ server = InterceptionServer(port=0)
75
+ state: dict = {}
76
+ server.register_rollout("r1", state=state)
77
+
78
+ err = StreamInterrupted("tunnel died")
79
+ server._set_rollout_error("r1", err)
80
+
81
+ assert state["error"] is err
82
+ assert isinstance(state["error"], InfraError)
83
+
84
+
85
+ def test_set_rollout_error_does_not_clobber_existing_error():
86
+ # First error wins — later write failures must not hide the original cause.
87
+ server = InterceptionServer(port=0)
88
+ original = InfraError("original")
89
+ state: dict = {"error": original}
90
+ server.register_rollout("r1", state=state)
91
+
92
+ server._set_rollout_error("r1", StreamInterrupted("later"))
93
+
94
+ assert state["error"] is original
95
+
96
+
97
+ async def test_streaming_write_failure_surfaces_to_state(monkeypatch):
98
+ """The real failure path: a mid-SSE transport close on the client side
99
+ raises out of ``response.write(...)``. The except branch must funnel
100
+ that into ``state["error"]`` so the rollout halts via ``has_error``."""
101
+ server = InterceptionServer(port=0)
102
+ state: dict = {}
103
+ server.register_rollout("r1", state=state)
104
+
105
+ # Mock StreamResponse whose second write raises (first write succeeds
106
+ # to prove we're in the streaming loop, not failing at prepare()).
107
+ writes: list[bytes] = []
108
+
109
+ async def fake_write(data: bytes) -> None:
110
+ writes.append(data)
111
+ if len(writes) >= 2:
112
+ raise ConnectionResetError("client closed transport")
113
+
114
+ fake_response = MagicMock()
115
+ fake_response.prepare = AsyncMock()
116
+ fake_response.write = AsyncMock(side_effect=fake_write)
117
+ fake_response.write_eof = AsyncMock()
118
+ monkeypatch.setattr(
119
+ interception_utils.web, "StreamResponse", lambda **_: fake_response
120
+ )
121
+
122
+ chunk_queue: asyncio.Queue = asyncio.Queue()
123
+ await chunk_queue.put({"choices": [{"delta": {"content": "hi"}}]})
124
+ await chunk_queue.put({"choices": [{"delta": {"content": " there"}}]})
125
+ intercept = {
126
+ "chunk_queue": chunk_queue,
127
+ "response_future": asyncio.Future(),
128
+ }
129
+
130
+ await server._handle_streaming_response(MagicMock(), "r1", intercept)
131
+
132
+ assert isinstance(state["error"], StreamInterrupted)
133
+ assert "ConnectionResetError" in str(state["error"])
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.13.dev2"
1
+ __version__ = "0.1.13.dev3"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -248,8 +248,11 @@ class CliAgentEnv(SandboxMixin, vf.MultiTurnEnv):
248
248
  )
249
249
  await self.create_sandbox(state, sandbox_request)
250
250
 
251
- # Register rollout for interception
252
- request_id_queue = interception_server.register_rollout(rollout_id)
251
+ # Register rollout for interception. Pass state so the server can
252
+ # surface stream-interruption errors (e.g. tunnel dies mid-SSE) back
253
+ # onto the rollout; without this the agent sees a truncated stream
254
+ # and often exits with code 0 and an empty trajectory.
255
+ request_id_queue = interception_server.register_rollout(rollout_id, state=state)
253
256
  state["request_id_queue"] = request_id_queue
254
257
  state["agent_completed"] = False
255
258
 
@@ -259,18 +259,21 @@ def rlm_harness(
259
259
  local_checkout: str | Path | None = None,
260
260
  gh_token: str | None = None,
261
261
  ) -> Harness:
262
- resolved_local_checkout = resolve_local_checkout(
263
- local_checkout,
264
- rlm_repo_url=rlm_repo_url,
265
- rlm_branch=rlm_branch,
266
- gh_token=gh_token,
267
- )
268
- upload_dirs: dict[str, Traversable | Path] = {
269
- DEFAULT_RLM_CHECKOUT_UPLOAD_NAME: resolved_local_checkout,
270
- }
271
262
  upload_dir_mapping: dict[str, str] = {
272
263
  DEFAULT_RLM_CHECKOUT_UPLOAD_NAME: DEFAULT_RLM_CHECKOUT_PATH,
273
264
  }
265
+
266
+ def get_upload_dirs() -> dict[str, Traversable | Path]:
267
+ resolved_local_checkout = resolve_local_checkout(
268
+ local_checkout,
269
+ rlm_repo_url=rlm_repo_url,
270
+ rlm_branch=rlm_branch,
271
+ gh_token=gh_token,
272
+ )
273
+ return {
274
+ DEFAULT_RLM_CHECKOUT_UPLOAD_NAME: resolved_local_checkout,
275
+ }
276
+
274
277
  return Harness(
275
278
  install_script=build_install_script(),
276
279
  run_command=build_run_command(instruction_path, workdir),
@@ -278,7 +281,7 @@ def rlm_harness(
278
281
  system_prompt_path=DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH,
279
282
  instruction_path=instruction_path,
280
283
  skills_path="/task/rlm-skills",
281
- get_upload_dirs=lambda: upload_dirs,
284
+ get_upload_dirs=get_upload_dirs,
282
285
  upload_dir_mapping=upload_dir_mapping,
283
286
  metrics_path="{workdir}/.rlm/sessions/*/meta.json",
284
287
  metrics_key="metrics",
@@ -23,12 +23,23 @@ from openai.types.chat.chat_completion_chunk import (
23
23
  Choice as ChunkChoice,
24
24
  )
25
25
 
26
+ from verifiers.errors import InfraError
26
27
  from verifiers.types import Response
27
28
  from verifiers.utils.logging_utils import truncate
28
29
 
29
30
  logger = logging.getLogger(__name__)
30
31
 
31
32
 
33
+ class StreamInterrupted(InfraError):
34
+ """Raised when the intercepted streaming response to the agent is cut short.
35
+
36
+ Without this, a mid-stream transport failure would be swallowed here and
37
+ the agent would observe a truncated (but syntactically valid) SSE stream,
38
+ often exiting with code 0 and an empty trajectory — bypassing the
39
+ non-zero-exit error capture in `CliAgentEnv.poll_job_completion`.
40
+ """
41
+
42
+
32
43
  class InterceptionServer:
33
44
  """
34
45
  HTTP server that intercepts API requests from agents.
@@ -99,10 +110,26 @@ class InterceptionServer:
99
110
  self._site = None
100
111
  self._app = None
101
112
 
102
- def register_rollout(self, rollout_id: str) -> asyncio.Queue:
113
+ def _set_rollout_error(self, rollout_id: str, error: BaseException) -> None:
114
+ """Attach `error` to the rollout's state if one is registered and
115
+ unset. First error wins — later failures (e.g. the downstream
116
+ `response_future` raising too) should not clobber the original cause.
117
+ """
118
+ context = self.active_rollouts.get(rollout_id)
119
+ if context is None:
120
+ return
121
+ state = context.get("state")
122
+ if state is None or state.get("error"):
123
+ return
124
+ state["error"] = error
125
+
126
+ def register_rollout(
127
+ self, rollout_id: str, state: dict[str, Any] | None = None
128
+ ) -> asyncio.Queue:
103
129
  request_queue: asyncio.Queue = asyncio.Queue()
104
130
  self.active_rollouts[rollout_id] = {
105
131
  "request_id_queue": request_queue,
132
+ "state": state,
106
133
  }
107
134
  return request_queue
108
135
 
@@ -214,6 +241,12 @@ class InterceptionServer:
214
241
  logger.debug(f"[{rollout_id}] Streaming cancelled")
215
242
  except Exception as e:
216
243
  logger.error(f"[{rollout_id}] Streaming error: {e}")
244
+ self._set_rollout_error(
245
+ rollout_id,
246
+ StreamInterrupted(
247
+ f"Interception stream to agent interrupted: {type(e).__name__}: {e}"
248
+ ),
249
+ )
217
250
  return response
218
251
 
219
252
  try:
@@ -1,63 +0,0 @@
1
- from verifiers.types import (
2
- Response,
3
- ResponseMessage,
4
- TextContentPart,
5
- ToolCall,
6
- Usage,
7
- )
8
- from verifiers.utils.interception_utils import (
9
- create_empty_completion,
10
- serialize_intercept_response,
11
- )
12
-
13
-
14
- def test_serialize_intercept_response_from_vf_response_uses_chat_completion_shape():
15
- response = Response(
16
- id="resp_1",
17
- created=123,
18
- model="test-model",
19
- usage=Usage(
20
- prompt_tokens=10,
21
- reasoning_tokens=0,
22
- completion_tokens=5,
23
- total_tokens=15,
24
- ),
25
- message=ResponseMessage(
26
- content=[TextContentPart(text="hello "), {"type": "text", "text": "world"}],
27
- reasoning_content=None,
28
- tool_calls=[
29
- ToolCall(id="call_1", name="echo", arguments='{"x": 1}'),
30
- ],
31
- finish_reason="tool_calls",
32
- is_truncated=False,
33
- tokens=None,
34
- ),
35
- )
36
-
37
- payload = serialize_intercept_response(response)
38
-
39
- assert payload["id"] == "resp_1"
40
- assert payload["object"] == "chat.completion"
41
- assert payload["model"] == "test-model"
42
- assert payload["choices"][0]["message"]["role"] == "assistant"
43
- assert payload["choices"][0]["message"]["content"] == "hello world"
44
- assert payload["choices"][0]["message"]["tool_calls"] == [
45
- {
46
- "id": "call_1",
47
- "type": "function",
48
- "function": {"name": "echo", "arguments": '{"x": 1}'},
49
- }
50
- ]
51
- assert payload["choices"][0]["finish_reason"] == "tool_calls"
52
- assert payload["usage"]["prompt_tokens"] == 10
53
- assert payload["usage"]["completion_tokens"] == 5
54
- assert payload["usage"]["total_tokens"] == 15
55
-
56
-
57
- def test_serialize_intercept_response_passthrough_native_chat_completion():
58
- native = create_empty_completion("native-model")
59
- payload = serialize_intercept_response(native)
60
-
61
- assert payload["object"] == "chat.completion"
62
- assert payload["model"] == "native-model"
63
- assert len(payload["choices"]) == 1
File without changes