verifiers 0.1.15.dev0__tar.gz → 0.1.15.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/LICENSE +2 -2
  2. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/PKG-INFO +2 -2
  3. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/pyproject.toml +2 -8
  4. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_renderer_client.py +18 -12
  5. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_save_utils.py +255 -0
  6. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/__init__.py +1 -1
  7. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/anthropic_messages_client.py +21 -0
  8. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_chat_completions_client.py +25 -0
  9. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/renderer_client.py +74 -15
  10. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/client/zmq_env_client.py +1 -1
  11. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/types.py +11 -0
  12. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/response_utils.py +9 -1
  13. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/save_utils.py +190 -0
  14. verifiers-0.1.15.dev1/verifiers/utils/serve_utils.py +129 -0
  15. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/runtime.py +2 -2
  16. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/lifecycle_utils.py +1 -1
  17. verifiers-0.1.15.dev0/verifiers/utils/serve_utils.py +0 -48
  18. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/.gitignore +0 -0
  19. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/README.md +0 -0
  20. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/AGENTS.md +0 -0
  21. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/README.md +0 -0
  22. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/__init__.py +0 -0
  23. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/conftest.py +0 -0
  24. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_browser_env.py +0 -0
  25. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_build_script.py +0 -0
  26. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_cli_agent_env.py +0 -0
  27. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_auth_errors.py +0 -0
  28. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_config.py +0 -0
  29. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_multimodal_types.py +0 -0
  30. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_composable_env.py +0 -0
  31. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_context_token_metrics.py +0 -0
  32. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_decorator_ranks.py +0 -0
  33. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_endpoint_registry.py +0 -0
  34. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_env_group.py +0 -0
  35. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_env_server.py +0 -0
  36. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_environment.py +0 -0
  37. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_environment_extra.py +0 -0
  38. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_envs.py +0 -0
  39. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_error_chain.py +0 -0
  40. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_cli.py +0 -0
  41. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_display.py +0 -0
  42. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_utils.py +0 -0
  43. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gepa_cli.py +0 -0
  44. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gepa_utils.py +0 -0
  45. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gym_env.py +0 -0
  46. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_harbor_env_mcp.py +0 -0
  47. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_imports.py +0 -0
  48. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_install_utils.py +0 -0
  49. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_interception_utils.py +0 -0
  50. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  51. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_lean_task.py +0 -0
  52. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_logging.py +0 -0
  53. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_math_rubric.py +0 -0
  54. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_maybe_think_parser.py +0 -0
  55. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_mcp_search_env.py +0 -0
  56. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_message_utils.py +0 -0
  57. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_message_utils_multimodal.py +0 -0
  58. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_multiturn_env.py +0 -0
  59. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_nemorl_client.py +0 -0
  60. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_openai_chat_completions_token_client.py +0 -0
  61. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_openai_responses_client.py +0 -0
  62. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_opencode_harbor.py +0 -0
  63. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_opencode_rlm_env.py +0 -0
  64. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_parser.py +0 -0
  65. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_path_utils.py +0 -0
  66. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_per_turn_timing.py +0 -0
  67. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_prime_plugin.py +0 -0
  68. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_renderer_e2e.py +0 -0
  69. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rlm_composable_env.py +0 -0
  70. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rlm_env.py +0 -0
  71. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rubric.py +0 -0
  72. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rubric_group.py +0 -0
  73. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_sandbox_env.py +0 -0
  74. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_sandbox_mixin.py +0 -0
  75. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_setup_script.py +0 -0
  76. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_singleturn_env.py +0 -0
  77. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_stateful_tool_env.py +0 -0
  78. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_think_parser.py +0 -0
  79. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tool_env.py +0 -0
  80. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tool_utils.py +0 -0
  81. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_trajectory_processing.py +0 -0
  82. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tui_info_formatting.py +0 -0
  83. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_types.py +0 -0
  84. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_bfcl.py +0 -0
  85. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_config_extension.py +0 -0
  86. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_endpoint_protocols.py +0 -0
  87. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_example_counts.py +0 -0
  88. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_group_reward_env.py +0 -0
  89. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_harbor_cli.py +0 -0
  90. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_mini_swe_agent.py +0 -0
  91. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_rlm_swe.py +0 -0
  92. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_runtime_lifecycle.py +0 -0
  93. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_scoring_functions.py +0 -0
  94. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_wordle_env.py +0 -0
  95. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_xml_parser.py +0 -0
  96. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/AGENTS.md +0 -0
  97. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/__init__.py +0 -0
  98. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/__init__.py +0 -0
  99. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/build.py +0 -0
  100. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/eval.py +0 -0
  101. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/gepa.py +0 -0
  102. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/init.py +0 -0
  103. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/install.py +0 -0
  104. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/setup.py +0 -0
  105. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/plugins/__init__.py +0 -0
  106. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/plugins/prime.py +0 -0
  107. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/tui.py +0 -0
  108. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/__init__.py +0 -0
  109. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/client.py +0 -0
  110. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  111. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
  112. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_completions_client.py +0 -0
  113. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_responses_client.py +0 -0
  114. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/decorators.py +0 -0
  115. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/AGENTS.md +0 -0
  116. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/__init__.py +0 -0
  117. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/env_group.py +0 -0
  118. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/environment.py +0 -0
  119. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/README.md +0 -0
  120. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/__init__.py +0 -0
  121. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  122. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/README.md +0 -0
  123. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/__init__.py +0 -0
  124. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/_filter.py +0 -0
  125. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/composable_env.py +0 -0
  126. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harness.py +0 -0
  127. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  128. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  129. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  130. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  131. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
  132. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
  133. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/task.py +0 -0
  134. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  135. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  136. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  137. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  138. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  139. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  140. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  141. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
  142. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  143. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  144. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  145. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  146. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  147. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
  148. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
  149. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  150. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
  151. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  152. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  153. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
  154. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
  155. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  156. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  157. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/gym_env.py +0 -0
  158. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  159. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  160. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
  161. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/mcp_env.py +0 -0
  162. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_env.py +0 -0
  163. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  164. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
  165. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/rlm_env.py +0 -0
  166. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  167. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/__init__.py +0 -0
  168. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  169. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
  170. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/README.md +0 -0
  171. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/__init__.py +0 -0
  172. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/README.md +0 -0
  173. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  174. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  175. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  176. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  177. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  178. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  179. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/openenv_env.py +0 -0
  180. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  181. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/textarena_env.py +0 -0
  182. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/multiturn_env.py +0 -0
  183. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/python_env.py +0 -0
  184. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/sandbox_env.py +0 -0
  185. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/singleturn_env.py +0 -0
  186. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/stateful_tool_env.py +0 -0
  187. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/tool_env.py +0 -0
  188. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/errors.py +0 -0
  189. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/__init__.py +0 -0
  190. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/adapter.py +0 -0
  191. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/config.py +0 -0
  192. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/display.py +0 -0
  193. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/gepa_utils.py +0 -0
  194. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/__init__.py +0 -0
  195. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/maybe_think_parser.py +0 -0
  196. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/parser.py +0 -0
  197. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/think_parser.py +0 -0
  198. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/xml_parser.py +0 -0
  199. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/README.md +0 -0
  200. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/__init__.py +0 -0
  201. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/__init__.py +0 -0
  202. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/client.py +0 -0
  203. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/server.py +0 -0
  204. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/__init__.py +0 -0
  205. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/config.py +0 -0
  206. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/orchestrator.py +0 -0
  207. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/trainer.py +0 -0
  208. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/utils.py +0 -0
  209. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/__init__.py +0 -0
  210. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  211. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/judge_rubric.py +0 -0
  212. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/math_rubric.py +0 -0
  213. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/rubric.py +0 -0
  214. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/rubric_group.py +0 -0
  215. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/__init__.py +0 -0
  216. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/build.py +0 -0
  217. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/eval.py +0 -0
  218. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/gepa.py +0 -0
  219. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/init.py +0 -0
  220. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/install.py +0 -0
  221. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/rl.py +0 -0
  222. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/setup.py +0 -0
  223. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/train.py +0 -0
  224. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/tui.py +0 -0
  225. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/vllm.py +0 -0
  226. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/__init__.py +0 -0
  227. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/client/env_client.py +0 -0
  228. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/__init__.py +0 -0
  229. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_router.py +0 -0
  230. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_server.py +0 -0
  231. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_worker.py +0 -0
  232. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/zmq_env_server.py +0 -0
  233. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/types.py +0 -0
  234. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/__init__.py +0 -0
  235. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/async_utils.py +0 -0
  236. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/client_utils.py +0 -0
  237. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/config_utils.py +0 -0
  238. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/data_utils.py +0 -0
  239. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/display_utils.py +0 -0
  240. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/env_config_utils.py +0 -0
  241. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/env_utils.py +0 -0
  242. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/error_utils.py +0 -0
  243. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/eval_display.py +0 -0
  244. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/eval_utils.py +0 -0
  245. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/heartbeat.py +0 -0
  246. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/import_utils.py +0 -0
  247. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/install_utils.py +0 -0
  248. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/interception_utils.py +0 -0
  249. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/logging_utils.py +0 -0
  250. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/message_utils.py +0 -0
  251. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/metric_utils.py +0 -0
  252. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/path_utils.py +0 -0
  253. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/process_utils.py +0 -0
  254. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/thread_utils.py +0 -0
  255. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/threaded_sandbox_client.py +0 -0
  256. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/tool_utils.py +0 -0
  257. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/tunnel_utils.py +0 -0
  258. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/usage_utils.py +0 -0
  259. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/version_utils.py +0 -0
  260. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/README.md +0 -0
  261. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/RE_MIGRATION.md +0 -0
  262. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/__init__.py +0 -0
  263. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/config.py +0 -0
  264. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/env.py +0 -0
  265. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/harness.py +0 -0
  266. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/__init__.py +0 -0
  267. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  268. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/cli.py +0 -0
  269. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/configs.py +0 -0
  270. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
  271. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/opencode.py +0 -0
  272. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/pi.py +0 -0
  273. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/rlm.py +0 -0
  274. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/tasksets/__init__.py +0 -0
  275. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/tasksets/harbor.py +0 -0
  276. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/state.py +0 -0
  277. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/task.py +0 -0
  278. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/taskset.py +0 -0
  279. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/toolset.py +0 -0
  280. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/user.py +0 -0
  281. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/__init__.py +0 -0
  282. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/artifact_utils.py +0 -0
  283. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/endpoint_utils.py +0 -0
  284. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/json_utils.py +0 -0
  285. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/judge_utils.py +0 -0
  286. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  287. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/mcp_utils.py +0 -0
  288. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/program_utils.py +0 -0
  289. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/prompt_utils.py +0 -0
  290. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  291. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/sandbox_utils.py +0 -0
  292. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/scoring_utils.py +0 -0
  293. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/timing_utils.py +0 -0
  294. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/tool_utils.py +0 -0
  295. {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/trajectory_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 William Brown
3
+ Copyright (c) 2026 Prime Intellect
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev0
3
+ Version: 0.1.15.dev1
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -54,7 +54,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
54
54
  Provides-Extra: openenv
55
55
  Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
56
56
  Provides-Extra: renderers
57
- Requires-Dist: renderers>=0.1.6; extra == 'renderers'
57
+ Requires-Dist: renderers>=0.1.8.dev0; extra == 'renderers'
58
58
  Provides-Extra: rg
59
59
  Requires-Dist: reasoning-gym; extra == 'rg'
60
60
  Provides-Extra: rl
@@ -73,7 +73,7 @@ dev = [
73
73
  "aiohttp>=3.9.0",
74
74
  "python-dotenv>=1.0.0",
75
75
  "nltk",
76
- "renderers>=0.1.6",
76
+ "renderers>=0.1.8.dev0",
77
77
  ]
78
78
 
79
79
  [project.optional-dependencies]
@@ -93,7 +93,7 @@ browser = [
93
93
  "python-dotenv>=1.0.0",
94
94
  ]
95
95
  renderers = [
96
- "renderers>=0.1.6",
96
+ "renderers>=0.1.8.dev0",
97
97
  ]
98
98
  rl = [
99
99
  "torch>=2.8.0,<2.9.0",
@@ -124,12 +124,6 @@ prime-tunnel = false
124
124
  prime-sandboxes = false
125
125
  renderers = false
126
126
 
127
- [tool.uv.sources]
128
- # Pinned to renderers main until the next PyPI release lands; drop after.
129
- # fe67f9f = renderers main: PR #4 squash-merge — construction-time
130
- # preserve_*_thinking flags on create_renderer / create_renderer_pool.
131
- renderers = { git = "https://github.com/PrimeIntellect-ai/renderers.git", rev = "fe67f9f" }
132
-
133
127
  [tool.uv.extra-build-dependencies]
134
128
  flash-attn = [{ requirement = "torch", match-runtime = true }]
135
129
 
@@ -5,7 +5,7 @@ import pytest
5
5
 
6
6
  import verifiers as vf
7
7
  from renderers import RendererPool
8
- from renderers.base import ParsedResponse, create_renderer
8
+ from renderers.base import ParsedResponse, RenderedTokens, create_renderer
9
9
  from verifiers.clients.renderer_client import (
10
10
  RendererClient,
11
11
  _attach_tool_call_names,
@@ -280,11 +280,13 @@ class _BridgeRenderer:
280
280
  stop_idx = len(self.bridge_base) - 1
281
281
  trailing = list(self.bridge_base[stop_idx + 1 :])
282
282
  extension = list(self.bridge_full[len(self.bridge_base) :])
283
- return (
284
- list(previous_prompt_ids)
285
- + list(previous_completion_ids)
286
- + trailing
287
- + extension
283
+ return RenderedTokens(
284
+ token_ids=(
285
+ list(previous_prompt_ids)
286
+ + list(previous_completion_ids)
287
+ + trailing
288
+ + extension
289
+ )
288
290
  )
289
291
 
290
292
  def parse_response(self, token_ids):
@@ -345,7 +347,8 @@ async def test_get_incremental_prompt_ids_matches_tool_tail_without_rerendering_
345
347
  renderer=renderer, prompt=prompt, state=state, tools=None
346
348
  )
347
349
 
348
- assert result == [1, 2, 3, 99, 30, 40]
350
+ assert result is not None
351
+ assert result.token_ids == [1, 2, 3, 99, 30, 40]
349
352
  # The bridge stitches over the completion without re-rendering it —
350
353
  # one bridge call, zero render_ids calls (older diff-based bridges
351
354
  # called render_ids twice).
@@ -387,7 +390,8 @@ async def test_get_incremental_prompt_ids_accepts_tool_then_user_tail():
387
390
  renderer=renderer, prompt=prompt, state=state, tools=None
388
391
  )
389
392
 
390
- assert result == [1, 2, 3, 99, 40, 50]
393
+ assert result is not None
394
+ assert result.token_ids == [1, 2, 3, 99, 40, 50]
391
395
 
392
396
 
393
397
  @pytest.mark.asyncio
@@ -446,7 +450,8 @@ async def test_get_incremental_prompt_ids_accepts_multimodal_tool_user_tail():
446
450
  renderer=renderer, prompt=prompt, state=state, tools=None
447
451
  )
448
452
 
449
- assert result == [1, 2, 3, 99, 40, 50]
453
+ assert result is not None
454
+ assert result.token_ids == [1, 2, 3, 99, 40, 50]
450
455
 
451
456
 
452
457
  # ── Parity across real renderers: truncated most-recent step ──────────
@@ -478,7 +483,7 @@ _TRUNCATED_ANCHOR_MODELS = [
478
483
  "auto",
479
484
  id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
480
485
  ),
481
- pytest.param("openai/gpt-oss-20b", "gpt_oss", id="openai/gpt-oss-20b"),
486
+ pytest.param("openai/gpt-oss-20b", "gpt-oss", id="openai/gpt-oss-20b"),
482
487
  ]
483
488
 
484
489
 
@@ -552,11 +557,12 @@ async def test_get_incremental_prompt_ids_bridges_over_truncated_step(
552
557
 
553
558
  prefix = list(prev_prompt_ids) + list(prev_completion_ids)
554
559
  assert result is not None, f"{model_id}: bridge returned None on truncated anchor"
555
- assert result[: len(prefix)] == prefix, (
560
+ result_ids = result.token_ids
561
+ assert result_ids[: len(prefix)] == prefix, (
556
562
  f"{model_id}: bridge result does not prefix-preserve "
557
563
  f"prev_prompt + prev_completion"
558
564
  )
559
- assert len(result) > len(prefix), (
565
+ assert len(result_ids) > len(prefix), (
560
566
  f"{model_id}: bridge produced no tail tokens for the new user turn"
561
567
  )
562
568
 
@@ -27,6 +27,7 @@ from verifiers.utils.metric_utils import (
27
27
  )
28
28
  from verifiers.utils.save_utils import (
29
29
  GenerateOutputsBuilder,
30
+ _delta_intermediate_mm_data,
30
31
  extract_usage_tokens,
31
32
  load_outputs,
32
33
  make_serializable,
@@ -897,3 +898,257 @@ class TestPassAtKMetric:
897
898
  )
898
899
  pass_at_k, _ = m.compute()
899
900
  assert pass_at_k["1"] == pytest.approx(0.5)
901
+
902
+
903
+ class TestDeltaIntermediateMmData:
904
+ """Verify per-step delta encoding of trajectory mm_data sidecars.
905
+
906
+ Renderer bridge_to_next_turn emits cumulative mm_data on every
907
+ step. The transport-layer delta strips items whose mm_hash already
908
+ appeared in the prior step, so the per-window TrainingSample
909
+ assembler can recover its window's images by unioning step-deltas.
910
+ """
911
+
912
+ @staticmethod
913
+ def _mm(*hashes: str):
914
+ """Build a renderers.MultiModalData with one image item per hash."""
915
+ from renderers.base import MultiModalData, PlaceholderRange
916
+
917
+ return MultiModalData(
918
+ mm_hashes={"image": list(hashes)},
919
+ mm_placeholders={
920
+ "image": [
921
+ PlaceholderRange(offset=i * 10, length=4)
922
+ for i in range(len(hashes))
923
+ ]
924
+ },
925
+ mm_items={"image": [{"pixel_values": f"px-{h}"} for h in hashes]},
926
+ )
927
+
928
+ def _step(self, mm):
929
+ return {"tokens": {"multi_modal_data": mm}}
930
+
931
+ def test_none_and_single_step_passthrough(self):
932
+ assert _delta_intermediate_mm_data(None) is None
933
+ assert _delta_intermediate_mm_data([]) == []
934
+ only = [self._step(self._mm("A"))]
935
+ assert _delta_intermediate_mm_data(only) is only
936
+
937
+ def test_linear_extension_keeps_only_new_items_per_step(self):
938
+ traj = [
939
+ self._step(self._mm("A")),
940
+ self._step(self._mm("A", "B")),
941
+ self._step(self._mm("A", "B", "C")),
942
+ ]
943
+ out = _delta_intermediate_mm_data(traj)
944
+
945
+ assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
946
+ assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
947
+ assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}
948
+ # Items and placeholders are reindexed in lockstep with hashes.
949
+ assert out[1]["tokens"]["multi_modal_data"].mm_items["image"] == [
950
+ {"pixel_values": "px-B"}
951
+ ]
952
+ assert (
953
+ out[2]["tokens"]["multi_modal_data"].mm_placeholders["image"][0].offset
954
+ == 20
955
+ )
956
+
957
+ def test_compaction_two_training_samples_assemble_correctly(self):
958
+ """Rollout with one compaction event → two TrainingSamples.
959
+
960
+ Models the prime-rl compaction flow: a single rollout produces
961
+ multiple ``TrainingSample`` objects, one per compaction window.
962
+ The pre-compaction sample's images are no longer in the
963
+ post-compaction step's cumulative ``mm_data`` — the previous
964
+ "keep last" strategy would have silently dropped them. With
965
+ delta encoding, each per-window assembler recovers exactly the
966
+ images its tokens reference: no leakage in either direction.
967
+ """
968
+ from renderers.base import MultiModalData, PlaceholderRange
969
+
970
+ def step(*hashes: str, offsets: list[int]):
971
+ return {
972
+ "tokens": {
973
+ "multi_modal_data": MultiModalData(
974
+ mm_hashes={"image": list(hashes)},
975
+ mm_placeholders={
976
+ "image": [
977
+ PlaceholderRange(offset=o, length=4) for o in offsets
978
+ ]
979
+ },
980
+ mm_items={
981
+ "image": [{"pixel_values": f"px-{h}"} for h in hashes]
982
+ },
983
+ )
984
+ }
985
+ }
986
+
987
+ # Turn 1: image A. Cumulative {A}.
988
+ # Turn 2: image B. Cumulative {A, B}.
989
+ # ── compaction event: turns 1+2 summarized in text, images dropped ──
990
+ # Turn 3: image C. Cumulative {C} (offsets reset against the
991
+ # post-compaction prompt).
992
+ # Turn 4: image D. Cumulative {C, D}.
993
+ traj = [
994
+ step("A", offsets=[10]),
995
+ step("A", "B", offsets=[10, 50]),
996
+ step("C", offsets=[8]),
997
+ step("C", "D", offsets=[8, 40]),
998
+ ]
999
+ out = _delta_intermediate_mm_data(traj)
1000
+
1001
+ # Per-step deltas keep only what's new since the immediately prior step.
1002
+ deltas = [s["tokens"]["multi_modal_data"].mm_hashes for s in out]
1003
+ assert deltas == [
1004
+ {"image": ["A"]},
1005
+ {"image": ["B"]},
1006
+ {"image": ["C"]},
1007
+ {"image": ["D"]},
1008
+ ]
1009
+
1010
+ def assemble(steps):
1011
+ hashes: list[str] = []
1012
+ items: list[dict] = []
1013
+ placeholders: list[PlaceholderRange] = []
1014
+ for s in steps:
1015
+ mm = s["tokens"]["multi_modal_data"]
1016
+ hashes += mm.mm_hashes.get("image", [])
1017
+ items += mm.mm_items.get("image", [])
1018
+ placeholders += mm.mm_placeholders.get("image", [])
1019
+ return hashes, items, placeholders
1020
+
1021
+ ts1_hashes, ts1_items, ts1_phs = assemble(out[0:2]) # pre-compaction
1022
+ ts2_hashes, ts2_items, ts2_phs = assemble(out[2:4]) # post-compaction
1023
+
1024
+ assert ts1_hashes == ["A", "B"]
1025
+ assert ts2_hashes == ["C", "D"]
1026
+ # The invariant the previous "keep last" broke: pre-compaction TS
1027
+ # does not see post-compaction images, and vice versa.
1028
+ assert set(ts1_hashes).isdisjoint(set(ts2_hashes))
1029
+
1030
+ # Items / placeholders are reindexed lock-step with hashes (no
1031
+ # off-by-one or cross-contamination during reindex).
1032
+ assert ts1_items == [{"pixel_values": "px-A"}, {"pixel_values": "px-B"}]
1033
+ assert ts2_items == [{"pixel_values": "px-C"}, {"pixel_values": "px-D"}]
1034
+
1035
+ # Placeholder offsets travel verbatim per step; the assembler is
1036
+ # responsible for shifting them into each window's local frame.
1037
+ assert [p.offset for p in ts1_phs] == [10, 50]
1038
+ assert [p.offset for p in ts2_phs] == [8, 40]
1039
+
1040
+ def test_same_image_rendered_in_two_turns_uses_multiset_diff(self):
1041
+ """Same image hash appearing N times must keep the right N-prior occurrences.
1042
+
1043
+ The renderer doesn't dedupe by hash: ``emit_image`` appends to
1044
+ the parallel lists every time an image content part is rendered.
1045
+ So if image A is shown in turn 1 *and* turn 3, the cumulative
1046
+ ``mm_hashes`` is ``["A", "A"]`` with two distinct placeholder
1047
+ offsets, and ``mm_items`` is ``[pixA, pixA]`` (literally the
1048
+ same payload twice). Both placeholder runs need their own item
1049
+ — set-based diff would drop both as "already seen" and orphan
1050
+ the second placeholder. Multiset diff drops only the first.
1051
+ """
1052
+ from renderers.base import MultiModalData, PlaceholderRange
1053
+
1054
+ def step(hashes, offsets):
1055
+ return {
1056
+ "tokens": {
1057
+ "multi_modal_data": MultiModalData(
1058
+ mm_hashes={"image": list(hashes)},
1059
+ mm_placeholders={
1060
+ "image": [
1061
+ PlaceholderRange(offset=o, length=4) for o in offsets
1062
+ ]
1063
+ },
1064
+ mm_items={
1065
+ "image": [{"pixel_values": f"px-{h}"} for h in hashes]
1066
+ },
1067
+ )
1068
+ }
1069
+ }
1070
+
1071
+ # Turn 1: image A at offset 10. Cumulative ["A"].
1072
+ # Turn 2: no image. Cumulative unchanged ["A"].
1073
+ # Turn 3: image A re-rendered at offset 200. Cumulative ["A", "A"].
1074
+ traj = [
1075
+ step(["A"], offsets=[10]),
1076
+ step(["A"], offsets=[10]),
1077
+ step(["A", "A"], offsets=[10, 200]),
1078
+ ]
1079
+ out = _delta_intermediate_mm_data(traj)
1080
+
1081
+ # Step 0 keeps everything (no prior).
1082
+ assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
1083
+ assert [
1084
+ p.offset
1085
+ for p in out[0]["tokens"]["multi_modal_data"].mm_placeholders["image"]
1086
+ ] == [10]
1087
+
1088
+ # Step 1 introduced no new image (cumulative unchanged).
1089
+ assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
1090
+
1091
+ # Step 2: prior was ["A"], current is ["A", "A"]. Multiset budget
1092
+ # consumes the first A; the *second* A (the new one at offset
1093
+ # 200) survives the diff with its pixel_values intact. Set-based
1094
+ # diff would have produced [].
1095
+ step2_mm = out[2]["tokens"]["multi_modal_data"]
1096
+ assert step2_mm.mm_hashes == {"image": ["A"]}
1097
+ assert step2_mm.mm_items == {"image": [{"pixel_values": "px-A"}]}
1098
+ assert [p.offset for p in step2_mm.mm_placeholders["image"]] == [200]
1099
+
1100
+ # End-to-end: assembling the single TrainingSample (no
1101
+ # compaction) recovers both placeholder runs with matching
1102
+ # pixel_values, so the trainer can satisfy both image-pad
1103
+ # token runs in the prompt.
1104
+ all_hashes: list[str] = []
1105
+ all_phs: list[PlaceholderRange] = []
1106
+ for s in out:
1107
+ mm = s["tokens"]["multi_modal_data"]
1108
+ all_hashes += mm.mm_hashes.get("image", [])
1109
+ all_phs += mm.mm_placeholders.get("image", [])
1110
+ assert all_hashes == ["A", "A"]
1111
+ assert [p.offset for p in all_phs] == [10, 200]
1112
+
1113
+ def test_image_reintroduction_after_compaction(self):
1114
+ """A hash dropped at compaction and re-rendered later is re-transmitted.
1115
+
1116
+ The delta is computed against the *immediately prior step's*
1117
+ cumulative, not a global seen-set. If image A appears in turn
1118
+ 1, is compacted away (step 2's cumulative is empty), and is
1119
+ re-rendered in turn 3, A shows up in step 0's delta *and* step
1120
+ 2's delta — necessary so the post-compaction TrainingSample
1121
+ also receives A's bytes.
1122
+ """
1123
+ traj = [
1124
+ self._step(self._mm("A")),
1125
+ self._step(self._mm()),
1126
+ self._step(self._mm("A")),
1127
+ ]
1128
+ out = _delta_intermediate_mm_data(traj)
1129
+
1130
+ assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
1131
+ assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
1132
+ # A re-emerges in step 2's delta — its absence from step 1's
1133
+ # cumulative means it counts as "new" again.
1134
+ assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
1135
+
1136
+ def test_steps_with_no_new_items_collapse_to_empty_delta(self):
1137
+ # Step 2's cumulative equals step 1's — no new items.
1138
+ traj = [
1139
+ self._step(self._mm("A", "B")),
1140
+ self._step(self._mm("A", "B")),
1141
+ self._step(self._mm("A", "B", "C")),
1142
+ ]
1143
+ out = _delta_intermediate_mm_data(traj)
1144
+
1145
+ assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
1146
+ assert out[1]["tokens"]["multi_modal_data"].mm_items == {"image": []}
1147
+ assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}
1148
+
1149
+ def test_non_mapping_steps_pass_through(self):
1150
+ traj = [self._step(self._mm("A")), "not-a-dict", self._step(self._mm("A", "B"))]
1151
+ out = _delta_intermediate_mm_data(traj)
1152
+ assert out[1] == "not-a-dict"
1153
+ # Delta of step 2 still computed against step 0 (last seen cumulative).
1154
+ assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
@@ -1,4 +1,4 @@
1
- __version__ = "0.1.15.dev0"
1
+ __version__ = "0.1.15.dev1"
2
2
 
3
3
  import importlib
4
4
  import os
@@ -50,6 +50,13 @@ from verifiers.types import (
50
50
  from verifiers.utils.client_utils import setup_anthropic_client
51
51
 
52
52
 
53
+ ANTHROPIC_ADAPTIVE_THINKING_MODELS = {
54
+ "claude-opus-4-7",
55
+ "claude-opus-4-6",
56
+ "claude-sonnet-4-6",
57
+ }
58
+
59
+
53
60
  def _handle_anthropic_overlong_prompt(func):
54
61
  """Decorator to handle overlong prompt errors from the Anthropic API."""
55
62
 
@@ -342,6 +349,20 @@ class AnthropicMessagesClient(
342
349
  ) -> AnthropicMessage:
343
350
  def normalize_sampling_args(sampling_args: SamplingArgs) -> dict:
344
351
  sampling_args = dict(sampling_args)
352
+ reasoning_effort = sampling_args.pop("reasoning_effort", None)
353
+ if reasoning_effort is not None:
354
+ model_id = (
355
+ model.lower().split("/")[-1].replace(".", "-").replace("_", "-")
356
+ )
357
+ output_config = dict(sampling_args.get("output_config") or {})
358
+ output_config["effort"] = reasoning_effort
359
+ sampling_args["output_config"] = output_config
360
+ if "thinking" not in sampling_args and any(
361
+ model_id == adaptive_model
362
+ or model_id.startswith(f"{adaptive_model}-")
363
+ for adaptive_model in ANTHROPIC_ADAPTIVE_THINKING_MODELS
364
+ ):
365
+ sampling_args["thinking"] = {"type": "adaptive"}
345
366
  max_tokens = sampling_args.pop("max_tokens", None)
346
367
  sampling_args.pop("n", None)
347
368
  sampling_args.pop("stop", None)
@@ -252,6 +252,31 @@ class OpenAIChatCompletionsClient(
252
252
  ) -> OpenAIChatResponse:
253
253
  def normalize_sampling_args(sampling_args: SamplingArgs):
254
254
  sampling_args = dict(sampling_args)
255
+ api_base_url = None
256
+ if hasattr(self.client, "base_url"):
257
+ api_base_url = str(self.client.base_url)
258
+ elif self._config is not None:
259
+ api_base_url = self._config.api_base_url
260
+ reasoning_effort = sampling_args.pop("reasoning_effort", None)
261
+ model_id = model.lower().split("/")[-1].replace(".", "-").replace("_", "-")
262
+ is_anthropic_route = (
263
+ "openrouter.ai" in (api_base_url or "").lower()
264
+ or "pinference.ai" in (api_base_url or "").lower()
265
+ )
266
+ if (
267
+ reasoning_effort is not None
268
+ and model_id.startswith("claude-")
269
+ and is_anthropic_route
270
+ ):
271
+ # OpenRouter/Pinference route Anthropic reasoning_effort through extra_body.
272
+ extra_body = dict(sampling_args.get("extra_body") or {})
273
+ extra_body["verbosity"] = reasoning_effort
274
+ reasoning = dict(extra_body.get("reasoning") or {})
275
+ reasoning.setdefault("enabled", True)
276
+ extra_body["reasoning"] = reasoning
277
+ sampling_args["extra_body"] = extra_body
278
+ elif reasoning_effort is not None:
279
+ sampling_args["reasoning_effort"] = reasoning_effort
255
280
  if "max_tokens" in sampling_args:
256
281
  sampling_args["max_completion_tokens"] = sampling_args.pop("max_tokens")
257
282
  return {k: v for k, v in sampling_args.items() if v is not None}
@@ -20,10 +20,13 @@ from openai import AsyncOpenAI
20
20
 
21
21
  from renderers import Message as RendererMessage
22
22
  from renderers import (
23
+ MultimodalRenderer,
24
+ RenderedTokens,
23
25
  Renderer,
24
26
  RendererPool,
25
27
  ToolSpec,
26
28
  create_renderer_pool,
29
+ is_multimodal,
27
30
  )
28
31
  from renderers import ToolCall as RendererToolCall
29
32
  from renderers import ToolCallFunction
@@ -94,15 +97,15 @@ _DEFAULT_POOL_SIZE = 1
94
97
  # ── Helpers ─────────────────────────────────────────────────────────
95
98
 
96
99
 
97
- async def _run_with_renderer(renderer: Renderer | RendererPool, fn):
98
- if isinstance(renderer, RendererPool):
99
-
100
- def _work():
101
- with renderer.checkout() as r:
102
- return fn(r)
100
+ async def _maybe_offload(renderer: Renderer | RendererPool, fn):
101
+ """Run sync renderer work on a thread iff ``renderer`` is a pool.
103
102
 
104
- return await asyncio.to_thread(_work)
105
- return fn(renderer)
103
+ Pool methods can block on the internal queue/lock; we offload to keep
104
+ the event loop responsive. A bare ``Renderer`` runs inline.
105
+ """
106
+ if isinstance(renderer, RendererPool):
107
+ return await asyncio.to_thread(fn)
108
+ return fn()
106
109
 
107
110
 
108
111
  def _get_value(obj: Any, key: str, default: Any = None) -> Any:
@@ -295,6 +298,28 @@ def _step_token_ids(step: Any) -> tuple[list[int], list[int]] | None:
295
298
  return list(prompt_ids), list(completion_ids)
296
299
 
297
300
 
301
+ def _step_multi_modal_data(step: Any):
302
+ """Recover the previous turn's ``MultiModalData`` for bridging.
303
+
304
+ Mirrors :func:`_step_token_ids`: prefer ``step.tokens.multi_modal_data``
305
+ (post-parse_response_tokens), fall back to ``step.response.message.tokens``.
306
+ Returns ``None`` when no multimodal sidecar was emitted (text-only
307
+ rollouts) — the bridge handles that branch transparently.
308
+ """
309
+ tokens = _get_value(step, "tokens")
310
+ if tokens is not None:
311
+ mm = _get_value(tokens, "multi_modal_data")
312
+ if mm is not None:
313
+ return mm
314
+
315
+ response = _get_value(step, "response")
316
+ message = _get_value(response, "message")
317
+ raw_tokens = _get_value(message, "tokens")
318
+ if raw_tokens is None:
319
+ return None
320
+ return _get_value(raw_tokens, "multi_modal_data")
321
+
322
+
298
323
  def _step_rendered_messages(step: Any) -> list[RendererMessage]:
299
324
  prompt = list(_get_value(step, "prompt", []) or [])
300
325
  completion = list(_get_value(step, "completion", []) or [])
@@ -309,7 +334,13 @@ async def _get_incremental_prompt_ids(
309
334
  prompt: list[RendererMessage],
310
335
  state: Any,
311
336
  tools: list[ToolSpec] | None,
312
- ) -> list[int] | None:
337
+ ) -> "RenderedTokens | None":
338
+ """Return the bridged prompt for the next turn as ``RenderedTokens``.
339
+
340
+ Returns ``None`` when no prior trajectory step lines up with the new
341
+ prompt's prefix or the renderer's ``bridge_to_next_turn`` can't extend
342
+ — both cases fall back to a full re-render in :func:`generate`.
343
+ """
313
344
  if not state:
314
345
  return None
315
346
 
@@ -342,15 +373,32 @@ async def _get_incremental_prompt_ids(
342
373
  continue
343
374
 
344
375
  previous_prompt_ids, previous_completion_ids = token_ids
345
- bridged = await _run_with_renderer(
346
- renderer,
347
- lambda r: r.bridge_to_next_turn(
376
+ previous_mm_data = _step_multi_modal_data(step)
377
+ # Multimodal renderers' bridge accepts ``previous_multi_modal_data``
378
+ # so earlier-turn images carry forward into the new prompt's
379
+ # ``mm_placeholders``. Without that carry-forward, vLLM sees
380
+ # placeholder counts that don't match the combined token sequence
381
+ # and silently falls back to hash-cache lookup (or errors).
382
+ # Text-only renderers' bridge signature doesn't include that
383
+ # kwarg. ``is_multimodal`` is type-cached so this dispatch is a
384
+ # dict lookup, not a runtime_checkable Protocol walk.
385
+ if is_multimodal(renderer):
386
+ mm_renderer = cast(MultimodalRenderer, renderer)
387
+ bridge = lambda: mm_renderer.bridge_to_next_turn( # noqa: E731
348
388
  previous_prompt_ids,
349
389
  previous_completion_ids,
350
390
  tail,
351
391
  tools=tools,
352
- ),
353
- )
392
+ previous_multi_modal_data=previous_mm_data,
393
+ )
394
+ else:
395
+ bridge = lambda: renderer.bridge_to_next_turn( # noqa: E731
396
+ previous_prompt_ids,
397
+ previous_completion_ids,
398
+ tail,
399
+ tools=tools,
400
+ )
401
+ bridged = await _maybe_offload(renderer, bridge)
354
402
  _record_bridge(success=bridged is not None)
355
403
  return bridged
356
404
 
@@ -514,12 +562,21 @@ class RendererClient(
514
562
  if args.get("prompt_logprobs"):
515
563
  sampling_params["prompt_logprobs"] = 1
516
564
 
517
- prompt_ids = await _get_incremental_prompt_ids(
565
+ bridged = await _get_incremental_prompt_ids(
518
566
  renderer=renderer,
519
567
  prompt=prompt,
520
568
  state=kwargs.get("state"),
521
569
  tools=tools,
522
570
  )
571
+ # ``bridged`` is RenderedTokens | None. Unpack token_ids + mm_data
572
+ # so multimodal renderers thread per-image features through to
573
+ # /inference/v1/generate without re-rendering the whole turn.
574
+ if bridged is not None:
575
+ prompt_ids = bridged.token_ids
576
+ multi_modal_data = bridged.multi_modal_data
577
+ else:
578
+ prompt_ids = None
579
+ multi_modal_data = None
523
580
 
524
581
  return await generate(
525
582
  client=self.client,
@@ -527,6 +584,7 @@ class RendererClient(
527
584
  messages=prompt,
528
585
  model=model,
529
586
  prompt_ids=prompt_ids,
587
+ multi_modal_data=multi_modal_data,
530
588
  tools=tools,
531
589
  sampling_params=sampling_params,
532
590
  cache_salt=args.get("cache_salt")
@@ -580,6 +638,7 @@ class RendererClient(
580
638
  completion_mask=[1] * len(completion_ids),
581
639
  completion_logprobs=completion_logprobs,
582
640
  routed_experts=response.get("routed_experts"),
641
+ multi_modal_data=response.get("multi_modal_data"),
583
642
  )
584
643
 
585
644
  # /inference/v1/generate doesn't return usage; reconstruct from tokens.
@@ -30,7 +30,7 @@ from verifiers.serve.types import (
30
30
  class ZMQEnvClient(EnvClient):
31
31
  """ZMQ-based environment client."""
32
32
 
33
- DEFAULT_REQUEST_TIMEOUT = 36_000 # 10h
33
+ DEFAULT_REQUEST_TIMEOUT: float | None = None
34
34
 
35
35
  def __init__(self, address: str = "tcp://127.0.0.1:5000", **kwargs):
36
36
  super().__init__(address=address, **kwargs)