verifiers 0.1.15.dev0__tar.gz → 0.1.15.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/LICENSE +2 -2
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/PKG-INFO +2 -2
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/pyproject.toml +2 -8
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_renderer_client.py +18 -12
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_save_utils.py +255 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/__init__.py +1 -1
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/anthropic_messages_client.py +21 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_chat_completions_client.py +25 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/renderer_client.py +74 -15
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/client/zmq_env_client.py +1 -1
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/types.py +11 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/response_utils.py +9 -1
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/save_utils.py +190 -0
- verifiers-0.1.15.dev1/verifiers/utils/serve_utils.py +129 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/runtime.py +2 -2
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/lifecycle_utils.py +1 -1
- verifiers-0.1.15.dev0/verifiers/utils/serve_utils.py +0 -48
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/.gitignore +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_config_extension.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_harbor_cli.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/README.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/RE_MIGRATION.md +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/config.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/harness.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/cli.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/configs.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/pi.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/packages/tasksets/harbor.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/taskset.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/artifact_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/prompt_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/timing_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/v1/utils/trajectory_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c)
|
|
3
|
+
Copyright (c) 2026 Prime Intellect
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev0
|
|
3
|
+
Version: 0.1.15.dev1
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -54,7 +54,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
|
54
54
|
Provides-Extra: openenv
|
|
55
55
|
Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
|
|
56
56
|
Provides-Extra: renderers
|
|
57
|
-
Requires-Dist: renderers>=0.1.
|
|
57
|
+
Requires-Dist: renderers>=0.1.8.dev0; extra == 'renderers'
|
|
58
58
|
Provides-Extra: rg
|
|
59
59
|
Requires-Dist: reasoning-gym; extra == 'rg'
|
|
60
60
|
Provides-Extra: rl
|
|
@@ -73,7 +73,7 @@ dev = [
|
|
|
73
73
|
"aiohttp>=3.9.0",
|
|
74
74
|
"python-dotenv>=1.0.0",
|
|
75
75
|
"nltk",
|
|
76
|
-
"renderers>=0.1.
|
|
76
|
+
"renderers>=0.1.8.dev0",
|
|
77
77
|
]
|
|
78
78
|
|
|
79
79
|
[project.optional-dependencies]
|
|
@@ -93,7 +93,7 @@ browser = [
|
|
|
93
93
|
"python-dotenv>=1.0.0",
|
|
94
94
|
]
|
|
95
95
|
renderers = [
|
|
96
|
-
"renderers>=0.1.
|
|
96
|
+
"renderers>=0.1.8.dev0",
|
|
97
97
|
]
|
|
98
98
|
rl = [
|
|
99
99
|
"torch>=2.8.0,<2.9.0",
|
|
@@ -124,12 +124,6 @@ prime-tunnel = false
|
|
|
124
124
|
prime-sandboxes = false
|
|
125
125
|
renderers = false
|
|
126
126
|
|
|
127
|
-
[tool.uv.sources]
|
|
128
|
-
# Pinned to renderers main until the next PyPI release lands; drop after.
|
|
129
|
-
# fe67f9f = renderers main: PR #4 squash-merge — construction-time
|
|
130
|
-
# preserve_*_thinking flags on create_renderer / create_renderer_pool.
|
|
131
|
-
renderers = { git = "https://github.com/PrimeIntellect-ai/renderers.git", rev = "fe67f9f" }
|
|
132
|
-
|
|
133
127
|
[tool.uv.extra-build-dependencies]
|
|
134
128
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
135
129
|
|
|
@@ -5,7 +5,7 @@ import pytest
|
|
|
5
5
|
|
|
6
6
|
import verifiers as vf
|
|
7
7
|
from renderers import RendererPool
|
|
8
|
-
from renderers.base import ParsedResponse, create_renderer
|
|
8
|
+
from renderers.base import ParsedResponse, RenderedTokens, create_renderer
|
|
9
9
|
from verifiers.clients.renderer_client import (
|
|
10
10
|
RendererClient,
|
|
11
11
|
_attach_tool_call_names,
|
|
@@ -280,11 +280,13 @@ class _BridgeRenderer:
|
|
|
280
280
|
stop_idx = len(self.bridge_base) - 1
|
|
281
281
|
trailing = list(self.bridge_base[stop_idx + 1 :])
|
|
282
282
|
extension = list(self.bridge_full[len(self.bridge_base) :])
|
|
283
|
-
return (
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
283
|
+
return RenderedTokens(
|
|
284
|
+
token_ids=(
|
|
285
|
+
list(previous_prompt_ids)
|
|
286
|
+
+ list(previous_completion_ids)
|
|
287
|
+
+ trailing
|
|
288
|
+
+ extension
|
|
289
|
+
)
|
|
288
290
|
)
|
|
289
291
|
|
|
290
292
|
def parse_response(self, token_ids):
|
|
@@ -345,7 +347,8 @@ async def test_get_incremental_prompt_ids_matches_tool_tail_without_rerendering_
|
|
|
345
347
|
renderer=renderer, prompt=prompt, state=state, tools=None
|
|
346
348
|
)
|
|
347
349
|
|
|
348
|
-
assert result
|
|
350
|
+
assert result is not None
|
|
351
|
+
assert result.token_ids == [1, 2, 3, 99, 30, 40]
|
|
349
352
|
# The bridge stitches over the completion without re-rendering it —
|
|
350
353
|
# one bridge call, zero render_ids calls (older diff-based bridges
|
|
351
354
|
# called render_ids twice).
|
|
@@ -387,7 +390,8 @@ async def test_get_incremental_prompt_ids_accepts_tool_then_user_tail():
|
|
|
387
390
|
renderer=renderer, prompt=prompt, state=state, tools=None
|
|
388
391
|
)
|
|
389
392
|
|
|
390
|
-
assert result
|
|
393
|
+
assert result is not None
|
|
394
|
+
assert result.token_ids == [1, 2, 3, 99, 40, 50]
|
|
391
395
|
|
|
392
396
|
|
|
393
397
|
@pytest.mark.asyncio
|
|
@@ -446,7 +450,8 @@ async def test_get_incremental_prompt_ids_accepts_multimodal_tool_user_tail():
|
|
|
446
450
|
renderer=renderer, prompt=prompt, state=state, tools=None
|
|
447
451
|
)
|
|
448
452
|
|
|
449
|
-
assert result
|
|
453
|
+
assert result is not None
|
|
454
|
+
assert result.token_ids == [1, 2, 3, 99, 40, 50]
|
|
450
455
|
|
|
451
456
|
|
|
452
457
|
# ── Parity across real renderers: truncated most-recent step ──────────
|
|
@@ -478,7 +483,7 @@ _TRUNCATED_ANCHOR_MODELS = [
|
|
|
478
483
|
"auto",
|
|
479
484
|
id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
480
485
|
),
|
|
481
|
-
pytest.param("openai/gpt-oss-20b", "
|
|
486
|
+
pytest.param("openai/gpt-oss-20b", "gpt-oss", id="openai/gpt-oss-20b"),
|
|
482
487
|
]
|
|
483
488
|
|
|
484
489
|
|
|
@@ -552,11 +557,12 @@ async def test_get_incremental_prompt_ids_bridges_over_truncated_step(
|
|
|
552
557
|
|
|
553
558
|
prefix = list(prev_prompt_ids) + list(prev_completion_ids)
|
|
554
559
|
assert result is not None, f"{model_id}: bridge returned None on truncated anchor"
|
|
555
|
-
|
|
560
|
+
result_ids = result.token_ids
|
|
561
|
+
assert result_ids[: len(prefix)] == prefix, (
|
|
556
562
|
f"{model_id}: bridge result does not prefix-preserve "
|
|
557
563
|
f"prev_prompt + prev_completion"
|
|
558
564
|
)
|
|
559
|
-
assert len(
|
|
565
|
+
assert len(result_ids) > len(prefix), (
|
|
560
566
|
f"{model_id}: bridge produced no tail tokens for the new user turn"
|
|
561
567
|
)
|
|
562
568
|
|
|
@@ -27,6 +27,7 @@ from verifiers.utils.metric_utils import (
|
|
|
27
27
|
)
|
|
28
28
|
from verifiers.utils.save_utils import (
|
|
29
29
|
GenerateOutputsBuilder,
|
|
30
|
+
_delta_intermediate_mm_data,
|
|
30
31
|
extract_usage_tokens,
|
|
31
32
|
load_outputs,
|
|
32
33
|
make_serializable,
|
|
@@ -897,3 +898,257 @@ class TestPassAtKMetric:
|
|
|
897
898
|
)
|
|
898
899
|
pass_at_k, _ = m.compute()
|
|
899
900
|
assert pass_at_k["1"] == pytest.approx(0.5)
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
class TestDeltaIntermediateMmData:
|
|
904
|
+
"""Verify per-step delta encoding of trajectory mm_data sidecars.
|
|
905
|
+
|
|
906
|
+
Renderer bridge_to_next_turn emits cumulative mm_data on every
|
|
907
|
+
step. The transport-layer delta strips items whose mm_hash already
|
|
908
|
+
appeared in the prior step, so the per-window TrainingSample
|
|
909
|
+
assembler can recover its window's images by unioning step-deltas.
|
|
910
|
+
"""
|
|
911
|
+
|
|
912
|
+
@staticmethod
|
|
913
|
+
def _mm(*hashes: str):
|
|
914
|
+
"""Build a renderers.MultiModalData with one image item per hash."""
|
|
915
|
+
from renderers.base import MultiModalData, PlaceholderRange
|
|
916
|
+
|
|
917
|
+
return MultiModalData(
|
|
918
|
+
mm_hashes={"image": list(hashes)},
|
|
919
|
+
mm_placeholders={
|
|
920
|
+
"image": [
|
|
921
|
+
PlaceholderRange(offset=i * 10, length=4)
|
|
922
|
+
for i in range(len(hashes))
|
|
923
|
+
]
|
|
924
|
+
},
|
|
925
|
+
mm_items={"image": [{"pixel_values": f"px-{h}"} for h in hashes]},
|
|
926
|
+
)
|
|
927
|
+
|
|
928
|
+
def _step(self, mm):
|
|
929
|
+
return {"tokens": {"multi_modal_data": mm}}
|
|
930
|
+
|
|
931
|
+
def test_none_and_single_step_passthrough(self):
|
|
932
|
+
assert _delta_intermediate_mm_data(None) is None
|
|
933
|
+
assert _delta_intermediate_mm_data([]) == []
|
|
934
|
+
only = [self._step(self._mm("A"))]
|
|
935
|
+
assert _delta_intermediate_mm_data(only) is only
|
|
936
|
+
|
|
937
|
+
def test_linear_extension_keeps_only_new_items_per_step(self):
|
|
938
|
+
traj = [
|
|
939
|
+
self._step(self._mm("A")),
|
|
940
|
+
self._step(self._mm("A", "B")),
|
|
941
|
+
self._step(self._mm("A", "B", "C")),
|
|
942
|
+
]
|
|
943
|
+
out = _delta_intermediate_mm_data(traj)
|
|
944
|
+
|
|
945
|
+
assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
|
|
946
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
|
|
947
|
+
assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}
|
|
948
|
+
# Items and placeholders are reindexed in lockstep with hashes.
|
|
949
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_items["image"] == [
|
|
950
|
+
{"pixel_values": "px-B"}
|
|
951
|
+
]
|
|
952
|
+
assert (
|
|
953
|
+
out[2]["tokens"]["multi_modal_data"].mm_placeholders["image"][0].offset
|
|
954
|
+
== 20
|
|
955
|
+
)
|
|
956
|
+
|
|
957
|
+
def test_compaction_two_training_samples_assemble_correctly(self):
|
|
958
|
+
"""Rollout with one compaction event → two TrainingSamples.
|
|
959
|
+
|
|
960
|
+
Models the prime-rl compaction flow: a single rollout produces
|
|
961
|
+
multiple ``TrainingSample`` objects, one per compaction window.
|
|
962
|
+
The pre-compaction sample's images are no longer in the
|
|
963
|
+
post-compaction step's cumulative ``mm_data`` — the previous
|
|
964
|
+
"keep last" strategy would have silently dropped them. With
|
|
965
|
+
delta encoding, each per-window assembler recovers exactly the
|
|
966
|
+
images its tokens reference: no leakage in either direction.
|
|
967
|
+
"""
|
|
968
|
+
from renderers.base import MultiModalData, PlaceholderRange
|
|
969
|
+
|
|
970
|
+
def step(*hashes: str, offsets: list[int]):
|
|
971
|
+
return {
|
|
972
|
+
"tokens": {
|
|
973
|
+
"multi_modal_data": MultiModalData(
|
|
974
|
+
mm_hashes={"image": list(hashes)},
|
|
975
|
+
mm_placeholders={
|
|
976
|
+
"image": [
|
|
977
|
+
PlaceholderRange(offset=o, length=4) for o in offsets
|
|
978
|
+
]
|
|
979
|
+
},
|
|
980
|
+
mm_items={
|
|
981
|
+
"image": [{"pixel_values": f"px-{h}"} for h in hashes]
|
|
982
|
+
},
|
|
983
|
+
)
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
|
|
987
|
+
# Turn 1: image A. Cumulative {A}.
|
|
988
|
+
# Turn 2: image B. Cumulative {A, B}.
|
|
989
|
+
# ── compaction event: turns 1+2 summarized in text, images dropped ──
|
|
990
|
+
# Turn 3: image C. Cumulative {C} (offsets reset against the
|
|
991
|
+
# post-compaction prompt).
|
|
992
|
+
# Turn 4: image D. Cumulative {C, D}.
|
|
993
|
+
traj = [
|
|
994
|
+
step("A", offsets=[10]),
|
|
995
|
+
step("A", "B", offsets=[10, 50]),
|
|
996
|
+
step("C", offsets=[8]),
|
|
997
|
+
step("C", "D", offsets=[8, 40]),
|
|
998
|
+
]
|
|
999
|
+
out = _delta_intermediate_mm_data(traj)
|
|
1000
|
+
|
|
1001
|
+
# Per-step deltas keep only what's new since the immediately prior step.
|
|
1002
|
+
deltas = [s["tokens"]["multi_modal_data"].mm_hashes for s in out]
|
|
1003
|
+
assert deltas == [
|
|
1004
|
+
{"image": ["A"]},
|
|
1005
|
+
{"image": ["B"]},
|
|
1006
|
+
{"image": ["C"]},
|
|
1007
|
+
{"image": ["D"]},
|
|
1008
|
+
]
|
|
1009
|
+
|
|
1010
|
+
def assemble(steps):
|
|
1011
|
+
hashes: list[str] = []
|
|
1012
|
+
items: list[dict] = []
|
|
1013
|
+
placeholders: list[PlaceholderRange] = []
|
|
1014
|
+
for s in steps:
|
|
1015
|
+
mm = s["tokens"]["multi_modal_data"]
|
|
1016
|
+
hashes += mm.mm_hashes.get("image", [])
|
|
1017
|
+
items += mm.mm_items.get("image", [])
|
|
1018
|
+
placeholders += mm.mm_placeholders.get("image", [])
|
|
1019
|
+
return hashes, items, placeholders
|
|
1020
|
+
|
|
1021
|
+
ts1_hashes, ts1_items, ts1_phs = assemble(out[0:2]) # pre-compaction
|
|
1022
|
+
ts2_hashes, ts2_items, ts2_phs = assemble(out[2:4]) # post-compaction
|
|
1023
|
+
|
|
1024
|
+
assert ts1_hashes == ["A", "B"]
|
|
1025
|
+
assert ts2_hashes == ["C", "D"]
|
|
1026
|
+
# The invariant the previous "keep last" broke: pre-compaction TS
|
|
1027
|
+
# does not see post-compaction images, and vice versa.
|
|
1028
|
+
assert set(ts1_hashes).isdisjoint(set(ts2_hashes))
|
|
1029
|
+
|
|
1030
|
+
# Items / placeholders are reindexed lock-step with hashes (no
|
|
1031
|
+
# off-by-one or cross-contamination during reindex).
|
|
1032
|
+
assert ts1_items == [{"pixel_values": "px-A"}, {"pixel_values": "px-B"}]
|
|
1033
|
+
assert ts2_items == [{"pixel_values": "px-C"}, {"pixel_values": "px-D"}]
|
|
1034
|
+
|
|
1035
|
+
# Placeholder offsets travel verbatim per step; the assembler is
|
|
1036
|
+
# responsible for shifting them into each window's local frame.
|
|
1037
|
+
assert [p.offset for p in ts1_phs] == [10, 50]
|
|
1038
|
+
assert [p.offset for p in ts2_phs] == [8, 40]
|
|
1039
|
+
|
|
1040
|
+
def test_same_image_rendered_in_two_turns_uses_multiset_diff(self):
|
|
1041
|
+
"""Same image hash appearing N times must keep the right N-prior occurrences.
|
|
1042
|
+
|
|
1043
|
+
The renderer doesn't dedupe by hash: ``emit_image`` appends to
|
|
1044
|
+
the parallel lists every time an image content part is rendered.
|
|
1045
|
+
So if image A is shown in turn 1 *and* turn 3, the cumulative
|
|
1046
|
+
``mm_hashes`` is ``["A", "A"]`` with two distinct placeholder
|
|
1047
|
+
offsets, and ``mm_items`` is ``[pixA, pixA]`` (literally the
|
|
1048
|
+
same payload twice). Both placeholder runs need their own item
|
|
1049
|
+
— set-based diff would drop both as "already seen" and orphan
|
|
1050
|
+
the second placeholder. Multiset diff drops only the first.
|
|
1051
|
+
"""
|
|
1052
|
+
from renderers.base import MultiModalData, PlaceholderRange
|
|
1053
|
+
|
|
1054
|
+
def step(hashes, offsets):
|
|
1055
|
+
return {
|
|
1056
|
+
"tokens": {
|
|
1057
|
+
"multi_modal_data": MultiModalData(
|
|
1058
|
+
mm_hashes={"image": list(hashes)},
|
|
1059
|
+
mm_placeholders={
|
|
1060
|
+
"image": [
|
|
1061
|
+
PlaceholderRange(offset=o, length=4) for o in offsets
|
|
1062
|
+
]
|
|
1063
|
+
},
|
|
1064
|
+
mm_items={
|
|
1065
|
+
"image": [{"pixel_values": f"px-{h}"} for h in hashes]
|
|
1066
|
+
},
|
|
1067
|
+
)
|
|
1068
|
+
}
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
# Turn 1: image A at offset 10. Cumulative ["A"].
|
|
1072
|
+
# Turn 2: no image. Cumulative unchanged ["A"].
|
|
1073
|
+
# Turn 3: image A re-rendered at offset 200. Cumulative ["A", "A"].
|
|
1074
|
+
traj = [
|
|
1075
|
+
step(["A"], offsets=[10]),
|
|
1076
|
+
step(["A"], offsets=[10]),
|
|
1077
|
+
step(["A", "A"], offsets=[10, 200]),
|
|
1078
|
+
]
|
|
1079
|
+
out = _delta_intermediate_mm_data(traj)
|
|
1080
|
+
|
|
1081
|
+
# Step 0 keeps everything (no prior).
|
|
1082
|
+
assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
|
|
1083
|
+
assert [
|
|
1084
|
+
p.offset
|
|
1085
|
+
for p in out[0]["tokens"]["multi_modal_data"].mm_placeholders["image"]
|
|
1086
|
+
] == [10]
|
|
1087
|
+
|
|
1088
|
+
# Step 1 introduced no new image (cumulative unchanged).
|
|
1089
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
|
|
1090
|
+
|
|
1091
|
+
# Step 2: prior was ["A"], current is ["A", "A"]. Multiset budget
|
|
1092
|
+
# consumes the first A; the *second* A (the new one at offset
|
|
1093
|
+
# 200) survives the diff with its pixel_values intact. Set-based
|
|
1094
|
+
# diff would have produced [].
|
|
1095
|
+
step2_mm = out[2]["tokens"]["multi_modal_data"]
|
|
1096
|
+
assert step2_mm.mm_hashes == {"image": ["A"]}
|
|
1097
|
+
assert step2_mm.mm_items == {"image": [{"pixel_values": "px-A"}]}
|
|
1098
|
+
assert [p.offset for p in step2_mm.mm_placeholders["image"]] == [200]
|
|
1099
|
+
|
|
1100
|
+
# End-to-end: assembling the single TrainingSample (no
|
|
1101
|
+
# compaction) recovers both placeholder runs with matching
|
|
1102
|
+
# pixel_values, so the trainer can satisfy both image-pad
|
|
1103
|
+
# token runs in the prompt.
|
|
1104
|
+
all_hashes: list[str] = []
|
|
1105
|
+
all_phs: list[PlaceholderRange] = []
|
|
1106
|
+
for s in out:
|
|
1107
|
+
mm = s["tokens"]["multi_modal_data"]
|
|
1108
|
+
all_hashes += mm.mm_hashes.get("image", [])
|
|
1109
|
+
all_phs += mm.mm_placeholders.get("image", [])
|
|
1110
|
+
assert all_hashes == ["A", "A"]
|
|
1111
|
+
assert [p.offset for p in all_phs] == [10, 200]
|
|
1112
|
+
|
|
1113
|
+
def test_image_reintroduction_after_compaction(self):
|
|
1114
|
+
"""A hash dropped at compaction and re-rendered later is re-transmitted.
|
|
1115
|
+
|
|
1116
|
+
The delta is computed against the *immediately prior step's*
|
|
1117
|
+
cumulative, not a global seen-set. If image A appears in turn
|
|
1118
|
+
1, is compacted away (step 2's cumulative is empty), and is
|
|
1119
|
+
re-rendered in turn 3, A shows up in step 0's delta *and* step
|
|
1120
|
+
2's delta — necessary so the post-compaction TrainingSample
|
|
1121
|
+
also receives A's bytes.
|
|
1122
|
+
"""
|
|
1123
|
+
traj = [
|
|
1124
|
+
self._step(self._mm("A")),
|
|
1125
|
+
self._step(self._mm()),
|
|
1126
|
+
self._step(self._mm("A")),
|
|
1127
|
+
]
|
|
1128
|
+
out = _delta_intermediate_mm_data(traj)
|
|
1129
|
+
|
|
1130
|
+
assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
|
|
1131
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
|
|
1132
|
+
# A re-emerges in step 2's delta — its absence from step 1's
|
|
1133
|
+
# cumulative means it counts as "new" again.
|
|
1134
|
+
assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
|
|
1135
|
+
|
|
1136
|
+
def test_steps_with_no_new_items_collapse_to_empty_delta(self):
|
|
1137
|
+
# Step 2's cumulative equals step 1's — no new items.
|
|
1138
|
+
traj = [
|
|
1139
|
+
self._step(self._mm("A", "B")),
|
|
1140
|
+
self._step(self._mm("A", "B")),
|
|
1141
|
+
self._step(self._mm("A", "B", "C")),
|
|
1142
|
+
]
|
|
1143
|
+
out = _delta_intermediate_mm_data(traj)
|
|
1144
|
+
|
|
1145
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
|
|
1146
|
+
assert out[1]["tokens"]["multi_modal_data"].mm_items == {"image": []}
|
|
1147
|
+
assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}
|
|
1148
|
+
|
|
1149
|
+
def test_non_mapping_steps_pass_through(self):
|
|
1150
|
+
traj = [self._step(self._mm("A")), "not-a-dict", self._step(self._mm("A", "B"))]
|
|
1151
|
+
out = _delta_intermediate_mm_data(traj)
|
|
1152
|
+
assert out[1] == "not-a-dict"
|
|
1153
|
+
# Delta of step 2 still computed against step 0 (last seen cumulative).
|
|
1154
|
+
assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
|
{verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/anthropic_messages_client.py
RENAMED
|
@@ -50,6 +50,13 @@ from verifiers.types import (
|
|
|
50
50
|
from verifiers.utils.client_utils import setup_anthropic_client
|
|
51
51
|
|
|
52
52
|
|
|
53
|
+
ANTHROPIC_ADAPTIVE_THINKING_MODELS = {
|
|
54
|
+
"claude-opus-4-7",
|
|
55
|
+
"claude-opus-4-6",
|
|
56
|
+
"claude-sonnet-4-6",
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
|
|
53
60
|
def _handle_anthropic_overlong_prompt(func):
|
|
54
61
|
"""Decorator to handle overlong prompt errors from the Anthropic API."""
|
|
55
62
|
|
|
@@ -342,6 +349,20 @@ class AnthropicMessagesClient(
|
|
|
342
349
|
) -> AnthropicMessage:
|
|
343
350
|
def normalize_sampling_args(sampling_args: SamplingArgs) -> dict:
|
|
344
351
|
sampling_args = dict(sampling_args)
|
|
352
|
+
reasoning_effort = sampling_args.pop("reasoning_effort", None)
|
|
353
|
+
if reasoning_effort is not None:
|
|
354
|
+
model_id = (
|
|
355
|
+
model.lower().split("/")[-1].replace(".", "-").replace("_", "-")
|
|
356
|
+
)
|
|
357
|
+
output_config = dict(sampling_args.get("output_config") or {})
|
|
358
|
+
output_config["effort"] = reasoning_effort
|
|
359
|
+
sampling_args["output_config"] = output_config
|
|
360
|
+
if "thinking" not in sampling_args and any(
|
|
361
|
+
model_id == adaptive_model
|
|
362
|
+
or model_id.startswith(f"{adaptive_model}-")
|
|
363
|
+
for adaptive_model in ANTHROPIC_ADAPTIVE_THINKING_MODELS
|
|
364
|
+
):
|
|
365
|
+
sampling_args["thinking"] = {"type": "adaptive"}
|
|
345
366
|
max_tokens = sampling_args.pop("max_tokens", None)
|
|
346
367
|
sampling_args.pop("n", None)
|
|
347
368
|
sampling_args.pop("stop", None)
|
{verifiers-0.1.15.dev0 → verifiers-0.1.15.dev1}/verifiers/clients/openai_chat_completions_client.py
RENAMED
|
@@ -252,6 +252,31 @@ class OpenAIChatCompletionsClient(
|
|
|
252
252
|
) -> OpenAIChatResponse:
|
|
253
253
|
def normalize_sampling_args(sampling_args: SamplingArgs):
|
|
254
254
|
sampling_args = dict(sampling_args)
|
|
255
|
+
api_base_url = None
|
|
256
|
+
if hasattr(self.client, "base_url"):
|
|
257
|
+
api_base_url = str(self.client.base_url)
|
|
258
|
+
elif self._config is not None:
|
|
259
|
+
api_base_url = self._config.api_base_url
|
|
260
|
+
reasoning_effort = sampling_args.pop("reasoning_effort", None)
|
|
261
|
+
model_id = model.lower().split("/")[-1].replace(".", "-").replace("_", "-")
|
|
262
|
+
is_anthropic_route = (
|
|
263
|
+
"openrouter.ai" in (api_base_url or "").lower()
|
|
264
|
+
or "pinference.ai" in (api_base_url or "").lower()
|
|
265
|
+
)
|
|
266
|
+
if (
|
|
267
|
+
reasoning_effort is not None
|
|
268
|
+
and model_id.startswith("claude-")
|
|
269
|
+
and is_anthropic_route
|
|
270
|
+
):
|
|
271
|
+
# OpenRouter/Pinference route Anthropic reasoning_effort through extra_body.
|
|
272
|
+
extra_body = dict(sampling_args.get("extra_body") or {})
|
|
273
|
+
extra_body["verbosity"] = reasoning_effort
|
|
274
|
+
reasoning = dict(extra_body.get("reasoning") or {})
|
|
275
|
+
reasoning.setdefault("enabled", True)
|
|
276
|
+
extra_body["reasoning"] = reasoning
|
|
277
|
+
sampling_args["extra_body"] = extra_body
|
|
278
|
+
elif reasoning_effort is not None:
|
|
279
|
+
sampling_args["reasoning_effort"] = reasoning_effort
|
|
255
280
|
if "max_tokens" in sampling_args:
|
|
256
281
|
sampling_args["max_completion_tokens"] = sampling_args.pop("max_tokens")
|
|
257
282
|
return {k: v for k, v in sampling_args.items() if v is not None}
|
|
@@ -20,10 +20,13 @@ from openai import AsyncOpenAI
|
|
|
20
20
|
|
|
21
21
|
from renderers import Message as RendererMessage
|
|
22
22
|
from renderers import (
|
|
23
|
+
MultimodalRenderer,
|
|
24
|
+
RenderedTokens,
|
|
23
25
|
Renderer,
|
|
24
26
|
RendererPool,
|
|
25
27
|
ToolSpec,
|
|
26
28
|
create_renderer_pool,
|
|
29
|
+
is_multimodal,
|
|
27
30
|
)
|
|
28
31
|
from renderers import ToolCall as RendererToolCall
|
|
29
32
|
from renderers import ToolCallFunction
|
|
@@ -94,15 +97,15 @@ _DEFAULT_POOL_SIZE = 1
|
|
|
94
97
|
# ── Helpers ─────────────────────────────────────────────────────────
|
|
95
98
|
|
|
96
99
|
|
|
97
|
-
async def
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def _work():
|
|
101
|
-
with renderer.checkout() as r:
|
|
102
|
-
return fn(r)
|
|
100
|
+
async def _maybe_offload(renderer: Renderer | RendererPool, fn):
|
|
101
|
+
"""Run sync renderer work on a thread iff ``renderer`` is a pool.
|
|
103
102
|
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
Pool methods can block on the internal queue/lock; we offload to keep
|
|
104
|
+
the event loop responsive. A bare ``Renderer`` runs inline.
|
|
105
|
+
"""
|
|
106
|
+
if isinstance(renderer, RendererPool):
|
|
107
|
+
return await asyncio.to_thread(fn)
|
|
108
|
+
return fn()
|
|
106
109
|
|
|
107
110
|
|
|
108
111
|
def _get_value(obj: Any, key: str, default: Any = None) -> Any:
|
|
@@ -295,6 +298,28 @@ def _step_token_ids(step: Any) -> tuple[list[int], list[int]] | None:
|
|
|
295
298
|
return list(prompt_ids), list(completion_ids)
|
|
296
299
|
|
|
297
300
|
|
|
301
|
+
def _step_multi_modal_data(step: Any):
|
|
302
|
+
"""Recover the previous turn's ``MultiModalData`` for bridging.
|
|
303
|
+
|
|
304
|
+
Mirrors :func:`_step_token_ids`: prefer ``step.tokens.multi_modal_data``
|
|
305
|
+
(post-parse_response_tokens), fall back to ``step.response.message.tokens``.
|
|
306
|
+
Returns ``None`` when no multimodal sidecar was emitted (text-only
|
|
307
|
+
rollouts) — the bridge handles that branch transparently.
|
|
308
|
+
"""
|
|
309
|
+
tokens = _get_value(step, "tokens")
|
|
310
|
+
if tokens is not None:
|
|
311
|
+
mm = _get_value(tokens, "multi_modal_data")
|
|
312
|
+
if mm is not None:
|
|
313
|
+
return mm
|
|
314
|
+
|
|
315
|
+
response = _get_value(step, "response")
|
|
316
|
+
message = _get_value(response, "message")
|
|
317
|
+
raw_tokens = _get_value(message, "tokens")
|
|
318
|
+
if raw_tokens is None:
|
|
319
|
+
return None
|
|
320
|
+
return _get_value(raw_tokens, "multi_modal_data")
|
|
321
|
+
|
|
322
|
+
|
|
298
323
|
def _step_rendered_messages(step: Any) -> list[RendererMessage]:
|
|
299
324
|
prompt = list(_get_value(step, "prompt", []) or [])
|
|
300
325
|
completion = list(_get_value(step, "completion", []) or [])
|
|
@@ -309,7 +334,13 @@ async def _get_incremental_prompt_ids(
|
|
|
309
334
|
prompt: list[RendererMessage],
|
|
310
335
|
state: Any,
|
|
311
336
|
tools: list[ToolSpec] | None,
|
|
312
|
-
) ->
|
|
337
|
+
) -> "RenderedTokens | None":
|
|
338
|
+
"""Return the bridged prompt for the next turn as ``RenderedTokens``.
|
|
339
|
+
|
|
340
|
+
Returns ``None`` when no prior trajectory step lines up with the new
|
|
341
|
+
prompt's prefix or the renderer's ``bridge_to_next_turn`` can't extend
|
|
342
|
+
— both cases fall back to a full re-render in :func:`generate`.
|
|
343
|
+
"""
|
|
313
344
|
if not state:
|
|
314
345
|
return None
|
|
315
346
|
|
|
@@ -342,15 +373,32 @@ async def _get_incremental_prompt_ids(
|
|
|
342
373
|
continue
|
|
343
374
|
|
|
344
375
|
previous_prompt_ids, previous_completion_ids = token_ids
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
376
|
+
previous_mm_data = _step_multi_modal_data(step)
|
|
377
|
+
# Multimodal renderers' bridge accepts ``previous_multi_modal_data``
|
|
378
|
+
# so earlier-turn images carry forward into the new prompt's
|
|
379
|
+
# ``mm_placeholders``. Without that carry-forward, vLLM sees
|
|
380
|
+
# placeholder counts that don't match the combined token sequence
|
|
381
|
+
# and silently falls back to hash-cache lookup (or errors).
|
|
382
|
+
# Text-only renderers' bridge signature doesn't include that
|
|
383
|
+
# kwarg. ``is_multimodal`` is type-cached so this dispatch is a
|
|
384
|
+
# dict lookup, not a runtime_checkable Protocol walk.
|
|
385
|
+
if is_multimodal(renderer):
|
|
386
|
+
mm_renderer = cast(MultimodalRenderer, renderer)
|
|
387
|
+
bridge = lambda: mm_renderer.bridge_to_next_turn( # noqa: E731
|
|
348
388
|
previous_prompt_ids,
|
|
349
389
|
previous_completion_ids,
|
|
350
390
|
tail,
|
|
351
391
|
tools=tools,
|
|
352
|
-
|
|
353
|
-
|
|
392
|
+
previous_multi_modal_data=previous_mm_data,
|
|
393
|
+
)
|
|
394
|
+
else:
|
|
395
|
+
bridge = lambda: renderer.bridge_to_next_turn( # noqa: E731
|
|
396
|
+
previous_prompt_ids,
|
|
397
|
+
previous_completion_ids,
|
|
398
|
+
tail,
|
|
399
|
+
tools=tools,
|
|
400
|
+
)
|
|
401
|
+
bridged = await _maybe_offload(renderer, bridge)
|
|
354
402
|
_record_bridge(success=bridged is not None)
|
|
355
403
|
return bridged
|
|
356
404
|
|
|
@@ -514,12 +562,21 @@ class RendererClient(
|
|
|
514
562
|
if args.get("prompt_logprobs"):
|
|
515
563
|
sampling_params["prompt_logprobs"] = 1
|
|
516
564
|
|
|
517
|
-
|
|
565
|
+
bridged = await _get_incremental_prompt_ids(
|
|
518
566
|
renderer=renderer,
|
|
519
567
|
prompt=prompt,
|
|
520
568
|
state=kwargs.get("state"),
|
|
521
569
|
tools=tools,
|
|
522
570
|
)
|
|
571
|
+
# ``bridged`` is RenderedTokens | None. Unpack token_ids + mm_data
|
|
572
|
+
# so multimodal renderers thread per-image features through to
|
|
573
|
+
# /inference/v1/generate without re-rendering the whole turn.
|
|
574
|
+
if bridged is not None:
|
|
575
|
+
prompt_ids = bridged.token_ids
|
|
576
|
+
multi_modal_data = bridged.multi_modal_data
|
|
577
|
+
else:
|
|
578
|
+
prompt_ids = None
|
|
579
|
+
multi_modal_data = None
|
|
523
580
|
|
|
524
581
|
return await generate(
|
|
525
582
|
client=self.client,
|
|
@@ -527,6 +584,7 @@ class RendererClient(
|
|
|
527
584
|
messages=prompt,
|
|
528
585
|
model=model,
|
|
529
586
|
prompt_ids=prompt_ids,
|
|
587
|
+
multi_modal_data=multi_modal_data,
|
|
530
588
|
tools=tools,
|
|
531
589
|
sampling_params=sampling_params,
|
|
532
590
|
cache_salt=args.get("cache_salt")
|
|
@@ -580,6 +638,7 @@ class RendererClient(
|
|
|
580
638
|
completion_mask=[1] * len(completion_ids),
|
|
581
639
|
completion_logprobs=completion_logprobs,
|
|
582
640
|
routed_experts=response.get("routed_experts"),
|
|
641
|
+
multi_modal_data=response.get("multi_modal_data"),
|
|
583
642
|
)
|
|
584
643
|
|
|
585
644
|
# /inference/v1/generate doesn't return usage; reconstruct from tokens.
|
|
@@ -30,7 +30,7 @@ from verifiers.serve.types import (
|
|
|
30
30
|
class ZMQEnvClient(EnvClient):
|
|
31
31
|
"""ZMQ-based environment client."""
|
|
32
32
|
|
|
33
|
-
DEFAULT_REQUEST_TIMEOUT =
|
|
33
|
+
DEFAULT_REQUEST_TIMEOUT: float | None = None
|
|
34
34
|
|
|
35
35
|
def __init__(self, address: str = "tcp://127.0.0.1:5000", **kwargs):
|
|
36
36
|
super().__init__(address=address, **kwargs)
|