verifiers 0.1.15.dev1__tar.gz → 0.1.15.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/PKG-INFO +1 -1
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_interception_utils.py +2 -2
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_harbor_cli.py +73 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_runtime_lifecycle.py +33 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/__init__.py +1 -1
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/opencode.py +7 -6
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_env.py +7 -3
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/interception_utils.py +15 -7
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/opencode.py +7 -3
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/tasksets/harbor.py +3 -2
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/sandbox_utils.py +5 -4
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/.gitignore +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/LICENSE +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_config_extension.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/types.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/README.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/RE_MIGRATION.md +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/config.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/harness.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/cli.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/configs.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/pi.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/packages/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/taskset.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/artifact_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/prompt_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/timing_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/verifiers/v1/utils/trajectory_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev1
|
|
3
|
+
Version: 0.1.15.dev3
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -261,7 +261,7 @@ async def test_keepalive_write_failure_surfaces_to_state(monkeypatch):
|
|
|
261
261
|
|
|
262
262
|
assert isinstance(state["error"], StreamInterrupted)
|
|
263
263
|
msg = str(state["error"])
|
|
264
|
-
assert "
|
|
264
|
+
assert "Keepalive write failed" in msg
|
|
265
265
|
assert "ConnectionResetError" in msg
|
|
266
266
|
|
|
267
267
|
|
|
@@ -306,6 +306,6 @@ async def test_non_streaming_response_future_failure_surfaces_to_state(monkeypat
|
|
|
306
306
|
f"expected InterceptionError, got {type(state.get('error'))}"
|
|
307
307
|
)
|
|
308
308
|
msg = str(state["error"])
|
|
309
|
-
assert "
|
|
309
|
+
assert "Intercepted request failed" in msg
|
|
310
310
|
assert "RuntimeError" in msg
|
|
311
311
|
assert "vLLM raised" in msg
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import sys
|
|
5
|
+
import types
|
|
4
6
|
from pathlib import Path
|
|
5
7
|
from typing import cast
|
|
6
8
|
|
|
@@ -8,6 +10,7 @@ import pytest
|
|
|
8
10
|
|
|
9
11
|
import verifiers.v1 as vf
|
|
10
12
|
from verifiers.v1.packages.harnesses.pi import pi_mcp_json, pi_models_json
|
|
13
|
+
from verifiers.v1.packages.tasksets.harbor import harbor_reward
|
|
11
14
|
from verifiers.v1.utils.program_utils import merge_task_program
|
|
12
15
|
|
|
13
16
|
|
|
@@ -82,6 +85,76 @@ def test_harbor_taskset_constructs_env_with_opencode(tmp_path: Path) -> None:
|
|
|
82
85
|
assert "task_dir" not in cast(dict[str, object], env.harness.program)
|
|
83
86
|
|
|
84
87
|
|
|
88
|
+
class FakeHarborCommandResult:
|
|
89
|
+
def __init__(
|
|
90
|
+
self,
|
|
91
|
+
*,
|
|
92
|
+
exit_code: int = 0,
|
|
93
|
+
stdout: str = "",
|
|
94
|
+
stderr: str = "",
|
|
95
|
+
):
|
|
96
|
+
self.exit_code = exit_code
|
|
97
|
+
self.stdout = stdout
|
|
98
|
+
self.stderr = stderr
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class FakeHarborSandboxClient:
|
|
102
|
+
instances: list["FakeHarborSandboxClient"] = []
|
|
103
|
+
|
|
104
|
+
def __init__(self):
|
|
105
|
+
self.execute_commands: list[tuple[str, int | None, str | None]] = []
|
|
106
|
+
self.background_jobs: list[tuple[str, str, int | None, str | None]] = []
|
|
107
|
+
type(self).instances.append(self)
|
|
108
|
+
|
|
109
|
+
async def upload_file(self, *args: object, **kwargs: object) -> None:
|
|
110
|
+
_ = args, kwargs
|
|
111
|
+
|
|
112
|
+
async def execute_command(
|
|
113
|
+
self, *args: object, **kwargs: object
|
|
114
|
+
) -> FakeHarborCommandResult:
|
|
115
|
+
command = str(kwargs.get("command") or args[1])
|
|
116
|
+
timeout = cast(int | None, kwargs.get("timeout"))
|
|
117
|
+
working_dir = cast(str | None, kwargs.get("working_dir"))
|
|
118
|
+
self.execute_commands.append((command, timeout, working_dir))
|
|
119
|
+
if "reward.txt" in command:
|
|
120
|
+
return FakeHarborCommandResult(stdout="1\n")
|
|
121
|
+
return FakeHarborCommandResult()
|
|
122
|
+
|
|
123
|
+
async def run_background_job(
|
|
124
|
+
self, *args: object, **kwargs: object
|
|
125
|
+
) -> FakeHarborCommandResult:
|
|
126
|
+
sandbox_id = str(kwargs.get("sandbox_id") or args[0])
|
|
127
|
+
command = str(kwargs.get("command") or args[1])
|
|
128
|
+
timeout = cast(int | None, kwargs.get("timeout"))
|
|
129
|
+
working_dir = cast(str | None, kwargs.get("working_dir"))
|
|
130
|
+
self.background_jobs.append((sandbox_id, command, timeout, working_dir))
|
|
131
|
+
return FakeHarborCommandResult(stdout="tests passed")
|
|
132
|
+
|
|
133
|
+
async def aclose(self) -> None:
|
|
134
|
+
pass
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@pytest.mark.asyncio
|
|
138
|
+
async def test_harbor_reward_uses_background_job_for_tests(
|
|
139
|
+
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
|
|
140
|
+
) -> None:
|
|
141
|
+
task_dir = write_harbor_task(tmp_path)
|
|
142
|
+
fake_module = types.ModuleType("prime_sandboxes")
|
|
143
|
+
fake_module.AsyncSandboxClient = FakeHarborSandboxClient
|
|
144
|
+
monkeypatch.setitem(sys.modules, "prime_sandboxes", fake_module)
|
|
145
|
+
FakeHarborSandboxClient.instances = []
|
|
146
|
+
|
|
147
|
+
reward = await harbor_reward(
|
|
148
|
+
{"harbor": {"task_dir": str(task_dir), "test_timeout": 120}},
|
|
149
|
+
{"sandbox_id": "sbx-1"},
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
client = FakeHarborSandboxClient.instances[0]
|
|
153
|
+
assert reward == 1.0
|
|
154
|
+
assert client.background_jobs == [("sbx-1", "bash test.sh", 120, "/tests")]
|
|
155
|
+
assert ("bash test.sh", 120, "/tests") not in client.execute_commands
|
|
156
|
+
|
|
157
|
+
|
|
85
158
|
def test_packaged_harbor_and_opencode_imports_are_reexported() -> None:
|
|
86
159
|
from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig, Pi
|
|
87
160
|
from verifiers.v1.packages.tasksets import HarborTaskset
|
|
@@ -109,6 +109,7 @@ class FakeSandboxClient:
|
|
|
109
109
|
created: list[str] = []
|
|
110
110
|
deleted: list[str] = []
|
|
111
111
|
commands: list[tuple[str, str]] = []
|
|
112
|
+
background_jobs: list[tuple[str, str, int | None, str | None]] = []
|
|
112
113
|
uploads: list[tuple[str, str, bytes]] = []
|
|
113
114
|
|
|
114
115
|
@classmethod
|
|
@@ -116,6 +117,7 @@ class FakeSandboxClient:
|
|
|
116
117
|
cls.created = []
|
|
117
118
|
cls.deleted = []
|
|
118
119
|
cls.commands = []
|
|
120
|
+
cls.background_jobs = []
|
|
119
121
|
cls.uploads = []
|
|
120
122
|
|
|
121
123
|
async def create(self, request: FakeCreateSandboxRequest) -> FakeSandboxResult:
|
|
@@ -135,6 +137,17 @@ class FakeSandboxClient:
|
|
|
135
137
|
type(self).commands.append((sandbox_id, command))
|
|
136
138
|
return FakeCommandResult()
|
|
137
139
|
|
|
140
|
+
async def run_background_job(
|
|
141
|
+
self, *args: object, **kwargs: object
|
|
142
|
+
) -> FakeCommandResult:
|
|
143
|
+
sandbox_id = str(kwargs.get("sandbox_id") or args[0])
|
|
144
|
+
command = str(kwargs.get("command") or args[1])
|
|
145
|
+
timeout = cast(int | None, kwargs.get("timeout"))
|
|
146
|
+
working_dir = cast(str | None, kwargs.get("working_dir"))
|
|
147
|
+
type(self).commands.append((sandbox_id, command))
|
|
148
|
+
type(self).background_jobs.append((sandbox_id, command, timeout, working_dir))
|
|
149
|
+
return FakeCommandResult()
|
|
150
|
+
|
|
138
151
|
async def upload_bytes(self, *args: object, **kwargs: object) -> None:
|
|
139
152
|
sandbox_id = str(kwargs.get("sandbox_id") or args[0])
|
|
140
153
|
path = str(kwargs.get("file_path") or kwargs.get("path") or args[1])
|
|
@@ -1193,6 +1206,26 @@ async def test_sandbox_state_input_upload_runs_after_rollout_setup(
|
|
|
1193
1206
|
assert uploads["/tmp/vf_state_in.json"]["state_input_setup"] is True
|
|
1194
1207
|
|
|
1195
1208
|
|
|
1209
|
+
@pytest.mark.asyncio
|
|
1210
|
+
async def test_task_command_uses_background_job(
|
|
1211
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
1212
|
+
) -> None:
|
|
1213
|
+
install_fake_sandboxes(monkeypatch)
|
|
1214
|
+
install_fake_endpoint_tunnel(monkeypatch)
|
|
1215
|
+
|
|
1216
|
+
harness = vf.CLIHarness(command=["sleep", "120"], sandbox=True)
|
|
1217
|
+
task = vf.Task(
|
|
1218
|
+
{
|
|
1219
|
+
"prompt": [{"role": "user", "content": "hi"}],
|
|
1220
|
+
"sandbox": {"command_timeout": 120},
|
|
1221
|
+
}
|
|
1222
|
+
).freeze()
|
|
1223
|
+
|
|
1224
|
+
await harness.run(task)
|
|
1225
|
+
|
|
1226
|
+
assert ("sbx-1", "sleep 120", 120, "/app") in FakeSandboxClient.background_jobs
|
|
1227
|
+
|
|
1228
|
+
|
|
1196
1229
|
@pytest.mark.asyncio
|
|
1197
1230
|
async def test_program_tools_mcp_setup_accepts_config_ref_mappings(
|
|
1198
1231
|
monkeypatch: pytest.MonkeyPatch,
|
|
@@ -113,6 +113,9 @@ def build_opencode_config(
|
|
|
113
113
|
provider_timeout_ms: int = 3_600_000,
|
|
114
114
|
) -> str:
|
|
115
115
|
"""Generate opencode.json config content."""
|
|
116
|
+
agent_config: dict[str, object] = {
|
|
117
|
+
"title": {"disable": True},
|
|
118
|
+
}
|
|
116
119
|
config: dict = {
|
|
117
120
|
"${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json",
|
|
118
121
|
"provider": {
|
|
@@ -134,12 +137,10 @@ def build_opencode_config(
|
|
|
134
137
|
}
|
|
135
138
|
},
|
|
136
139
|
"model": model_id,
|
|
137
|
-
#
|
|
138
|
-
#
|
|
139
|
-
# under concurrent rollouts, which silently blocks the main agent
|
|
140
|
-
# before it issues any LLM call. See OpenCode provider.ts
|
|
141
|
-
# getSmallModel() priority chain. Pin to the same intercepted model.
|
|
140
|
+
# Keep the small-model pin to avoid falling back to the default small
|
|
141
|
+
# model and hitting rate limits; disable title calls below.
|
|
142
142
|
"small_model": model_id,
|
|
143
|
+
"agent": agent_config,
|
|
143
144
|
}
|
|
144
145
|
|
|
145
146
|
if disable_compaction:
|
|
@@ -151,7 +152,7 @@ def build_opencode_config(
|
|
|
151
152
|
if disabled_tools:
|
|
152
153
|
agent_build["tools"] = {tool: False for tool in disabled_tools}
|
|
153
154
|
if agent_build:
|
|
154
|
-
|
|
155
|
+
agent_config["build"] = agent_build
|
|
155
156
|
|
|
156
157
|
return json.dumps(config, indent=2)
|
|
157
158
|
|
|
@@ -362,6 +362,9 @@ class OpenCodeEnv(CliAgentEnv):
|
|
|
362
362
|
enable_interleaved: bool = True,
|
|
363
363
|
) -> str:
|
|
364
364
|
"""Build OpenCode config."""
|
|
365
|
+
agent_config: dict[str, object] = {
|
|
366
|
+
"title": {"disable": True},
|
|
367
|
+
}
|
|
365
368
|
config: dict = {
|
|
366
369
|
"${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json",
|
|
367
370
|
"provider": {
|
|
@@ -388,9 +391,10 @@ class OpenCodeEnv(CliAgentEnv):
|
|
|
388
391
|
}
|
|
389
392
|
},
|
|
390
393
|
"model": "$OPENAI_MODEL",
|
|
391
|
-
# Keep
|
|
392
|
-
#
|
|
394
|
+
# Keep the small-model pin to avoid falling back to the default small
|
|
395
|
+
# model and hitting rate limits; disable title calls below.
|
|
393
396
|
"small_model": "$OPENAI_MODEL",
|
|
397
|
+
"agent": agent_config,
|
|
394
398
|
}
|
|
395
399
|
|
|
396
400
|
if disable_compaction:
|
|
@@ -402,7 +406,7 @@ class OpenCodeEnv(CliAgentEnv):
|
|
|
402
406
|
build_config["prompt"] = "{file:" + system_prompt_path + "}"
|
|
403
407
|
if disabled_tools:
|
|
404
408
|
build_config["tools"] = {tool: False for tool in disabled_tools}
|
|
405
|
-
|
|
409
|
+
agent_config["build"] = build_config
|
|
406
410
|
|
|
407
411
|
return json.dumps(config, indent=2)
|
|
408
412
|
|
|
@@ -173,12 +173,20 @@ class InterceptionServer:
|
|
|
173
173
|
"""Attach `error` to the rollout's state if one is registered and
|
|
174
174
|
unset. First error wins — later failures (e.g. the downstream
|
|
175
175
|
`response_future` raising too) should not clobber the original cause.
|
|
176
|
+
|
|
177
|
+
Also skip when the rollout loop has already finalized via a clean
|
|
178
|
+
stop condition (e.g. ``state["prompt_too_long"]`` from an
|
|
179
|
+
``OverlongPromptError``). Tail-end failures that happen after
|
|
180
|
+
that — e.g. ``write_eof`` to an agent that has already exited —
|
|
181
|
+
are consequences of the termination, not new infra problems, and
|
|
182
|
+
must not be surfaced as a spurious ``InterceptionError`` /
|
|
183
|
+
``StreamInterrupted`` alongside the real stop signal.
|
|
176
184
|
"""
|
|
177
185
|
context = self.active_rollouts.get(rollout_id)
|
|
178
186
|
if context is None:
|
|
179
187
|
return
|
|
180
188
|
state = context.get("state")
|
|
181
|
-
if state is None or state.get("error"):
|
|
189
|
+
if state is None or state.get("error") or state.get("prompt_too_long"):
|
|
182
190
|
return
|
|
183
191
|
state["error"] = error
|
|
184
192
|
|
|
@@ -295,7 +303,7 @@ class InterceptionServer:
|
|
|
295
303
|
self._set_rollout_error(
|
|
296
304
|
rollout_id,
|
|
297
305
|
InterceptionError(
|
|
298
|
-
f"
|
|
306
|
+
f"Intercepted request failed: {type(e).__name__}: {e}"
|
|
299
307
|
),
|
|
300
308
|
)
|
|
301
309
|
return web.json_response({"error": str(e)}, status=500)
|
|
@@ -433,7 +441,7 @@ class InterceptionServer:
|
|
|
433
441
|
)
|
|
434
442
|
self._set_rollout_error(
|
|
435
443
|
rollout_id,
|
|
436
|
-
StreamInterrupted(f"
|
|
444
|
+
StreamInterrupted(f"Prepare failed: {type(e).__name__}: {e}"),
|
|
437
445
|
)
|
|
438
446
|
return response
|
|
439
447
|
# Reuse one get() task across keepalive cycles; asyncio.wait_for on
|
|
@@ -460,7 +468,7 @@ class InterceptionServer:
|
|
|
460
468
|
self._set_rollout_error(
|
|
461
469
|
rollout_id,
|
|
462
470
|
StreamInterrupted(
|
|
463
|
-
f"
|
|
471
|
+
f"Keepalive write failed after {print_time(waited_s)}: "
|
|
464
472
|
f"{type(e).__name__}: {e}"
|
|
465
473
|
),
|
|
466
474
|
)
|
|
@@ -490,7 +498,7 @@ class InterceptionServer:
|
|
|
490
498
|
self._set_rollout_error(
|
|
491
499
|
rollout_id,
|
|
492
500
|
StreamInterrupted(
|
|
493
|
-
f"
|
|
501
|
+
f"Stream write failed after {print_time(waited_s)}: "
|
|
494
502
|
f"{type(e).__name__}: {e}"
|
|
495
503
|
),
|
|
496
504
|
)
|
|
@@ -510,7 +518,7 @@ class InterceptionServer:
|
|
|
510
518
|
self._set_rollout_error(
|
|
511
519
|
rollout_id,
|
|
512
520
|
StreamInterrupted(
|
|
513
|
-
f"
|
|
521
|
+
f"Streaming response_future failed: {type(e).__name__}: {e}"
|
|
514
522
|
),
|
|
515
523
|
)
|
|
516
524
|
|
|
@@ -527,7 +535,7 @@ class InterceptionServer:
|
|
|
527
535
|
self._set_rollout_error(
|
|
528
536
|
rollout_id,
|
|
529
537
|
StreamInterrupted(
|
|
530
|
-
f"
|
|
538
|
+
f"Write EOF failed after {print_time(waited_s)}: "
|
|
531
539
|
f"{type(e).__name__}: {e}"
|
|
532
540
|
),
|
|
533
541
|
)
|
|
@@ -198,6 +198,9 @@ def build_opencode_config(
|
|
|
198
198
|
disable_compaction: bool,
|
|
199
199
|
provider_timeout_ms: int,
|
|
200
200
|
) -> str:
|
|
201
|
+
agent_config: dict[str, object] = {
|
|
202
|
+
"title": {"disable": True},
|
|
203
|
+
}
|
|
201
204
|
config: dict[str, object] = {
|
|
202
205
|
"${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json",
|
|
203
206
|
"provider": {
|
|
@@ -218,9 +221,10 @@ def build_opencode_config(
|
|
|
218
221
|
}
|
|
219
222
|
},
|
|
220
223
|
"model": "intercepted/model",
|
|
221
|
-
# Keep
|
|
222
|
-
#
|
|
224
|
+
# Keep the small-model pin to avoid falling back to the default small
|
|
225
|
+
# model and hitting rate limits; disable title calls below.
|
|
223
226
|
"small_model": "intercepted/model",
|
|
227
|
+
"agent": agent_config,
|
|
224
228
|
"mcp": {
|
|
225
229
|
"verifiers-tools": {
|
|
226
230
|
"type": "local",
|
|
@@ -237,7 +241,7 @@ def build_opencode_config(
|
|
|
237
241
|
if disabled_tools:
|
|
238
242
|
build_config["tools"] = {tool: False for tool in disabled_tools}
|
|
239
243
|
if build_config:
|
|
240
|
-
|
|
244
|
+
agent_config["build"] = build_config
|
|
241
245
|
return json.dumps(config, indent=2)
|
|
242
246
|
|
|
243
247
|
|
|
@@ -333,11 +333,12 @@ async def harbor_reward(task, state) -> float:
|
|
|
333
333
|
client = AsyncSandboxClient()
|
|
334
334
|
try:
|
|
335
335
|
await upload_harbor_tests(client, sandbox_id, task_dir)
|
|
336
|
-
|
|
336
|
+
test_timeout = int(parse_number(harbor.get("test_timeout"), 900))
|
|
337
|
+
result = await client.run_background_job(
|
|
337
338
|
sandbox_id=sandbox_id,
|
|
338
339
|
command="bash test.sh",
|
|
339
340
|
working_dir="/tests",
|
|
340
|
-
timeout=
|
|
341
|
+
timeout=test_timeout,
|
|
341
342
|
)
|
|
342
343
|
state["harbor_tests"] = {
|
|
343
344
|
"returncode": result.exit_code,
|
|
@@ -262,12 +262,13 @@ async def run_sandbox_command(
|
|
|
262
262
|
)
|
|
263
263
|
argv = await command_argv(program, task, state, runtime)
|
|
264
264
|
env = await command_env(program, task, state, runtime, include_base=False)
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
265
|
+
command = shlex.join(argv)
|
|
266
|
+
command_timeout = cast(int | None, sandbox_config.get("command_timeout"))
|
|
267
|
+
result = await lease.run_background_job(
|
|
268
|
+
command,
|
|
269
|
+
timeout=command_timeout,
|
|
268
270
|
working_dir=workdir,
|
|
269
271
|
env=env,
|
|
270
|
-
timeout=cast(int | None, sandbox_config.get("command_timeout")),
|
|
271
272
|
)
|
|
272
273
|
state["command"] = {
|
|
273
274
|
"argv": argv,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_langchain_deep_agents_wikispeedia.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{verifiers-0.1.15.dev1 → verifiers-0.1.15.dev3}/tests/test_openai_chat_completions_token_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|