verifiers 0.1.12.dev0__tar.gz → 0.1.12.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/.gitignore +1 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/PKG-INFO +4 -3
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/pyproject.toml +4 -2
- verifiers-0.1.12.dev2/tests/test_composable_env.py +200 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_env_server.py +68 -125
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_envs.py +50 -12
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rlm_env.py +432 -69
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_sandbox_mixin.py +7 -48
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tui_info_formatting.py +58 -16
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/__init__.py +1 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_client.py +5 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_token_client.py +109 -92
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/openai_completions_client.py +7 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/env_group.py +1 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/environment.py +38 -19
- verifiers-0.1.12.dev2/verifiers/envs/experimental/__init__.py +28 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/cli_agent_env.py +39 -8
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/README.md +151 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/__init__.py +17 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/composable_env.py +202 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/harness.py +58 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/task.py +362 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_env.py +0 -2
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/rlm_env.py +661 -413
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/sandbox_mixin.py +11 -36
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/README.md +2 -0
- verifiers-0.1.12.dev2/verifiers/envs/integrations/browser_env/README.md +154 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/openenv_env.py +6 -4
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/experimental/hybrid_math_rubric.py +4 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/math_rubric.py +23 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/eval.py +7 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/tui.py +887 -235
- verifiers-0.1.12.dev2/verifiers/serve/__init__.py +45 -0
- {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/client/env_client.py +1 -1
- {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/client/zmq_env_client.py +13 -15
- verifiers-0.1.12.dev2/verifiers/serve/server/__init__.py +11 -0
- verifiers-0.1.12.dev2/verifiers/serve/server/env_router.py +427 -0
- verifiers-0.1.12.dev2/verifiers/serve/server/env_server.py +128 -0
- verifiers-0.1.12.dev2/verifiers/serve/server/env_worker.py +389 -0
- verifiers-0.1.12.dev2/verifiers/serve/server/zmq_env_server.py +117 -0
- {verifiers-0.1.12.dev0/verifiers/workers → verifiers-0.1.12.dev2/verifiers/serve}/types.py +2 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/types.py +1 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/async_utils.py +42 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/config_utils.py +1 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/env_utils.py +3 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/eval_display.py +6 -1
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/eval_utils.py +34 -19
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/logging_utils.py +20 -22
- verifiers-0.1.12.dev2/verifiers/utils/process_utils.py +89 -0
- verifiers-0.1.12.dev0/verifiers/utils/worker_utils.py → verifiers-0.1.12.dev2/verifiers/utils/serve_utils.py +5 -30
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/thread_utils.py +1 -1
- verifiers-0.1.12.dev0/verifiers/envs/experimental/__init__.py +0 -3
- verifiers-0.1.12.dev0/verifiers/envs/integrations/browser_env/README.md +0 -118
- verifiers-0.1.12.dev0/verifiers/workers/__init__.py +0 -27
- verifiers-0.1.12.dev0/verifiers/workers/server/env_server.py +0 -175
- verifiers-0.1.12.dev0/verifiers/workers/server/zmq_env_server.py +0 -326
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/LICENSE +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/README.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/AGENTS.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/README.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/conftest.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_build_script.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_config.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_env_group.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_environment.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_imports.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_logging.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rubric.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/decorators.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/harbor_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/errors.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.12.dev0 → verifiers-0.1.12.dev2}/verifiers/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.12.
|
|
3
|
+
Version: 0.1.12.dev2
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -34,12 +34,13 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
|
34
34
|
Requires-Dist: numpy
|
|
35
35
|
Requires-Dist: openai-agents>=0.0.7
|
|
36
36
|
Requires-Dist: openai>=1.108.1
|
|
37
|
-
Requires-Dist: prime-sandboxes>=0.2.
|
|
38
|
-
Requires-Dist: prime-tunnel>=0.1.
|
|
37
|
+
Requires-Dist: prime-sandboxes>=0.2.19
|
|
38
|
+
Requires-Dist: prime-tunnel>=0.1.5
|
|
39
39
|
Requires-Dist: pydantic>=2.11.9
|
|
40
40
|
Requires-Dist: pyzmq>=27.1.0
|
|
41
41
|
Requires-Dist: requests
|
|
42
42
|
Requires-Dist: rich
|
|
43
|
+
Requires-Dist: setproctitle>=1.3.0
|
|
43
44
|
Requires-Dist: tenacity>=8.5.0
|
|
44
45
|
Requires-Dist: textual
|
|
45
46
|
Requires-Dist: tomli; python_version < '3.11'
|
|
@@ -37,8 +37,8 @@ dependencies = [
|
|
|
37
37
|
"nest-asyncio>=1.6.0", # for jupyter notebooks
|
|
38
38
|
"openai>=1.108.1",
|
|
39
39
|
"openai-agents>=0.0.7",
|
|
40
|
-
"prime-tunnel>=0.1.
|
|
41
|
-
"prime-sandboxes>=0.2.
|
|
40
|
+
"prime-tunnel>=0.1.5",
|
|
41
|
+
"prime-sandboxes>=0.2.19",
|
|
42
42
|
"pydantic>=2.11.9",
|
|
43
43
|
"requests",
|
|
44
44
|
"rich",
|
|
@@ -51,6 +51,7 @@ dependencies = [
|
|
|
51
51
|
"pyzmq>=27.1.0",
|
|
52
52
|
"msgpack>=1.1.2",
|
|
53
53
|
"aiolimiter>=1.2.1",
|
|
54
|
+
"setproctitle>=1.3.0",
|
|
54
55
|
]
|
|
55
56
|
|
|
56
57
|
[dependency-groups]
|
|
@@ -104,6 +105,7 @@ rl = [
|
|
|
104
105
|
|
|
105
106
|
[tool.uv]
|
|
106
107
|
preview = true
|
|
108
|
+
required-version = "<0.11.0"
|
|
107
109
|
|
|
108
110
|
[tool.uv.extra-build-dependencies]
|
|
109
111
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Tests for the composable architecture: Task, TaskSet, SandboxTaskSet, SandboxSpec."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import verifiers as vf
|
|
6
|
+
from verifiers.envs.experimental.composable import (
|
|
7
|
+
ComposableEnv,
|
|
8
|
+
Harness,
|
|
9
|
+
SandboxSpec,
|
|
10
|
+
SandboxTaskSet,
|
|
11
|
+
Task,
|
|
12
|
+
TaskSet,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ── Mock Rubrics ──────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MockSandboxRubric(vf.Rubric):
|
|
20
|
+
def __init__(self, **kwargs):
|
|
21
|
+
super().__init__(**kwargs)
|
|
22
|
+
self.add_reward_func(self.solved)
|
|
23
|
+
|
|
24
|
+
async def solved(self, state, **kwargs) -> float:
|
|
25
|
+
return 1.0 if state.get("test_output") == "PASS" else 0.0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MockMathRubric(vf.Rubric):
|
|
29
|
+
def __init__(self, **kwargs):
|
|
30
|
+
super().__init__(**kwargs)
|
|
31
|
+
self.add_reward_func(self.correct)
|
|
32
|
+
|
|
33
|
+
async def correct(self, state, **kwargs) -> float:
|
|
34
|
+
return 1.0 if state.get("info", {}).get("id") == 0 else 0.0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Mock TaskSets ───────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class MockSandboxTaskSet(SandboxTaskSet):
|
|
41
|
+
"""SandboxTaskSet for testing."""
|
|
42
|
+
|
|
43
|
+
def get_instruction(self, info):
|
|
44
|
+
return f"Fix bug #{info.get('id', 0)}"
|
|
45
|
+
|
|
46
|
+
def get_sandbox_spec(self, info):
|
|
47
|
+
return SandboxSpec(image="python:3.11-slim", cpu_cores=2, memory_gb=2)
|
|
48
|
+
|
|
49
|
+
def get_rubric(self):
|
|
50
|
+
return MockSandboxRubric()
|
|
51
|
+
|
|
52
|
+
def get_workdir(self, info):
|
|
53
|
+
return "/testbed"
|
|
54
|
+
|
|
55
|
+
def get_env_vars(self):
|
|
56
|
+
return {"FOO": "bar"}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MockTaskSet(TaskSet):
|
|
60
|
+
"""Plain TaskSet (no sandbox) for testing."""
|
|
61
|
+
|
|
62
|
+
def get_instruction(self, info):
|
|
63
|
+
return info.get("question", "")
|
|
64
|
+
|
|
65
|
+
def get_rubric(self):
|
|
66
|
+
return MockMathRubric()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _make_dataset(n=3):
|
|
70
|
+
from datasets import Dataset
|
|
71
|
+
|
|
72
|
+
return Dataset.from_dict(
|
|
73
|
+
{
|
|
74
|
+
"info": [{"id": i, "question": f"q{i}"} for i in range(n)],
|
|
75
|
+
"answer": ["" for _ in range(n)],
|
|
76
|
+
}
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ── SandboxSpec ─────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_sandbox_spec_defaults():
|
|
84
|
+
spec = SandboxSpec()
|
|
85
|
+
assert spec.image == "python:3.11-slim"
|
|
86
|
+
assert spec.cpu_cores == 4
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_sandbox_spec_custom():
|
|
90
|
+
spec = SandboxSpec(image="lean-tactic:v4.27", gpu_count=1)
|
|
91
|
+
assert spec.image == "lean-tactic:v4.27"
|
|
92
|
+
assert spec.gpu_count == 1
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ── Task from SandboxTaskSet ───────────────────────────────────────────
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_task_sandbox_spec():
|
|
99
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
100
|
+
task = ts[0]
|
|
101
|
+
assert isinstance(task, Task)
|
|
102
|
+
assert task.sandbox_spec is not None
|
|
103
|
+
assert task.sandbox_spec.image == "python:3.11-slim"
|
|
104
|
+
assert task.sandbox_spec.cpu_cores == 2
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_task_image():
|
|
108
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
109
|
+
task = ts[0]
|
|
110
|
+
assert task.image == "python:3.11-slim"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_task_workdir():
|
|
114
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
115
|
+
task = ts[0]
|
|
116
|
+
assert task.workdir == "/testbed"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_task_repr_sandbox():
|
|
120
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
121
|
+
task = ts[0]
|
|
122
|
+
assert "python:3.11-slim" in repr(task)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ── Task from plain TaskSet ────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_task_no_sandbox():
|
|
129
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
130
|
+
task = ts[0]
|
|
131
|
+
assert task.sandbox_spec is None
|
|
132
|
+
assert task.image is None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_task_repr_no_sandbox():
|
|
136
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
137
|
+
task = ts[0]
|
|
138
|
+
assert "no sandbox" in repr(task)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ── TaskSet ─────────────────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_taskset_isinstance():
|
|
145
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
146
|
+
assert not isinstance(ts, SandboxTaskSet)
|
|
147
|
+
|
|
148
|
+
ts2 = MockSandboxTaskSet(dataset=_make_dataset(), name="swe")
|
|
149
|
+
assert isinstance(ts2, SandboxTaskSet)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_taskset_len():
|
|
153
|
+
ts = MockTaskSet(dataset=_make_dataset(5), name="test")
|
|
154
|
+
assert len(ts) == 5
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_taskset_iter():
|
|
158
|
+
ts = MockTaskSet(dataset=_make_dataset(3), name="test")
|
|
159
|
+
tasks = list(ts)
|
|
160
|
+
assert len(tasks) == 3
|
|
161
|
+
assert all(isinstance(t, Task) for t in tasks)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_taskset_filter():
|
|
165
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
166
|
+
filtered = ts.filter(lambda ex: ex["info"]["id"] < 3)
|
|
167
|
+
assert len(filtered) == 3
|
|
168
|
+
assert isinstance(filtered, MockSandboxTaskSet)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_taskset_take():
|
|
172
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
173
|
+
taken = ts.take(2)
|
|
174
|
+
assert len(taken) == 2
|
|
175
|
+
assert isinstance(taken, MockSandboxTaskSet)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_taskset_repr():
|
|
179
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="mytest")
|
|
180
|
+
assert "mytest" in repr(ts)
|
|
181
|
+
assert "3" in repr(ts)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@pytest.mark.asyncio
|
|
185
|
+
async def test_composable_env_exports_task_workdir():
|
|
186
|
+
taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
187
|
+
env = ComposableEnv(
|
|
188
|
+
taskset=taskset,
|
|
189
|
+
harness=Harness(run_command="true"),
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
env_vars = await env.build_env_vars(
|
|
193
|
+
{
|
|
194
|
+
"info": {"id": 0},
|
|
195
|
+
"interception_base_url": "https://test.trycloudflare.com/v1",
|
|
196
|
+
}
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
assert env_vars["AGENT_WORKDIR"] == "/testbed"
|
|
200
|
+
assert env_vars["FOO"] == "bar"
|
|
@@ -4,7 +4,7 @@ Covers:
|
|
|
4
4
|
- Health-check state transitions (STARTUP -> HEALTHY -> UNHEALTHY)
|
|
5
5
|
- Request retry on ServerError and recovery timeouts
|
|
6
6
|
- Server startup waiting
|
|
7
|
-
- Cancellation propagation (client ->
|
|
7
|
+
- Cancellation propagation (client -> router -> worker)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
import asyncio
|
|
@@ -15,16 +15,16 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
15
15
|
import pytest
|
|
16
16
|
|
|
17
17
|
from verifiers.types import ClientConfig, RolloutInput, UserMessage
|
|
18
|
-
from verifiers.utils.
|
|
19
|
-
from verifiers.
|
|
20
|
-
from verifiers.workers.server.zmq_env_server import ZMQEnvServer
|
|
21
|
-
from verifiers.workers.types import (
|
|
18
|
+
from verifiers.utils.serve_utils import get_free_port
|
|
19
|
+
from verifiers.serve import (
|
|
22
20
|
HealthRequest,
|
|
23
21
|
HealthResponse,
|
|
24
22
|
PendingRequest,
|
|
25
23
|
RunRolloutRequest,
|
|
26
24
|
RunRolloutResponse,
|
|
27
25
|
ServerState,
|
|
26
|
+
ZMQEnvClient,
|
|
27
|
+
ZMQEnvServer,
|
|
28
28
|
)
|
|
29
29
|
|
|
30
30
|
|
|
@@ -36,7 +36,7 @@ def make_client(address: str = "tcp://127.0.0.1:5555", **kwargs) -> ZMQEnvClient
|
|
|
36
36
|
|
|
37
37
|
def make_mock_server(address: str) -> ZMQEnvServer:
|
|
38
38
|
"""Create a ZMQEnvServer with a mocked environment (no real env loading)."""
|
|
39
|
-
with patch("verifiers.
|
|
39
|
+
with patch("verifiers.serve.server.env_server.vf") as mock_vf:
|
|
40
40
|
mock_env = MagicMock()
|
|
41
41
|
mock_env._teardown = AsyncMock()
|
|
42
42
|
mock_vf.load_environment.return_value = mock_env
|
|
@@ -77,20 +77,22 @@ def make_pending_request(
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
@contextlib.asynccontextmanager
|
|
80
|
-
async def run_server_and_client(
|
|
80
|
+
async def run_server_and_client():
|
|
81
81
|
"""Start a mock ZMQ server and connected client, tearing both down on exit.
|
|
82
82
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
observable handlers in tests.
|
|
83
|
+
The router's worker spawning is mocked out so no subprocesses are created.
|
|
84
|
+
Instead, dispatch_request/forward_cancel are replaced with AsyncMock so tests can
|
|
85
|
+
observe request routing without needing real workers.
|
|
87
86
|
"""
|
|
88
|
-
port =
|
|
87
|
+
port = get_free_port()
|
|
89
88
|
address = f"tcp://127.0.0.1:{port}"
|
|
90
89
|
|
|
91
90
|
server = make_mock_server(address)
|
|
92
|
-
|
|
93
|
-
|
|
91
|
+
|
|
92
|
+
# Mock out worker lifecycle — we don't want real subprocesses in unit tests
|
|
93
|
+
server.router.start_workers = MagicMock()
|
|
94
|
+
server.router.dispatch_request = AsyncMock()
|
|
95
|
+
server.router.forward_cancel = AsyncMock()
|
|
94
96
|
|
|
95
97
|
stop_event = asyncio.Event()
|
|
96
98
|
server_loop = asyncio.create_task(server.serve(stop_event=stop_event))
|
|
@@ -324,142 +326,83 @@ class TestRetryOnServerError:
|
|
|
324
326
|
await client.close()
|
|
325
327
|
|
|
326
328
|
|
|
327
|
-
class
|
|
328
|
-
"""Tests that client-side cancellation
|
|
329
|
+
class TestCancelForwarding:
|
|
330
|
+
"""Tests that client-side cancellation is forwarded through the router.
|
|
329
331
|
|
|
330
|
-
|
|
331
|
-
|
|
332
|
+
With the multi-process architecture, the ZMQEnvServer receives cancel
|
|
333
|
+
signals from the client and forwards them via ``router.forward_cancel()``.
|
|
334
|
+
These tests verify the server correctly routes cancels to the router.
|
|
332
335
|
"""
|
|
333
336
|
|
|
334
337
|
@pytest.mark.asyncio
|
|
335
|
-
async def
|
|
336
|
-
|
|
337
|
-
):
|
|
338
|
-
"""Cancellation should still propagate before process_request enters its body."""
|
|
339
|
-
process_request_blocked = asyncio.Event()
|
|
340
|
-
original_process_request_entered = asyncio.Event()
|
|
341
|
-
server_task_cancelled = asyncio.Event()
|
|
342
|
-
|
|
338
|
+
async def test_cancel_signal_forwarded_to_router(self):
|
|
339
|
+
"""Client cancellation sends empty payload, server calls router.forward_cancel."""
|
|
343
340
|
async with run_server_and_client() as (server, client):
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
async def delayed_process_request(
|
|
347
|
-
client_id,
|
|
348
|
-
request_id_bytes,
|
|
349
|
-
payload_bytes,
|
|
350
|
-
):
|
|
351
|
-
process_request_blocked.set()
|
|
352
|
-
try:
|
|
353
|
-
await asyncio.Event().wait()
|
|
354
|
-
original_process_request_entered.set()
|
|
355
|
-
return await original_process_request(
|
|
356
|
-
client_id,
|
|
357
|
-
request_id_bytes,
|
|
358
|
-
payload_bytes,
|
|
359
|
-
)
|
|
360
|
-
except asyncio.CancelledError:
|
|
361
|
-
server_task_cancelled.set()
|
|
362
|
-
raise
|
|
363
|
-
|
|
364
|
-
server.process_request = delayed_process_request # type: ignore[assignment]
|
|
365
|
-
|
|
341
|
+
# Send a request
|
|
366
342
|
client_task = asyncio.create_task(
|
|
367
343
|
client.send_request(
|
|
368
344
|
make_rollout_request(), RunRolloutResponse, timeout=30
|
|
369
345
|
)
|
|
370
346
|
)
|
|
371
347
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
assert
|
|
348
|
+
# Wait for dispatch to be called
|
|
349
|
+
await asyncio.sleep(0.3)
|
|
350
|
+
assert server.router.dispatch_request.call_count == 1
|
|
375
351
|
|
|
352
|
+
# Cancel on the client side — this sends an empty-payload frame
|
|
376
353
|
client_task.cancel()
|
|
377
354
|
with pytest.raises(asyncio.CancelledError):
|
|
378
355
|
await client_task
|
|
379
356
|
|
|
380
|
-
|
|
381
|
-
|
|
357
|
+
# Give the cancel signal time to propagate
|
|
358
|
+
await asyncio.sleep(0.3)
|
|
359
|
+
|
|
360
|
+
# The server should have forwarded the cancel to the router
|
|
361
|
+
assert server.router.forward_cancel.call_count == 1
|
|
362
|
+
call_args = server.router.forward_cancel.call_args
|
|
363
|
+
# forward_cancel(request_id, client_id)
|
|
364
|
+
assert isinstance(call_args[0][0], bytes) # request_id
|
|
365
|
+
assert isinstance(call_args[0][1], bytes) # client_id
|
|
382
366
|
|
|
383
367
|
@pytest.mark.asyncio
|
|
384
|
-
async def
|
|
385
|
-
"""
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
async def slow_handle_run_rollout(request):
|
|
393
|
-
server_task_started.set()
|
|
394
|
-
try:
|
|
395
|
-
await asyncio.sleep(60)
|
|
396
|
-
return RunRolloutResponse(output=None)
|
|
397
|
-
except asyncio.CancelledError:
|
|
398
|
-
server_task_cancelled.set()
|
|
399
|
-
raise
|
|
400
|
-
|
|
401
|
-
async with run_server_and_client(slow_handle_run_rollout) as (server, client):
|
|
402
|
-
client_task = asyncio.create_task(
|
|
403
|
-
client.send_request(
|
|
404
|
-
make_rollout_request(), RunRolloutResponse, timeout=30
|
|
368
|
+
async def test_timeout_sends_cancel_to_router(self):
|
|
369
|
+
"""Client timeout sends cancel signal, server calls router.forward_cancel."""
|
|
370
|
+
async with run_server_and_client() as (server, client):
|
|
371
|
+
# Use a short timeout
|
|
372
|
+
with pytest.raises(TimeoutError):
|
|
373
|
+
await client.send_request(
|
|
374
|
+
make_rollout_request(), RunRolloutResponse, timeout=0.5
|
|
405
375
|
)
|
|
406
|
-
)
|
|
407
376
|
|
|
408
|
-
#
|
|
409
|
-
await asyncio.
|
|
410
|
-
assert len(server.request_tasks) == 1
|
|
377
|
+
# Give the cancel signal time to propagate
|
|
378
|
+
await asyncio.sleep(0.3)
|
|
411
379
|
|
|
412
|
-
#
|
|
413
|
-
|
|
414
|
-
with pytest.raises(asyncio.CancelledError):
|
|
415
|
-
await client_task
|
|
416
|
-
|
|
417
|
-
# Give the system time to propagate the cancellation
|
|
418
|
-
await asyncio.sleep(0.5)
|
|
380
|
+
# Dispatch should have been called
|
|
381
|
+
assert server.router.dispatch_request.call_count == 1
|
|
419
382
|
|
|
420
|
-
# The server
|
|
421
|
-
|
|
422
|
-
assert server_task_cancelled.is_set(), (
|
|
423
|
-
"Server-side task was NOT cancelled even though the client "
|
|
424
|
-
"cancelled the request. The server is still consuming resources "
|
|
425
|
-
"for a request nobody is waiting for."
|
|
426
|
-
)
|
|
383
|
+
# The server should have forwarded the cancel to the router
|
|
384
|
+
assert server.router.forward_cancel.call_count == 1
|
|
427
385
|
|
|
428
386
|
@pytest.mark.asyncio
|
|
429
|
-
async def
|
|
430
|
-
"""
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
server_task_cancelled = asyncio.Event()
|
|
436
|
-
|
|
437
|
-
async def slow_handle_run_rollout(request):
|
|
438
|
-
server_task_started.set()
|
|
439
|
-
try:
|
|
440
|
-
await asyncio.sleep(60)
|
|
441
|
-
return RunRolloutResponse(output=None)
|
|
442
|
-
except asyncio.CancelledError:
|
|
443
|
-
server_task_cancelled.set()
|
|
444
|
-
raise
|
|
445
|
-
|
|
446
|
-
async with run_server_and_client(slow_handle_run_rollout) as (server, client):
|
|
447
|
-
# Use a very short timeout so the client gives up quickly
|
|
448
|
-
with pytest.raises(TimeoutError):
|
|
449
|
-
await client.send_request(
|
|
450
|
-
make_rollout_request(), RunRolloutResponse, timeout=0.5
|
|
387
|
+
async def test_dispatch_called_with_correct_frames(self):
|
|
388
|
+
"""Requests are dispatched to the router with client_id, request_id, payload."""
|
|
389
|
+
async with run_server_and_client() as (server, client):
|
|
390
|
+
client_task = asyncio.create_task(
|
|
391
|
+
client.send_request(
|
|
392
|
+
make_rollout_request(), RunRolloutResponse, timeout=30
|
|
451
393
|
)
|
|
394
|
+
)
|
|
452
395
|
|
|
453
|
-
|
|
454
|
-
await asyncio.wait_for(server_task_started.wait(), timeout=5)
|
|
455
|
-
assert len(server.request_tasks) == 1
|
|
396
|
+
await asyncio.sleep(0.3)
|
|
456
397
|
|
|
457
|
-
|
|
458
|
-
|
|
398
|
+
assert server.router.dispatch_request.call_count == 1
|
|
399
|
+
call_args = server.router.dispatch_request.call_args
|
|
400
|
+
client_id, request_id, payload = call_args[0]
|
|
401
|
+
assert isinstance(client_id, bytes)
|
|
402
|
+
assert isinstance(request_id, bytes)
|
|
403
|
+
assert isinstance(payload, bytes)
|
|
404
|
+
assert len(payload) > 0 # non-empty payload = real request
|
|
459
405
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
"The server continues processing a request that already "
|
|
464
|
-
"timed out on the client."
|
|
465
|
-
)
|
|
406
|
+
client_task.cancel()
|
|
407
|
+
with contextlib.suppress(asyncio.CancelledError):
|
|
408
|
+
await client_task
|
|
@@ -5,6 +5,12 @@ from pathlib import Path
|
|
|
5
5
|
import pytest
|
|
6
6
|
import tomllib
|
|
7
7
|
|
|
8
|
+
# Timeout in seconds for each subprocess step
|
|
9
|
+
INSTALL_TIMEOUT = 600 # 10 minutes for venv creation + package install
|
|
10
|
+
IMPORT_TIMEOUT = 120 # 2 minutes for importing a package
|
|
11
|
+
LOAD_TIMEOUT = 300 # 5 minutes for loading an environment (may download datasets)
|
|
12
|
+
EVAL_TIMEOUT = 600 # 10 minutes for running vf-eval with -n 1 -r 1
|
|
13
|
+
|
|
8
14
|
SKIPPED_ENVS = [
|
|
9
15
|
# Requires EXA_API_KEY environment variable
|
|
10
16
|
"mcp_search_env",
|
|
@@ -99,9 +105,17 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
|
|
|
99
105
|
f"uv pip install {repo_root.as_posix()} && "
|
|
100
106
|
f"uv pip install {env_dir.absolute().as_posix()}"
|
|
101
107
|
)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
try:
|
|
109
|
+
process = subprocess.run(
|
|
110
|
+
cmd,
|
|
111
|
+
shell=True,
|
|
112
|
+
executable="/bin/bash",
|
|
113
|
+
capture_output=True,
|
|
114
|
+
text=True,
|
|
115
|
+
timeout=INSTALL_TIMEOUT,
|
|
116
|
+
)
|
|
117
|
+
except subprocess.TimeoutExpired:
|
|
118
|
+
pytest.fail(f"Timed out after {INSTALL_TIMEOUT}s installing {env_dir.name}")
|
|
105
119
|
assert process.returncode == 0, (
|
|
106
120
|
f"Failed to create virtual environment: {process.stderr}"
|
|
107
121
|
)
|
|
@@ -114,25 +128,49 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
|
|
|
114
128
|
def help_test_can_import_env(tmp_venv_dir: Path, env_dir: Path):
|
|
115
129
|
"""Test that the environment can be imported as a package."""
|
|
116
130
|
import_cmd = f"cd {tmp_venv_dir} && source .venv/bin/activate && uv run python -c 'import {env_dir.name}'"
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
131
|
+
try:
|
|
132
|
+
process = subprocess.run(
|
|
133
|
+
import_cmd,
|
|
134
|
+
shell=True,
|
|
135
|
+
executable="/bin/bash",
|
|
136
|
+
capture_output=True,
|
|
137
|
+
text=True,
|
|
138
|
+
timeout=IMPORT_TIMEOUT,
|
|
139
|
+
)
|
|
140
|
+
except subprocess.TimeoutExpired:
|
|
141
|
+
pytest.fail(f"Timed out after {IMPORT_TIMEOUT}s importing {env_dir.name}")
|
|
120
142
|
assert process.returncode == 0, "Failed to import environment"
|
|
121
143
|
|
|
122
144
|
|
|
123
145
|
def help_test_can_load_env(tmp_venv_dir: Path, env_dir: Path):
|
|
124
146
|
"""Test that the environment can be loaded."""
|
|
125
147
|
load_cmd = f"""cd {tmp_venv_dir} && source .venv/bin/activate && uv run python -c 'import verifiers as vf; vf.load_environment("{env_dir.name}")'"""
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
148
|
+
try:
|
|
149
|
+
process = subprocess.run(
|
|
150
|
+
load_cmd,
|
|
151
|
+
shell=True,
|
|
152
|
+
executable="/bin/bash",
|
|
153
|
+
capture_output=True,
|
|
154
|
+
text=True,
|
|
155
|
+
timeout=LOAD_TIMEOUT,
|
|
156
|
+
)
|
|
157
|
+
except subprocess.TimeoutExpired:
|
|
158
|
+
pytest.fail(f"Timed out after {LOAD_TIMEOUT}s loading {env_dir.name}")
|
|
129
159
|
assert process.returncode == 0, "Failed to load environment"
|
|
130
160
|
|
|
131
161
|
|
|
132
162
|
def help_test_can_eval_env(tmp_venv_dir: Path, env_dir: Path):
|
|
133
163
|
"""Test that the environment can be run via vf-eval."""
|
|
134
164
|
eval_cmd = f"cd {tmp_venv_dir} && source .venv/bin/activate && uv run vf-eval {env_dir.name} -n 1 -r 1 -t 512"
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
165
|
+
try:
|
|
166
|
+
process = subprocess.run(
|
|
167
|
+
eval_cmd,
|
|
168
|
+
shell=True,
|
|
169
|
+
executable="/bin/bash",
|
|
170
|
+
capture_output=True,
|
|
171
|
+
text=True,
|
|
172
|
+
timeout=EVAL_TIMEOUT,
|
|
173
|
+
)
|
|
174
|
+
except subprocess.TimeoutExpired:
|
|
175
|
+
pytest.fail(f"Timed out after {EVAL_TIMEOUT}s evaluating {env_dir.name}")
|
|
138
176
|
assert process.returncode == 0, "Failed to evaluate environment"
|