verifiers 0.1.12.dev1__tar.gz → 0.1.12.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/PKG-INFO +5 -3
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/pyproject.toml +5 -3
- verifiers-0.1.12.dev3/tests/test_composable_env.py +260 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_endpoint_registry.py +97 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_cli.py +82 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_gepa_cli.py +27 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rlm_env.py +684 -231
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_sandbox_mixin.py +7 -48
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tui_info_formatting.py +58 -16
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/__init__.py +1 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_chat_completions_client.py +5 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_chat_completions_token_client.py +109 -92
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/openai_completions_client.py +7 -1
- verifiers-0.1.12.dev3/verifiers/envs/experimental/__init__.py +28 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/cli_agent_env.py +31 -5
- verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/README.md +151 -0
- verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/__init__.py +17 -0
- verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/composable_env.py +205 -0
- verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/harness.py +58 -0
- verifiers-0.1.12.dev3/verifiers/envs/experimental/composable/task.py +362 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_env.py +6 -2
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/rlm_env.py +958 -595
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/sandbox_mixin.py +11 -36
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/multiturn_env.py +14 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/math_rubric.py +7 -9
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/eval.py +33 -8
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/gepa.py +26 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/tui.py +887 -235
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_server.py +2 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_worker.py +2 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/types.py +24 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/eval_utils.py +14 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/process_utils.py +15 -0
- verifiers-0.1.12.dev1/verifiers/envs/experimental/__init__.py +0 -3
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/.gitignore +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/LICENSE +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/conftest.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_build_script.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_env_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_env_server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_environment.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_envs.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_imports.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_logging.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/decorators.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/harbor_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/errors.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev3}/verifiers/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.12.dev1
|
|
3
|
+
Version: 0.1.12.dev3
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -34,12 +34,14 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
|
34
34
|
Requires-Dist: numpy
|
|
35
35
|
Requires-Dist: openai-agents>=0.0.7
|
|
36
36
|
Requires-Dist: openai>=1.108.1
|
|
37
|
-
Requires-Dist: prime-sandboxes>=0.2.
|
|
38
|
-
Requires-Dist: prime-tunnel>=0.1.
|
|
37
|
+
Requires-Dist: prime-sandboxes>=0.2.19
|
|
38
|
+
Requires-Dist: prime-tunnel>=0.1.5
|
|
39
39
|
Requires-Dist: pydantic>=2.11.9
|
|
40
40
|
Requires-Dist: pyzmq>=27.1.0
|
|
41
|
+
Requires-Dist: regex<2026.4.4
|
|
41
42
|
Requires-Dist: requests
|
|
42
43
|
Requires-Dist: rich
|
|
44
|
+
Requires-Dist: setproctitle>=1.3.0
|
|
43
45
|
Requires-Dist: tenacity>=8.5.0
|
|
44
46
|
Requires-Dist: textual
|
|
45
47
|
Requires-Dist: tomli; python_version < '3.11'
|
|
@@ -37,8 +37,8 @@ dependencies = [
|
|
|
37
37
|
"nest-asyncio>=1.6.0", # for jupyter notebooks
|
|
38
38
|
"openai>=1.108.1",
|
|
39
39
|
"openai-agents>=0.0.7",
|
|
40
|
-
"prime-tunnel>=0.1.
|
|
41
|
-
"prime-sandboxes>=0.2.
|
|
40
|
+
"prime-tunnel>=0.1.5",
|
|
41
|
+
"prime-sandboxes>=0.2.19",
|
|
42
42
|
"pydantic>=2.11.9",
|
|
43
43
|
"requests",
|
|
44
44
|
"rich",
|
|
@@ -51,6 +51,8 @@ dependencies = [
|
|
|
51
51
|
"pyzmq>=27.1.0",
|
|
52
52
|
"msgpack>=1.1.2",
|
|
53
53
|
"aiolimiter>=1.2.1",
|
|
54
|
+
"setproctitle>=1.3.0",
|
|
55
|
+
"regex<2026.4.4", # 2026.4.4 missing cp312/cp313 wheels
|
|
54
56
|
]
|
|
55
57
|
|
|
56
58
|
[dependency-groups]
|
|
@@ -104,7 +106,7 @@ rl = [
|
|
|
104
106
|
|
|
105
107
|
[tool.uv]
|
|
106
108
|
preview = true
|
|
107
|
-
required-version = "
|
|
109
|
+
required-version = ">=0.11.1"
|
|
108
110
|
|
|
109
111
|
[tool.uv.extra-build-dependencies]
|
|
110
112
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""Tests for the composable architecture: Task, TaskSet, SandboxTaskSet, SandboxSpec."""
|
|
2
|
+
|
|
3
|
+
from types import SimpleNamespace
|
|
4
|
+
from unittest.mock import AsyncMock
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import verifiers as vf
|
|
9
|
+
from verifiers.envs.experimental.composable import (
|
|
10
|
+
ComposableEnv,
|
|
11
|
+
Harness,
|
|
12
|
+
SandboxSpec,
|
|
13
|
+
SandboxTaskSet,
|
|
14
|
+
Task,
|
|
15
|
+
TaskSet,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# ── Mock Rubrics ──────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MockSandboxRubric(vf.Rubric):
|
|
23
|
+
def __init__(self, **kwargs):
|
|
24
|
+
super().__init__(**kwargs)
|
|
25
|
+
self.add_reward_func(self.solved)
|
|
26
|
+
|
|
27
|
+
async def solved(self, state, **kwargs) -> float:
|
|
28
|
+
return 1.0 if state.get("test_output") == "PASS" else 0.0
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MockMathRubric(vf.Rubric):
|
|
32
|
+
def __init__(self, **kwargs):
|
|
33
|
+
super().__init__(**kwargs)
|
|
34
|
+
self.add_reward_func(self.correct)
|
|
35
|
+
|
|
36
|
+
async def correct(self, state, **kwargs) -> float:
|
|
37
|
+
return 1.0 if state.get("info", {}).get("id") == 0 else 0.0
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ── Mock TaskSets ───────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MockSandboxTaskSet(SandboxTaskSet):
|
|
44
|
+
"""SandboxTaskSet for testing."""
|
|
45
|
+
|
|
46
|
+
def get_instruction(self, info):
|
|
47
|
+
return f"Fix bug #{info.get('id', 0)}"
|
|
48
|
+
|
|
49
|
+
def get_sandbox_spec(self, info):
|
|
50
|
+
return SandboxSpec(image="python:3.11-slim", cpu_cores=2, memory_gb=2)
|
|
51
|
+
|
|
52
|
+
def get_rubric(self):
|
|
53
|
+
return MockSandboxRubric()
|
|
54
|
+
|
|
55
|
+
def get_workdir(self, info):
|
|
56
|
+
return "/testbed"
|
|
57
|
+
|
|
58
|
+
def get_env_vars(self):
|
|
59
|
+
return {"FOO": "bar"}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class MockTaskSet(TaskSet):
|
|
63
|
+
"""Plain TaskSet (no sandbox) for testing."""
|
|
64
|
+
|
|
65
|
+
def get_instruction(self, info):
|
|
66
|
+
return info.get("question", "")
|
|
67
|
+
|
|
68
|
+
def get_rubric(self):
|
|
69
|
+
return MockMathRubric()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _make_dataset(n=3):
|
|
73
|
+
from datasets import Dataset
|
|
74
|
+
|
|
75
|
+
return Dataset.from_dict(
|
|
76
|
+
{
|
|
77
|
+
"info": [{"id": i, "question": f"q{i}"} for i in range(n)],
|
|
78
|
+
"answer": ["" for _ in range(n)],
|
|
79
|
+
}
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ── SandboxSpec ─────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_sandbox_spec_defaults():
|
|
87
|
+
spec = SandboxSpec()
|
|
88
|
+
assert spec.image == "python:3.11-slim"
|
|
89
|
+
assert spec.cpu_cores == 4
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_sandbox_spec_custom():
|
|
93
|
+
spec = SandboxSpec(image="lean-tactic:v4.27", gpu_count=1)
|
|
94
|
+
assert spec.image == "lean-tactic:v4.27"
|
|
95
|
+
assert spec.gpu_count == 1
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ── Task from SandboxTaskSet ───────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_task_sandbox_spec():
|
|
102
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
103
|
+
task = ts[0]
|
|
104
|
+
assert isinstance(task, Task)
|
|
105
|
+
assert task.sandbox_spec is not None
|
|
106
|
+
assert task.sandbox_spec.image == "python:3.11-slim"
|
|
107
|
+
assert task.sandbox_spec.cpu_cores == 2
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_task_image():
|
|
111
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
112
|
+
task = ts[0]
|
|
113
|
+
assert task.image == "python:3.11-slim"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def test_task_workdir():
|
|
117
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
118
|
+
task = ts[0]
|
|
119
|
+
assert task.workdir == "/testbed"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def test_task_repr_sandbox():
|
|
123
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
124
|
+
task = ts[0]
|
|
125
|
+
assert "python:3.11-slim" in repr(task)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ── Task from plain TaskSet ────────────────────────────────────────────
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_task_no_sandbox():
|
|
132
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
133
|
+
task = ts[0]
|
|
134
|
+
assert task.sandbox_spec is None
|
|
135
|
+
assert task.image is None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_task_repr_no_sandbox():
|
|
139
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
140
|
+
task = ts[0]
|
|
141
|
+
assert "no sandbox" in repr(task)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ── TaskSet ─────────────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def test_taskset_isinstance():
|
|
148
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
149
|
+
assert not isinstance(ts, SandboxTaskSet)
|
|
150
|
+
|
|
151
|
+
ts2 = MockSandboxTaskSet(dataset=_make_dataset(), name="swe")
|
|
152
|
+
assert isinstance(ts2, SandboxTaskSet)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def test_taskset_len():
|
|
156
|
+
ts = MockTaskSet(dataset=_make_dataset(5), name="test")
|
|
157
|
+
assert len(ts) == 5
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_taskset_iter():
|
|
161
|
+
ts = MockTaskSet(dataset=_make_dataset(3), name="test")
|
|
162
|
+
tasks = list(ts)
|
|
163
|
+
assert len(tasks) == 3
|
|
164
|
+
assert all(isinstance(t, Task) for t in tasks)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_taskset_filter():
|
|
168
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
169
|
+
filtered = ts.filter(lambda ex: ex["info"]["id"] < 3)
|
|
170
|
+
assert len(filtered) == 3
|
|
171
|
+
assert isinstance(filtered, MockSandboxTaskSet)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def test_taskset_take():
|
|
175
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
176
|
+
taken = ts.take(2)
|
|
177
|
+
assert len(taken) == 2
|
|
178
|
+
assert isinstance(taken, MockSandboxTaskSet)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_taskset_repr():
|
|
182
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="mytest")
|
|
183
|
+
assert "mytest" in repr(ts)
|
|
184
|
+
assert "3" in repr(ts)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@pytest.mark.asyncio
|
|
188
|
+
async def test_composable_env_exports_task_workdir():
|
|
189
|
+
taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
190
|
+
env = ComposableEnv(
|
|
191
|
+
taskset=taskset,
|
|
192
|
+
harness=Harness(run_command="true"),
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
env_vars = await env.build_env_vars(
|
|
196
|
+
{
|
|
197
|
+
"info": {"id": 0},
|
|
198
|
+
"interception_base_url": "https://test.trycloudflare.com/v1",
|
|
199
|
+
}
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
assert env_vars["AGENT_WORKDIR"] == "/testbed"
|
|
203
|
+
assert env_vars["FOO"] == "bar"
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@pytest.mark.asyncio
|
|
207
|
+
async def test_composable_env_quotes_paths_in_mkdir_command():
|
|
208
|
+
taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
209
|
+
env = ComposableEnv(
|
|
210
|
+
taskset=taskset,
|
|
211
|
+
harness=Harness(
|
|
212
|
+
run_command="true",
|
|
213
|
+
instruction_path="/tmp/with space/prompt.txt",
|
|
214
|
+
system_prompt="system",
|
|
215
|
+
system_prompt_path="/tmp/other path/system.txt",
|
|
216
|
+
),
|
|
217
|
+
)
|
|
218
|
+
env.sandbox_client = SimpleNamespace(
|
|
219
|
+
execute_command=AsyncMock(),
|
|
220
|
+
teardown=lambda: None,
|
|
221
|
+
)
|
|
222
|
+
env.taskset.setup = AsyncMock()
|
|
223
|
+
env.upload_content = AsyncMock()
|
|
224
|
+
|
|
225
|
+
await env.post_sandbox_setup({"sandbox_id": "sbx", "info": {"id": 0}})
|
|
226
|
+
|
|
227
|
+
env.sandbox_client.execute_command.assert_awaited_once_with(
|
|
228
|
+
"sbx",
|
|
229
|
+
"mkdir -p '/tmp/other path' '/tmp/with space'",
|
|
230
|
+
timeout=10,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
@pytest.mark.asyncio
|
|
235
|
+
async def test_composable_env_quotes_log_path_when_collecting_logs():
|
|
236
|
+
taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
237
|
+
env = ComposableEnv(
|
|
238
|
+
taskset=taskset,
|
|
239
|
+
harness=Harness(
|
|
240
|
+
run_command="true",
|
|
241
|
+
log_path="/tmp/log dir/agent.log",
|
|
242
|
+
),
|
|
243
|
+
)
|
|
244
|
+
env.sandbox_client = SimpleNamespace(
|
|
245
|
+
execute_command=AsyncMock(
|
|
246
|
+
return_value=SimpleNamespace(stdout="agent log\n", stderr="", exit_code=0)
|
|
247
|
+
),
|
|
248
|
+
teardown=lambda: None,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
state = {"sandbox_id": "sbx", "timing": {"total_ms": 0}}
|
|
252
|
+
|
|
253
|
+
await env.post_rollout(state)
|
|
254
|
+
|
|
255
|
+
env.sandbox_client.execute_command.assert_awaited_once_with(
|
|
256
|
+
"sbx",
|
|
257
|
+
"cat '/tmp/log dir/agent.log' 2>/dev/null || echo '<no logs>'",
|
|
258
|
+
working_dir=None,
|
|
259
|
+
)
|
|
260
|
+
assert state["agent_logs"] == "agent log"
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
import pytest
|
|
4
|
+
from pydantic import ValidationError
|
|
5
|
+
|
|
6
|
+
from verifiers.types import ClientConfig
|
|
3
7
|
from verifiers.utils.eval_utils import load_endpoints
|
|
4
8
|
|
|
5
9
|
|
|
@@ -220,3 +224,96 @@ def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
|
220
224
|
endpoints = load_endpoints(str(registry_path))
|
|
221
225
|
|
|
222
226
|
assert endpoints["haiku"][0]["api_client_type"] == "anthropic_messages"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
230
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
231
|
+
registry_path.write_text(
|
|
232
|
+
"[[endpoint]]\n"
|
|
233
|
+
'endpoint_id = "proxy"\n'
|
|
234
|
+
'model = "m"\n'
|
|
235
|
+
'url = "https://api.example/v1"\n'
|
|
236
|
+
'key = "K"\n'
|
|
237
|
+
'headers = { "X-Custom" = "v1" }\n',
|
|
238
|
+
encoding="utf-8",
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
endpoints = load_endpoints(str(registry_path))
|
|
242
|
+
|
|
243
|
+
assert endpoints["proxy"][0]["extra_headers"] == {"X-Custom": "v1"}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
247
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
248
|
+
registry_path.write_text(
|
|
249
|
+
"[[endpoint]]\n"
|
|
250
|
+
'endpoint_id = "proxy"\n'
|
|
251
|
+
'model = "m"\n'
|
|
252
|
+
'url = "https://api.example/v1"\n'
|
|
253
|
+
'key = "K"\n'
|
|
254
|
+
'extra_headers = { "X-A" = "a" }\n',
|
|
255
|
+
encoding="utf-8",
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
endpoints = load_endpoints(str(registry_path))
|
|
259
|
+
|
|
260
|
+
assert endpoints["proxy"][0]["extra_headers"] == {"X-A": "a"}
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def test_load_endpoints_toml_rejects_headers_and_extra_headers_together(
|
|
264
|
+
tmp_path: Path,
|
|
265
|
+
):
|
|
266
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
267
|
+
registry_path.write_text(
|
|
268
|
+
"[[endpoint]]\n"
|
|
269
|
+
'endpoint_id = "proxy"\n'
|
|
270
|
+
'model = "m"\n'
|
|
271
|
+
'url = "https://api.example/v1"\n'
|
|
272
|
+
'key = "K"\n'
|
|
273
|
+
'headers = { "X-A" = "a" }\n'
|
|
274
|
+
'extra_headers = { "X-B" = "b" }\n',
|
|
275
|
+
encoding="utf-8",
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
endpoints = load_endpoints(str(registry_path))
|
|
279
|
+
|
|
280
|
+
assert endpoints == {}
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def test_load_endpoints_python_registry_accepts_headers_dict(tmp_path: Path):
|
|
284
|
+
registry_path = tmp_path / "endpoints.py"
|
|
285
|
+
registry_path.write_text(
|
|
286
|
+
"ENDPOINTS = {\n"
|
|
287
|
+
' "p": {"model": "m", "url": "https://x/v1", "key": "K", '
|
|
288
|
+
'"headers": {"X-Foo": "bar"}},\n'
|
|
289
|
+
"}\n",
|
|
290
|
+
encoding="utf-8",
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
endpoints = load_endpoints(str(registry_path))
|
|
294
|
+
|
|
295
|
+
assert endpoints["p"][0]["extra_headers"] == {"X-Foo": "bar"}
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def test_load_endpoints_malformed_headers_string_falls_back_to_empty_registry(
|
|
299
|
+
tmp_path: Path,
|
|
300
|
+
):
|
|
301
|
+
toml_path = tmp_path / "endpoints.toml"
|
|
302
|
+
toml_path.write_text(
|
|
303
|
+
"[[endpoint]]\n"
|
|
304
|
+
'endpoint_id = "x"\n'
|
|
305
|
+
'model = "m"\n'
|
|
306
|
+
'url = "https://api.example/v1"\n'
|
|
307
|
+
'key = "K"\n'
|
|
308
|
+
'headers = "invalid"\n',
|
|
309
|
+
encoding="utf-8",
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
assert load_endpoints(str(toml_path)) == {}
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def test_client_config_validates_extra_header_keys():
|
|
316
|
+
with pytest.raises(ValidationError):
|
|
317
|
+
ClientConfig(extra_headers={"": "x"})
|
|
318
|
+
with pytest.raises(ValidationError):
|
|
319
|
+
ClientConfig(extra_headers={"X": 1}) # type: ignore[arg-type]
|
|
@@ -40,6 +40,7 @@ def run_cli(make_metadata, make_state, make_input):
|
|
|
40
40
|
"api_key_var": "OPENAI_API_KEY",
|
|
41
41
|
"api_base_url": "https://api.openai.com/v1",
|
|
42
42
|
"header": None,
|
|
43
|
+
"headers": None,
|
|
43
44
|
"num_examples": 1,
|
|
44
45
|
"rollouts_per_example": 1,
|
|
45
46
|
"max_concurrent": 1,
|
|
@@ -229,6 +230,87 @@ def test_cli_temperature_not_added_when_none(monkeypatch, run_cli):
|
|
|
229
230
|
assert "temperature" not in sa
|
|
230
231
|
|
|
231
232
|
|
|
233
|
+
def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
|
|
234
|
+
captured = run_cli(
|
|
235
|
+
monkeypatch,
|
|
236
|
+
{
|
|
237
|
+
"headers": {"X-A": "a", "X-B": "b"},
|
|
238
|
+
"header": ["X-B: override", "X-C: c"],
|
|
239
|
+
},
|
|
240
|
+
endpoints={},
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
assert captured["configs"][0].client_config.extra_headers == {
|
|
244
|
+
"X-A": "a",
|
|
245
|
+
"X-B": "override",
|
|
246
|
+
"X-C": "c",
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
|
|
251
|
+
cfg = tmp_path / "eval.toml"
|
|
252
|
+
cfg.write_text(
|
|
253
|
+
"[[eval]]\n"
|
|
254
|
+
'env_id = "env1"\n'
|
|
255
|
+
'model = "gpt-5-mini"\n'
|
|
256
|
+
'headers = { "X-Table" = "t" }\n'
|
|
257
|
+
'header = [ "X-List: l", "X-Table: override" ]\n',
|
|
258
|
+
encoding="utf-8",
|
|
259
|
+
)
|
|
260
|
+
captured = run_cli(
|
|
261
|
+
monkeypatch,
|
|
262
|
+
{"env_id_or_config": str(cfg)},
|
|
263
|
+
endpoints={
|
|
264
|
+
"gpt-5-mini": [
|
|
265
|
+
{
|
|
266
|
+
"model": "gpt-5-mini",
|
|
267
|
+
"url": "https://a.example/v1",
|
|
268
|
+
"key": "OPENAI_API_KEY",
|
|
269
|
+
"extra_headers": {"X-Reg": "r"},
|
|
270
|
+
}
|
|
271
|
+
]
|
|
272
|
+
},
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
assert captured["configs"][0].client_config.extra_headers == {
|
|
276
|
+
"X-Reg": "r",
|
|
277
|
+
"X-Table": "override",
|
|
278
|
+
"X-List": "l",
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def test_cli_multi_variant_preserves_per_row_registry_headers(monkeypatch, run_cli):
|
|
283
|
+
captured = run_cli(
|
|
284
|
+
monkeypatch,
|
|
285
|
+
{
|
|
286
|
+
"model": "gpt-5-mini",
|
|
287
|
+
"api_key_var": None,
|
|
288
|
+
"api_base_url": None,
|
|
289
|
+
"header": ["X-Eval: e"],
|
|
290
|
+
},
|
|
291
|
+
endpoints={
|
|
292
|
+
"gpt-5-mini": [
|
|
293
|
+
{
|
|
294
|
+
"model": "gpt-5-mini",
|
|
295
|
+
"url": "https://a.example/v1",
|
|
296
|
+
"key": "OPENAI_API_KEY",
|
|
297
|
+
"extra_headers": {"X-Row": "a"},
|
|
298
|
+
},
|
|
299
|
+
{
|
|
300
|
+
"model": "gpt-5-mini",
|
|
301
|
+
"url": "https://b.example/v1",
|
|
302
|
+
"key": "OPENAI_API_KEY",
|
|
303
|
+
"extra_headers": {"X-Row": "b"},
|
|
304
|
+
},
|
|
305
|
+
]
|
|
306
|
+
},
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
cfgs = captured["configs"][0].client_config.endpoint_configs
|
|
310
|
+
assert cfgs[0].extra_headers == {"X-Row": "a", "X-Eval": "e"}
|
|
311
|
+
assert cfgs[1].extra_headers == {"X-Row": "b", "X-Eval": "e"}
|
|
312
|
+
|
|
313
|
+
|
|
232
314
|
def test_cli_endpoint_alias_multi_variant_sets_multi_base_urls(monkeypatch, run_cli):
|
|
233
315
|
captured = run_cli(
|
|
234
316
|
monkeypatch,
|
|
@@ -3,7 +3,33 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
|
|
6
|
-
from verifiers.scripts.gepa import
|
|
6
|
+
from verifiers.scripts.gepa import (
|
|
7
|
+
_gepa_extra_headers_from_group,
|
|
8
|
+
load_gepa_toml_config,
|
|
9
|
+
resolve_gepa_config_args,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_gepa_extra_headers_from_group_requires_consistent_variants():
|
|
14
|
+
with pytest.raises(ValueError, match="different headers"):
|
|
15
|
+
_gepa_extra_headers_from_group(
|
|
16
|
+
[
|
|
17
|
+
{"extra_headers": {"X-A": "1"}},
|
|
18
|
+
{"extra_headers": {"X-A": "2"}},
|
|
19
|
+
],
|
|
20
|
+
"my-alias",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_gepa_extra_headers_from_group_returns_first_row_dict():
|
|
25
|
+
h = _gepa_extra_headers_from_group(
|
|
26
|
+
[
|
|
27
|
+
{"extra_headers": {"X-A": "x"}},
|
|
28
|
+
{"extra_headers": {"X-A": "x"}},
|
|
29
|
+
],
|
|
30
|
+
"my-alias",
|
|
31
|
+
)
|
|
32
|
+
assert h == {"X-A": "x"}
|
|
7
33
|
|
|
8
34
|
|
|
9
35
|
def test_load_gepa_toml_config_reads_env_table(tmp_path: Path):
|