verifiers 0.1.15.dev10__tar.gz → 0.1.15.dev12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/.gitignore +0 -2
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/PKG-INFO +31 -21
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/README.md +16 -18
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/pyproject.toml +66 -6
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_endpoint_registry.py +14 -18
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_envs.py +4 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_eval_cli.py +74 -70
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_gepa_cli.py +25 -4
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_imports.py +59 -2
- verifiers-0.1.15.dev12/tests/test_init_script.py +119 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_langchain_deep_agents_wikispeedia.py +28 -10
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_mcp_search_env.py +4 -4
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_opencode_harbor.py +42 -22
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_renderer_client.py +295 -18
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_renderer_e2e.py +28 -18
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_trajectory_processing.py +136 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_bfcl.py +45 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_config_extension.py +1131 -480
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_endpoint_protocols.py +7 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_example_counts.py +10 -10
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_harbor_cli.py +183 -109
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_mini_swe_agent.py +16 -14
- verifiers-0.1.15.dev12/tests/test_v1_nemo_gym_harness.py +427 -0
- verifiers-0.1.15.dev12/tests/test_v1_openenv_taskset.py +236 -0
- verifiers-0.1.15.dev12/tests/test_v1_openreward_taskset.py +199 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_rlm_swe.py +198 -121
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_runtime_lifecycle.py +746 -165
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_scoring_functions.py +30 -20
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_taskset_bindings.py +91 -64
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_textarena_taskset.py +69 -44
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_wiki_search_v1.py +3 -3
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_wordle_v1_env.py +77 -24
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/__init__.py +64 -62
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/client.py +4 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/renderer_client.py +120 -56
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +8 -62
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +12 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/eval.py +11 -11
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/gepa.py +21 -15
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/init.py +67 -76
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/types.py +225 -33
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/env_utils.py +27 -9
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/eval_utils.py +14 -10
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/interception_utils.py +4 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/response_utils.py +42 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +53 -58
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/README.md +279 -250
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/RE_MIGRATION.md +91 -73
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/__init__.py +45 -55
- verifiers-0.1.15.dev12/verifiers/v1/artifact.py +86 -0
- verifiers-0.1.15.dev12/verifiers/v1/config.py +127 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/env.py +55 -61
- verifiers-0.1.15.dev12/verifiers/v1/harness.py +632 -0
- verifiers-0.1.15.dev12/verifiers/v1/model.py +51 -0
- verifiers-0.1.15.dev12/verifiers/v1/program.py +303 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/runtime.py +1076 -616
- verifiers-0.1.15.dev12/verifiers/v1/runtime_handles.py +51 -0
- verifiers-0.1.15.dev12/verifiers/v1/sandbox.py +41 -0
- verifiers-0.1.15.dev12/verifiers/v1/task.py +150 -0
- verifiers-0.1.15.dev12/verifiers/v1/taskset.py +146 -0
- verifiers-0.1.15.dev12/verifiers/v1/toolset.py +292 -0
- verifiers-0.1.15.dev12/verifiers/v1/types.py +54 -0
- verifiers-0.1.15.dev12/verifiers/v1/user.py +132 -0
- verifiers-0.1.15.dev12/verifiers/v1/utils/binding_utils.py +323 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/config_callable_utils.py +7 -7
- verifiers-0.1.15.dev12/verifiers/v1/utils/config_utils.py +296 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/endpoint_utils.py +103 -59
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/judge_utils.py +19 -16
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/lifecycle_utils.py +22 -15
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/mcp_proxy_utils.py +18 -23
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/mcp_utils.py +4 -4
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/object_utils.py +13 -11
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/program_utils.py +106 -81
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/prompt_utils.py +89 -22
- verifiers-0.1.15.dev12/verifiers/v1/utils/runtime_owner_utils.py +131 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/runtime_registry.py +3 -7
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/sandbox_program_utils.py +28 -29
- verifiers-0.1.15.dev12/verifiers/v1/utils/sandbox_python_utils.py +99 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/sandbox_utils.py +270 -215
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/scoring_utils.py +45 -45
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/serialization_utils.py +1 -4
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/task_freeze_utils.py +2 -2
- verifiers-0.1.15.dev12/verifiers/v1/utils/taskset_utils.py +144 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/tool_utils.py +24 -20
- verifiers-0.1.15.dev12/verifiers/v1/utils/toolset_utils.py +217 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/trajectory_utils.py +16 -18
- verifiers-0.1.15.dev10/tests/test_init_script.py +0 -80
- verifiers-0.1.15.dev10/verifiers/v1/config.py +0 -361
- verifiers-0.1.15.dev10/verifiers/v1/harness.py +0 -535
- verifiers-0.1.15.dev10/verifiers/v1/packages/__init__.py +0 -1
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/__init__.py +0 -25
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/command.py +0 -160
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/configs.py +0 -168
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -219
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/opencode.py +0 -234
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/pi.py +0 -195
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/rlm.py +0 -601
- verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/terminus_2.py +0 -252
- verifiers-0.1.15.dev10/verifiers/v1/packages/tasksets/__init__.py +0 -17
- verifiers-0.1.15.dev10/verifiers/v1/packages/tasksets/harbor.py +0 -399
- verifiers-0.1.15.dev10/verifiers/v1/packages/tasksets/textarena.py +0 -153
- verifiers-0.1.15.dev10/verifiers/v1/task.py +0 -94
- verifiers-0.1.15.dev10/verifiers/v1/taskset.py +0 -187
- verifiers-0.1.15.dev10/verifiers/v1/toolset.py +0 -413
- verifiers-0.1.15.dev10/verifiers/v1/types.py +0 -54
- verifiers-0.1.15.dev10/verifiers/v1/user.py +0 -96
- verifiers-0.1.15.dev10/verifiers/v1/utils/artifact_utils.py +0 -29
- verifiers-0.1.15.dev10/verifiers/v1/utils/binding_utils.py +0 -218
- verifiers-0.1.15.dev10/verifiers/v1/utils/config_utils.py +0 -119
- verifiers-0.1.15.dev10/verifiers/v1/utils/runtime_owner_utils.py +0 -124
- verifiers-0.1.15.dev10/verifiers/v1/utils/taskset_utils.py +0 -90
- verifiers-0.1.15.dev10/verifiers/v1/utils/timing_utils.py +0 -119
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/LICENSE +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev12}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev10
|
|
3
|
+
Version: 0.1.15.dev12
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -53,10 +53,20 @@ Provides-Extra: browser
|
|
|
53
53
|
Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
|
|
54
54
|
Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
|
|
55
55
|
Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
56
|
+
Provides-Extra: harnesses
|
|
57
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'harnesses'
|
|
58
|
+
Provides-Extra: nemogym
|
|
59
|
+
Requires-Dist: harnesses[nemogym]>=0.1.1; extra == 'nemogym'
|
|
60
|
+
Requires-Dist: tasksets[nemogym]>=0.1.1; extra == 'nemogym'
|
|
56
61
|
Provides-Extra: openenv
|
|
57
|
-
Requires-Dist: openenv
|
|
62
|
+
Requires-Dist: tasksets[openenv]>=0.1.1; extra == 'openenv'
|
|
63
|
+
Provides-Extra: openreward
|
|
64
|
+
Requires-Dist: tasksets[openreward]>=0.1.1; extra == 'openreward'
|
|
65
|
+
Provides-Extra: packages
|
|
66
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'packages'
|
|
67
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'packages'
|
|
58
68
|
Provides-Extra: renderers
|
|
59
|
-
Requires-Dist: renderers>=0.1.8.
|
|
69
|
+
Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
|
|
60
70
|
Provides-Extra: rg
|
|
61
71
|
Requires-Dist: reasoning-gym; extra == 'rg'
|
|
62
72
|
Provides-Extra: rl
|
|
@@ -73,6 +83,8 @@ Requires-Dist: wandb; extra == 'rl'
|
|
|
73
83
|
Provides-Extra: ta
|
|
74
84
|
Requires-Dist: nltk; extra == 'ta'
|
|
75
85
|
Requires-Dist: textarena; extra == 'ta'
|
|
86
|
+
Provides-Extra: tasksets
|
|
87
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'tasksets'
|
|
76
88
|
Description-Content-Type: text/markdown
|
|
77
89
|
|
|
78
90
|
<p align="center">
|
|
@@ -220,19 +232,13 @@ custom harnesses, use the v1 Taskset/Harness path:
|
|
|
220
232
|
# my_env.py
|
|
221
233
|
import verifiers as vf
|
|
222
234
|
|
|
223
|
-
@vf.reward(weight=1.0)
|
|
224
|
-
async def contains_answer(task, state) -> float:
|
|
225
|
-
return float(task["answer"] in str(state.get("completion") or ""))
|
|
226
235
|
|
|
227
236
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
228
237
|
split: str = "train"
|
|
229
238
|
|
|
230
239
|
|
|
231
|
-
class MyTaskset(vf.Taskset):
|
|
232
|
-
|
|
233
|
-
_default_rewards = (contains_answer,)
|
|
234
|
-
|
|
235
|
-
def rows(self) -> list[dict[str, object]]:
|
|
240
|
+
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
241
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
236
242
|
rows = [
|
|
237
243
|
{
|
|
238
244
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -243,28 +249,32 @@ class MyTaskset(vf.Taskset):
|
|
|
243
249
|
]
|
|
244
250
|
return [row for row in rows if row["split"] == self.config.split]
|
|
245
251
|
|
|
252
|
+
@vf.reward(weight=1.0)
|
|
253
|
+
async def contains_answer(self, task, state) -> float:
|
|
254
|
+
return float(task["answer"] in str(state.get("completion") or ""))
|
|
255
|
+
|
|
246
256
|
|
|
247
257
|
def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
248
|
-
assert isinstance(config, MyTasksetConfig)
|
|
249
258
|
return MyTaskset(config=config)
|
|
250
259
|
|
|
251
260
|
|
|
252
261
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
253
|
-
|
|
254
|
-
assert isinstance(taskset_config, MyTasksetConfig)
|
|
255
|
-
return vf.Env(taskset=load_taskset(taskset_config))
|
|
262
|
+
return vf.Env(taskset=vf.load_taskset(config=config.taskset))
|
|
256
263
|
```
|
|
257
264
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
258
265
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
259
|
-
Reusable taskset and harness packages live
|
|
260
|
-
|
|
261
|
-
For example, Harbor
|
|
262
|
-
harness with:
|
|
266
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
267
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
268
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
269
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
263
270
|
|
|
264
271
|
```python
|
|
272
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
273
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
274
|
+
|
|
265
275
|
env = vf.Env(
|
|
266
|
-
taskset=
|
|
267
|
-
harness=
|
|
276
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
277
|
+
harness=OpenCode(config=OpenCodeConfig()),
|
|
268
278
|
)
|
|
269
279
|
```
|
|
270
280
|
|
|
@@ -143,19 +143,13 @@ custom harnesses, use the v1 Taskset/Harness path:
|
|
|
143
143
|
# my_env.py
|
|
144
144
|
import verifiers as vf
|
|
145
145
|
|
|
146
|
-
@vf.reward(weight=1.0)
|
|
147
|
-
async def contains_answer(task, state) -> float:
|
|
148
|
-
return float(task["answer"] in str(state.get("completion") or ""))
|
|
149
146
|
|
|
150
147
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
151
148
|
split: str = "train"
|
|
152
149
|
|
|
153
150
|
|
|
154
|
-
class MyTaskset(vf.Taskset):
|
|
155
|
-
|
|
156
|
-
_default_rewards = (contains_answer,)
|
|
157
|
-
|
|
158
|
-
def rows(self) -> list[dict[str, object]]:
|
|
151
|
+
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
152
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
159
153
|
rows = [
|
|
160
154
|
{
|
|
161
155
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -166,28 +160,32 @@ class MyTaskset(vf.Taskset):
|
|
|
166
160
|
]
|
|
167
161
|
return [row for row in rows if row["split"] == self.config.split]
|
|
168
162
|
|
|
163
|
+
@vf.reward(weight=1.0)
|
|
164
|
+
async def contains_answer(self, task, state) -> float:
|
|
165
|
+
return float(task["answer"] in str(state.get("completion") or ""))
|
|
166
|
+
|
|
169
167
|
|
|
170
168
|
def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
171
|
-
assert isinstance(config, MyTasksetConfig)
|
|
172
169
|
return MyTaskset(config=config)
|
|
173
170
|
|
|
174
171
|
|
|
175
172
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
176
|
-
|
|
177
|
-
assert isinstance(taskset_config, MyTasksetConfig)
|
|
178
|
-
return vf.Env(taskset=load_taskset(taskset_config))
|
|
173
|
+
return vf.Env(taskset=vf.load_taskset(config=config.taskset))
|
|
179
174
|
```
|
|
180
175
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
181
176
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
182
|
-
Reusable taskset and harness packages live
|
|
183
|
-
|
|
184
|
-
For example, Harbor
|
|
185
|
-
harness with:
|
|
177
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
178
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
179
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
180
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
186
181
|
|
|
187
182
|
```python
|
|
183
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
184
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
185
|
+
|
|
188
186
|
env = vf.Env(
|
|
189
|
-
taskset=
|
|
190
|
-
harness=
|
|
187
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
188
|
+
harness=OpenCode(config=OpenCodeConfig()),
|
|
191
189
|
)
|
|
192
190
|
```
|
|
193
191
|
|
|
@@ -69,12 +69,12 @@ dev = [
|
|
|
69
69
|
"ipykernel",
|
|
70
70
|
"ipywidgets",
|
|
71
71
|
"reasoning-gym",
|
|
72
|
-
"textarena",
|
|
73
72
|
"stagehand>=3.0.0",
|
|
74
73
|
"aiohttp>=3.9.0",
|
|
75
74
|
"python-dotenv>=1.0.0",
|
|
76
|
-
"
|
|
77
|
-
"renderers>=0.1.8.
|
|
75
|
+
"harnesses",
|
|
76
|
+
"renderers>=0.1.8.dev28",
|
|
77
|
+
"tasksets[openenv,openreward,ta]",
|
|
78
78
|
]
|
|
79
79
|
policy = [
|
|
80
80
|
"semgrep>=1.150.0",
|
|
@@ -85,8 +85,18 @@ rg = [
|
|
|
85
85
|
"reasoning-gym",
|
|
86
86
|
]
|
|
87
87
|
ta = [
|
|
88
|
-
"textarena",
|
|
89
88
|
"nltk",
|
|
89
|
+
"textarena",
|
|
90
|
+
]
|
|
91
|
+
tasksets = [
|
|
92
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
93
|
+
]
|
|
94
|
+
harnesses = [
|
|
95
|
+
"harnesses>=0.1.1",
|
|
96
|
+
]
|
|
97
|
+
packages = [
|
|
98
|
+
"harnesses>=0.1.1",
|
|
99
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
90
100
|
]
|
|
91
101
|
browser = [
|
|
92
102
|
"stagehand>=3.0.0",
|
|
@@ -94,10 +104,17 @@ browser = [
|
|
|
94
104
|
"python-dotenv>=1.0.0",
|
|
95
105
|
]
|
|
96
106
|
openenv = [
|
|
97
|
-
"openenv
|
|
107
|
+
"tasksets[openenv]>=0.1.1",
|
|
108
|
+
]
|
|
109
|
+
openreward = [
|
|
110
|
+
"tasksets[openreward]>=0.1.1",
|
|
111
|
+
]
|
|
112
|
+
nemogym = [
|
|
113
|
+
"harnesses[nemogym]>=0.1.1",
|
|
114
|
+
"tasksets[nemogym]>=0.1.1",
|
|
98
115
|
]
|
|
99
116
|
renderers = [
|
|
100
|
-
"renderers>=0.1.8.
|
|
117
|
+
"renderers>=0.1.8.dev28",
|
|
101
118
|
]
|
|
102
119
|
rl = [
|
|
103
120
|
"torch>=2.8.0,<2.9.0",
|
|
@@ -121,12 +138,53 @@ conflicts = [
|
|
|
121
138
|
{ extra = "openenv" },
|
|
122
139
|
{ group = "policy" },
|
|
123
140
|
],
|
|
141
|
+
[
|
|
142
|
+
{ extra = "nemogym" },
|
|
143
|
+
{ extra = "openenv" },
|
|
144
|
+
],
|
|
145
|
+
[
|
|
146
|
+
{ extra = "nemogym" },
|
|
147
|
+
{ group = "dev" },
|
|
148
|
+
],
|
|
149
|
+
[
|
|
150
|
+
{ extra = "nemogym" },
|
|
151
|
+
{ extra = "tasksets" },
|
|
152
|
+
],
|
|
153
|
+
[
|
|
154
|
+
{ extra = "nemogym" },
|
|
155
|
+
{ extra = "packages" },
|
|
156
|
+
],
|
|
157
|
+
[
|
|
158
|
+
{ extra = "ta" },
|
|
159
|
+
{ group = "policy" },
|
|
160
|
+
],
|
|
161
|
+
[
|
|
162
|
+
{ extra = "tasksets" },
|
|
163
|
+
{ group = "policy" },
|
|
164
|
+
],
|
|
165
|
+
[
|
|
166
|
+
{ extra = "packages" },
|
|
167
|
+
{ group = "policy" },
|
|
168
|
+
],
|
|
169
|
+
[
|
|
170
|
+
{ extra = "openreward" },
|
|
171
|
+
{ group = "policy" },
|
|
172
|
+
],
|
|
173
|
+
[
|
|
174
|
+
{ group = "dev" },
|
|
175
|
+
{ group = "policy" },
|
|
176
|
+
],
|
|
124
177
|
]
|
|
178
|
+
|
|
125
179
|
[[tool.uv.index]]
|
|
126
180
|
name = "pypi"
|
|
127
181
|
url = "https://pypi.org/simple"
|
|
128
182
|
default = true
|
|
129
183
|
|
|
184
|
+
[tool.uv.sources]
|
|
185
|
+
harnesses = { path = "packages/harnesses", editable = true }
|
|
186
|
+
tasksets = { path = "packages/tasksets", editable = true }
|
|
187
|
+
|
|
130
188
|
[tool.uv.exclude-newer-package]
|
|
131
189
|
# PrimeIntellect-published on PyPI (trusted publisher)
|
|
132
190
|
prime-tunnel = false
|
|
@@ -134,6 +192,8 @@ prime-sandboxes = false
|
|
|
134
192
|
prime-pydantic-config = false
|
|
135
193
|
renderers = false
|
|
136
194
|
openenv-core = false
|
|
195
|
+
harnesses = false
|
|
196
|
+
tasksets = false
|
|
137
197
|
|
|
138
198
|
[tool.uv.extra-build-dependencies]
|
|
139
199
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
@@ -58,8 +58,8 @@ def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
|
|
|
58
58
|
|
|
59
59
|
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
60
60
|
assert len(endpoints["gpt-5-mini"]) == 2
|
|
61
|
-
assert endpoints["gpt-5-mini"][0]
|
|
62
|
-
assert endpoints["gpt-5-mini"][1]
|
|
61
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
62
|
+
assert endpoints["gpt-5-mini"][1].base_url == "https://api.openai.com/v1"
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
@@ -75,8 +75,8 @@ def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
|
75
75
|
|
|
76
76
|
endpoints = load_endpoints(str(registry_path))
|
|
77
77
|
|
|
78
|
-
assert endpoints["gpt-5-mini"][0]
|
|
79
|
-
assert endpoints["gpt-5-mini"][0]
|
|
78
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
79
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
|
|
@@ -94,8 +94,8 @@ def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Pa
|
|
|
94
94
|
|
|
95
95
|
endpoints = load_endpoints(str(registry_path))
|
|
96
96
|
|
|
97
|
-
assert endpoints["gpt-5-mini"][0]
|
|
98
|
-
assert endpoints["gpt-5-mini"][0]
|
|
97
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
98
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
|
|
@@ -165,14 +165,10 @@ def test_load_endpoints_directory_uses_toml_and_warns_on_ignored_python(
|
|
|
165
165
|
def test_qwen3_vl_endpoint_ids_map_to_vl_models():
|
|
166
166
|
endpoints = load_endpoints("./configs/endpoints.toml")
|
|
167
167
|
|
|
168
|
-
assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
169
|
-
assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
170
|
-
assert (
|
|
171
|
-
endpoints["qwen3-vl-235b-i"][0]["model"] == "qwen/qwen3-vl-235b-a22b-instruct"
|
172
|
-
)
|
|
173
|
-
assert (
|
|
174
|
-
endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
175
|
-
)
|
|
168
|
+
assert endpoints["qwen3-vl-30b-i"][0].model == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
169
|
+
assert endpoints["qwen3-vl-30b-t"][0].model == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
170
|
+
assert endpoints["qwen3-vl-235b-i"][0].model == "qwen/qwen3-vl-235b-a22b-instruct"
|
|
171
|
+
assert endpoints["qwen3-vl-235b-t"][0].model == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
176
172
|
|
|
177
173
|
|
|
178
174
|
def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
@@ -189,7 +185,7 @@ def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
|
189
185
|
|
|
190
186
|
endpoints = load_endpoints(str(registry_path))
|
|
191
187
|
|
|
192
|
-
assert endpoints["haiku"][0]
|
|
188
|
+
assert endpoints["haiku"][0].api_client_type == "anthropic_messages"
|
|
193
189
|
|
|
194
190
|
|
|
195
191
|
def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
@@ -206,7 +202,7 @@ def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
|
206
202
|
|
|
207
203
|
endpoints = load_endpoints(str(registry_path))
|
|
208
204
|
|
|
209
|
-
assert endpoints["gpt-responses"][0]
|
|
205
|
+
assert endpoints["gpt-responses"][0].api_client_type == "openai_responses"
|
|
210
206
|
|
|
211
207
|
|
|
212
208
|
def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
@@ -223,7 +219,7 @@ def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
|
223
219
|
|
|
224
220
|
endpoints = load_endpoints(str(registry_path))
|
|
225
221
|
|
|
226
|
-
assert endpoints["proxy"][0]
|
|
222
|
+
assert endpoints["proxy"][0].extra_headers == {"X-Custom": "v1"}
|
|
227
223
|
|
|
228
224
|
|
|
229
225
|
def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
@@ -240,7 +236,7 @@ def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
|
240
236
|
|
|
241
237
|
endpoints = load_endpoints(str(registry_path))
|
|
242
238
|
|
|
243
|
-
assert endpoints["proxy"][0]
|
|
239
|
+
assert endpoints["proxy"][0].extra_headers == {"X-A": "a"}
|
|
244
240
|
|
|
245
241
|
|
|
246
242
|
def test_load_endpoints_toml_rejects_headers_and_extra_headers_together(
|
|
@@ -158,6 +158,10 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
|
|
|
158
158
|
f"{repo_root.as_posix()} && "
|
|
159
159
|
"uv pip install "
|
|
160
160
|
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
|
+
f"{(repo_root / 'packages' / 'tasksets').as_posix()} "
|
|
162
|
+
f"{(repo_root / 'packages' / 'harnesses').as_posix()} && "
|
|
163
|
+
"uv pip install "
|
|
164
|
+
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
165
|
f"{env_dir.absolute().as_posix()}"
|
|
162
166
|
)
|
|
163
167
|
try:
|
|
@@ -11,7 +11,7 @@ import pytest
|
|
|
11
11
|
|
|
12
12
|
import verifiers.scripts.eval as vf_eval
|
|
13
13
|
import verifiers.utils.eval_utils
|
|
14
|
-
from verifiers.types import GenerateOutputs
|
|
14
|
+
from verifiers.types import EndpointConfig, GenerateOutputs
|
|
15
15
|
from verifiers.utils.eval_utils import load_toml_config
|
|
16
16
|
from verifiers.utils.path_utils import get_eval_results_path
|
|
17
17
|
from verifiers.utils.save_utils import states_to_outputs
|
|
@@ -21,6 +21,10 @@ def fail_load_endpoints(*_: object) -> dict:
|
|
|
21
21
|
raise AssertionError("load_endpoints should not be called")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def endpoint(**values: object) -> EndpointConfig:
|
|
25
|
+
return EndpointConfig.model_validate(values)
|
|
26
|
+
|
|
27
|
+
|
|
24
28
|
@pytest.fixture
|
|
25
29
|
def run_cli(make_metadata, make_state, make_input):
|
|
26
30
|
def _run_cli(
|
|
@@ -322,12 +326,12 @@ def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_c
|
|
|
322
326
|
{"env_id_or_config": str(cfg)},
|
|
323
327
|
endpoints={
|
|
324
328
|
"gpt-5-mini": [
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
"
|
|
328
|
-
"
|
|
329
|
-
|
|
330
|
-
|
|
329
|
+
endpoint(
|
|
330
|
+
model="gpt-5-mini",
|
|
331
|
+
base_url="https://a.example/v1",
|
|
332
|
+
api_key_var="OPENAI_API_KEY",
|
|
333
|
+
extra_headers={"X-Reg": "r"},
|
|
334
|
+
)
|
|
331
335
|
]
|
|
332
336
|
},
|
|
333
337
|
)
|
|
@@ -350,18 +354,18 @@ def test_cli_multi_variant_preserves_per_row_registry_headers(monkeypatch, run_c
|
|
|
350
354
|
},
|
|
351
355
|
endpoints={
|
|
352
356
|
"gpt-5-mini": [
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
"
|
|
356
|
-
"
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
"
|
|
362
|
-
"
|
|
363
|
-
|
|
364
|
-
|
|
357
|
+
endpoint(
|
|
358
|
+
model="gpt-5-mini",
|
|
359
|
+
base_url="https://a.example/v1",
|
|
360
|
+
api_key_var="OPENAI_API_KEY",
|
|
361
|
+
extra_headers={"X-Row": "a"},
|
|
362
|
+
),
|
|
363
|
+
endpoint(
|
|
364
|
+
model="gpt-5-mini",
|
|
365
|
+
base_url="https://b.example/v1",
|
|
366
|
+
api_key_var="OPENAI_API_KEY",
|
|
367
|
+
extra_headers={"X-Row": "b"},
|
|
368
|
+
),
|
|
365
369
|
]
|
|
366
370
|
},
|
|
367
371
|
)
|
|
@@ -381,16 +385,16 @@ def test_cli_endpoint_alias_multi_variant_sets_multi_base_urls(monkeypatch, run_
|
|
|
381
385
|
},
|
|
382
386
|
endpoints={
|
|
383
387
|
"gpt-5-mini": [
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
"
|
|
387
|
-
"
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
"
|
|
392
|
-
"
|
|
393
|
-
|
|
388
|
+
endpoint(
|
|
389
|
+
model="gpt-5-mini",
|
|
390
|
+
base_url="https://a.example/v1",
|
|
391
|
+
api_key_var="OPENAI_API_KEY",
|
|
392
|
+
),
|
|
393
|
+
endpoint(
|
|
394
|
+
model="gpt-5-mini",
|
|
395
|
+
base_url="https://b.example/v1",
|
|
396
|
+
api_key_var="OPENAI_API_KEY",
|
|
397
|
+
),
|
|
394
398
|
]
|
|
395
399
|
},
|
|
396
400
|
)
|
|
@@ -417,11 +421,11 @@ def test_cli_model_flag_resolves_endpoint_alias_when_registry_present(
|
|
|
417
421
|
},
|
|
418
422
|
endpoints={
|
|
419
423
|
"gpt-4.1-mini": [
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
"
|
|
423
|
-
"
|
|
424
|
-
|
|
424
|
+
endpoint(
|
|
425
|
+
model="openai/gpt-4.1-mini",
|
|
426
|
+
base_url="https://alias.example/v1",
|
|
427
|
+
api_key_var="ALIAS_API_KEY",
|
|
428
|
+
)
|
|
425
429
|
]
|
|
426
430
|
},
|
|
427
431
|
)
|
|
@@ -443,12 +447,12 @@ def test_cli_model_flag_uses_endpoint_client_type_when_provided(monkeypatch, run
|
|
|
443
447
|
},
|
|
444
448
|
endpoints={
|
|
445
449
|
"haiku": [
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
"
|
|
449
|
-
"
|
|
450
|
-
|
|
451
|
-
|
|
450
|
+
endpoint(
|
|
451
|
+
model="claude-haiku-4-5",
|
|
452
|
+
base_url="https://api.anthropic.com",
|
|
453
|
+
api_key_var="ANTHROPIC_API_KEY",
|
|
454
|
+
api_client_type="anthropic_messages",
|
|
455
|
+
)
|
|
452
456
|
]
|
|
453
457
|
},
|
|
454
458
|
)
|
|
@@ -518,16 +522,16 @@ def test_cli_endpoint_alias_multi_variant_supports_mixed_keys(monkeypatch, run_c
|
|
|
518
522
|
},
|
|
519
523
|
endpoints={
|
|
520
524
|
"gpt-5-mini": [
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
"
|
|
524
|
-
"
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
"
|
|
529
|
-
"
|
|
530
|
-
|
|
525
|
+
endpoint(
|
|
526
|
+
model="gpt-5-mini",
|
|
527
|
+
base_url="https://a.example/v1",
|
|
528
|
+
api_key_var="PRIME_API_KEY",
|
|
529
|
+
),
|
|
530
|
+
endpoint(
|
|
531
|
+
model="gpt-5-mini",
|
|
532
|
+
base_url="https://b.example/v1",
|
|
533
|
+
api_key_var="OPENAI_API_KEY",
|
|
534
|
+
),
|
|
531
535
|
]
|
|
532
536
|
},
|
|
533
537
|
)
|
|
@@ -551,16 +555,16 @@ def test_cli_endpoint_id_resolves_registry_alias(monkeypatch, run_cli):
|
|
|
551
555
|
},
|
|
552
556
|
endpoints={
|
|
553
557
|
"gpt-5-mini": [
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
"
|
|
557
|
-
"
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
"
|
|
562
|
-
"
|
|
563
|
-
|
|
558
|
+
endpoint(
|
|
559
|
+
model="gpt-5-mini",
|
|
560
|
+
base_url="https://a.example/v1",
|
|
561
|
+
api_key_var="OPENAI_API_KEY",
|
|
562
|
+
),
|
|
563
|
+
endpoint(
|
|
564
|
+
model="gpt-5-mini",
|
|
565
|
+
base_url="https://b.example/v1",
|
|
566
|
+
api_key_var="OPENAI_API_KEY",
|
|
567
|
+
),
|
|
564
568
|
]
|
|
565
569
|
},
|
|
566
570
|
)
|
|
@@ -602,11 +606,11 @@ def test_cli_endpoint_id_accepts_directory_endpoints_path(monkeypatch, run_cli):
|
|
|
602
606
|
},
|
|
603
607
|
endpoints={
|
|
604
608
|
"gpt-5-mini": [
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
"
|
|
608
|
-
"
|
|
609
|
-
|
|
609
|
+
endpoint(
|
|
610
|
+
model="gpt-5-mini",
|
|
611
|
+
base_url="https://a.example/v1",
|
|
612
|
+
api_key_var="OPENAI_API_KEY",
|
|
613
|
+
)
|
|
610
614
|
]
|
|
611
615
|
},
|
|
612
616
|
)
|
|
@@ -647,11 +651,11 @@ def test_cli_endpoint_id_requires_toml_endpoints_path(monkeypatch, run_cli):
|
|
|
647
651
|
},
|
|
648
652
|
endpoints={
|
|
649
653
|
"gpt-5-mini": [
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
"
|
|
653
|
-
"
|
|
654
|
-
|
|
654
|
+
endpoint(
|
|
655
|
+
model="gpt-5-mini",
|
|
656
|
+
base_url="https://a.example/v1",
|
|
657
|
+
api_key_var="OPENAI_API_KEY",
|
|
658
|
+
)
|
|
655
659
|
]
|
|
656
660
|
},
|
|
657
661
|
)
|
|
@@ -11,14 +11,25 @@ from verifiers.scripts.gepa import (
|
|
|
11
11
|
load_gepa_toml_config,
|
|
12
12
|
resolve_gepa_config_args,
|
|
13
13
|
)
|
|
14
|
+
from verifiers.types import EndpointConfig
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def test_gepa_extra_headers_from_group_requires_consistent_variants():
|
|
17
18
|
with pytest.raises(ValueError, match="different headers"):
|
|
18
19
|
_gepa_extra_headers_from_group(
|
|
19
20
|
[
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
EndpointConfig(
|
|
22
|
+
api_key_var="K",
|
|
23
|
+
base_url="https://a.example/v1",
|
|
24
|
+
model="m",
|
|
25
|
+
extra_headers={"X-A": "1"},
|
|
26
|
+
),
|
|
27
|
+
EndpointConfig(
|
|
28
|
+
api_key_var="K",
|
|
29
|
+
base_url="https://a.example/v1",
|
|
30
|
+
model="m",
|
|
31
|
+
extra_headers={"X-A": "2"},
|
|
32
|
+
),
|
|
22
33
|
],
|
|
23
34
|
"my-alias",
|
|
24
35
|
)
|
|
@@ -27,8 +38,18 @@ def test_gepa_extra_headers_from_group_requires_consistent_variants():
|
|
|
27
38
|
def test_gepa_extra_headers_from_group_returns_first_row_dict():
|
|
28
39
|
h = _gepa_extra_headers_from_group(
|
|
29
40
|
[
|
|
30
|
-
|
|
31
|
-
|
|
41
|
+
EndpointConfig(
|
|
42
|
+
api_key_var="K",
|
|
43
|
+
base_url="https://a.example/v1",
|
|
44
|
+
model="m",
|
|
45
|
+
extra_headers={"X-A": "x"},
|
|
46
|
+
),
|
|
47
|
+
EndpointConfig(
|
|
48
|
+
api_key_var="K",
|
|
49
|
+
base_url="https://a.example/v1",
|
|
50
|
+
model="m",
|
|
51
|
+
extra_headers={"X-A": "x"},
|
|
52
|
+
),
|
|
32
53
|
],
|
|
33
54
|
"my-alias",
|
|
34
55
|
)
|