verifiers 0.1.15.dev11__tar.gz → 0.1.15.dev12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/.gitignore +0 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/PKG-INFO +22 -9
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/README.md +8 -7
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/pyproject.toml +64 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_endpoint_registry.py +14 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_envs.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_eval_cli.py +74 -70
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_gepa_cli.py +25 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_imports.py +28 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_init_script.py +42 -6
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_langchain_deep_agents_wikispeedia.py +27 -9
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_mcp_search_env.py +2 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_opencode_harbor.py +36 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_renderer_client.py +282 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_trajectory_processing.py +136 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_bfcl.py +39 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_config_extension.py +683 -510
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_endpoint_protocols.py +7 -5
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_harbor_cli.py +140 -94
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_mini_swe_agent.py +9 -8
- verifiers-0.1.15.dev12/tests/test_v1_nemo_gym_harness.py +427 -0
- verifiers-0.1.15.dev12/tests/test_v1_openenv_taskset.py +236 -0
- verifiers-0.1.15.dev12/tests/test_v1_openreward_taskset.py +199 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_rlm_swe.py +179 -97
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_runtime_lifecycle.py +669 -114
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_scoring_functions.py +29 -19
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_taskset_bindings.py +44 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_textarena_taskset.py +44 -37
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_wordle_v1_env.py +71 -27
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/__init__.py +55 -17
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/client.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/renderer_client.py +111 -27
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +8 -62
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +12 -5
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/eval.py +11 -11
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/gepa.py +21 -15
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/init.py +26 -34
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/types.py +207 -28
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/env_utils.py +6 -5
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/eval_utils.py +14 -10
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/interception_utils.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/response_utils.py +42 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +22 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/README.md +224 -182
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/RE_MIGRATION.md +56 -45
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/__init__.py +41 -21
- verifiers-0.1.15.dev12/verifiers/v1/artifact.py +86 -0
- verifiers-0.1.15.dev12/verifiers/v1/config.py +127 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/env.py +55 -39
- verifiers-0.1.15.dev12/verifiers/v1/harness.py +632 -0
- verifiers-0.1.15.dev12/verifiers/v1/model.py +51 -0
- verifiers-0.1.15.dev12/verifiers/v1/program.py +303 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/runtime.py +1076 -616
- verifiers-0.1.15.dev12/verifiers/v1/runtime_handles.py +51 -0
- verifiers-0.1.15.dev12/verifiers/v1/sandbox.py +41 -0
- verifiers-0.1.15.dev12/verifiers/v1/task.py +150 -0
- verifiers-0.1.15.dev12/verifiers/v1/taskset.py +146 -0
- verifiers-0.1.15.dev12/verifiers/v1/toolset.py +292 -0
- verifiers-0.1.15.dev12/verifiers/v1/types.py +54 -0
- verifiers-0.1.15.dev12/verifiers/v1/user.py +132 -0
- verifiers-0.1.15.dev12/verifiers/v1/utils/binding_utils.py +323 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/config_callable_utils.py +7 -7
- verifiers-0.1.15.dev12/verifiers/v1/utils/config_utils.py +296 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/endpoint_utils.py +103 -59
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/judge_utils.py +19 -16
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/lifecycle_utils.py +22 -15
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/mcp_proxy_utils.py +18 -23
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/mcp_utils.py +4 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/object_utils.py +13 -11
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/program_utils.py +106 -81
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/prompt_utils.py +58 -80
- verifiers-0.1.15.dev12/verifiers/v1/utils/runtime_owner_utils.py +131 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/runtime_registry.py +3 -7
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/sandbox_program_utils.py +28 -29
- verifiers-0.1.15.dev12/verifiers/v1/utils/sandbox_python_utils.py +99 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/sandbox_utils.py +270 -215
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/scoring_utils.py +45 -45
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/serialization_utils.py +1 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/task_freeze_utils.py +2 -2
- verifiers-0.1.15.dev12/verifiers/v1/utils/taskset_utils.py +144 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/tool_utils.py +24 -20
- verifiers-0.1.15.dev12/verifiers/v1/utils/toolset_utils.py +217 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/trajectory_utils.py +16 -18
- verifiers-0.1.15.dev11/verifiers/v1/config.py +0 -385
- verifiers-0.1.15.dev11/verifiers/v1/harness.py +0 -536
- verifiers-0.1.15.dev11/verifiers/v1/packages/__init__.py +0 -1
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/__init__.py +0 -25
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/command.py +0 -160
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/configs.py +0 -168
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -219
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/opencode.py +0 -234
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/pi.py +0 -195
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/rlm.py +0 -601
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/terminus_2.py +0 -252
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/__init__.py +0 -17
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/harbor.py +0 -412
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/textarena.py +0 -177
- verifiers-0.1.15.dev11/verifiers/v1/task.py +0 -94
- verifiers-0.1.15.dev11/verifiers/v1/taskset.py +0 -207
- verifiers-0.1.15.dev11/verifiers/v1/toolset.py +0 -414
- verifiers-0.1.15.dev11/verifiers/v1/types.py +0 -55
- verifiers-0.1.15.dev11/verifiers/v1/user.py +0 -96
- verifiers-0.1.15.dev11/verifiers/v1/utils/artifact_utils.py +0 -29
- verifiers-0.1.15.dev11/verifiers/v1/utils/binding_utils.py +0 -218
- verifiers-0.1.15.dev11/verifiers/v1/utils/config_utils.py +0 -168
- verifiers-0.1.15.dev11/verifiers/v1/utils/runtime_owner_utils.py +0 -105
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_registry_utils.py +0 -115
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_utils.py +0 -78
- verifiers-0.1.15.dev11/verifiers/v1/utils/timing_utils.py +0 -119
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/LICENSE +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_wiki_search_v1.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev12}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev11
|
|
3
|
+
Version: 0.1.15.dev12
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -53,8 +53,18 @@ Provides-Extra: browser
|
|
|
53
53
|
Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
|
|
54
54
|
Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
|
|
55
55
|
Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
56
|
+
Provides-Extra: harnesses
|
|
57
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'harnesses'
|
|
58
|
+
Provides-Extra: nemogym
|
|
59
|
+
Requires-Dist: harnesses[nemogym]>=0.1.1; extra == 'nemogym'
|
|
60
|
+
Requires-Dist: tasksets[nemogym]>=0.1.1; extra == 'nemogym'
|
|
56
61
|
Provides-Extra: openenv
|
|
57
|
-
Requires-Dist: openenv
|
|
62
|
+
Requires-Dist: tasksets[openenv]>=0.1.1; extra == 'openenv'
|
|
63
|
+
Provides-Extra: openreward
|
|
64
|
+
Requires-Dist: tasksets[openreward]>=0.1.1; extra == 'openreward'
|
|
65
|
+
Provides-Extra: packages
|
|
66
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'packages'
|
|
67
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'packages'
|
|
58
68
|
Provides-Extra: renderers
|
|
59
69
|
Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
|
|
60
70
|
Provides-Extra: rg
|
|
@@ -73,6 +83,8 @@ Requires-Dist: wandb; extra == 'rl'
|
|
|
73
83
|
Provides-Extra: ta
|
|
74
84
|
Requires-Dist: nltk; extra == 'ta'
|
|
75
85
|
Requires-Dist: textarena; extra == 'ta'
|
|
86
|
+
Provides-Extra: tasksets
|
|
87
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'tasksets'
|
|
76
88
|
Description-Content-Type: text/markdown
|
|
77
89
|
|
|
78
90
|
<p align="center">
|
|
@@ -226,7 +238,7 @@ class MyTasksetConfig(vf.TasksetConfig):
|
|
|
226
238
|
|
|
227
239
|
|
|
228
240
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
229
|
-
def load_tasks(self) -> vf.Tasks:
|
|
241
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
230
242
|
rows = [
|
|
231
243
|
{
|
|
232
244
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -251,16 +263,17 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
251
263
|
```
|
|
252
264
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
253
265
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
254
|
-
Reusable taskset and harness packages live
|
|
255
|
-
|
|
256
|
-
|
|
266
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
267
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
268
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
269
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
257
270
|
|
|
258
271
|
```python
|
|
259
|
-
from
|
|
260
|
-
from
|
|
272
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
273
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
261
274
|
|
|
262
275
|
env = vf.Env(
|
|
263
|
-
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
276
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
264
277
|
harness=OpenCode(config=OpenCodeConfig()),
|
|
265
278
|
)
|
|
266
279
|
```
|
|
@@ -149,7 +149,7 @@ class MyTasksetConfig(vf.TasksetConfig):
|
|
|
149
149
|
|
|
150
150
|
|
|
151
151
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
152
|
-
def load_tasks(self) -> vf.Tasks:
|
|
152
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
153
153
|
rows = [
|
|
154
154
|
{
|
|
155
155
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -174,16 +174,17 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
174
174
|
```
|
|
175
175
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
176
176
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
177
|
-
Reusable taskset and harness packages live
|
|
178
|
-
|
|
179
|
-
|
|
177
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
178
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
179
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
180
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
180
181
|
|
|
181
182
|
```python
|
|
182
|
-
from
|
|
183
|
-
from
|
|
183
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
184
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
184
185
|
|
|
185
186
|
env = vf.Env(
|
|
186
|
-
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
187
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
187
188
|
harness=OpenCode(config=OpenCodeConfig()),
|
|
188
189
|
)
|
|
189
190
|
```
|
|
@@ -69,12 +69,12 @@ dev = [
|
|
|
69
69
|
"ipykernel",
|
|
70
70
|
"ipywidgets",
|
|
71
71
|
"reasoning-gym",
|
|
72
|
-
"textarena",
|
|
73
72
|
"stagehand>=3.0.0",
|
|
74
73
|
"aiohttp>=3.9.0",
|
|
75
74
|
"python-dotenv>=1.0.0",
|
|
76
|
-
"
|
|
75
|
+
"harnesses",
|
|
77
76
|
"renderers>=0.1.8.dev28",
|
|
77
|
+
"tasksets[openenv,openreward,ta]",
|
|
78
78
|
]
|
|
79
79
|
policy = [
|
|
80
80
|
"semgrep>=1.150.0",
|
|
@@ -85,8 +85,18 @@ rg = [
|
|
|
85
85
|
"reasoning-gym",
|
|
86
86
|
]
|
|
87
87
|
ta = [
|
|
88
|
-
"textarena",
|
|
89
88
|
"nltk",
|
|
89
|
+
"textarena",
|
|
90
|
+
]
|
|
91
|
+
tasksets = [
|
|
92
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
93
|
+
]
|
|
94
|
+
harnesses = [
|
|
95
|
+
"harnesses>=0.1.1",
|
|
96
|
+
]
|
|
97
|
+
packages = [
|
|
98
|
+
"harnesses>=0.1.1",
|
|
99
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
90
100
|
]
|
|
91
101
|
browser = [
|
|
92
102
|
"stagehand>=3.0.0",
|
|
@@ -94,7 +104,14 @@ browser = [
|
|
|
94
104
|
"python-dotenv>=1.0.0",
|
|
95
105
|
]
|
|
96
106
|
openenv = [
|
|
97
|
-
"openenv
|
|
107
|
+
"tasksets[openenv]>=0.1.1",
|
|
108
|
+
]
|
|
109
|
+
openreward = [
|
|
110
|
+
"tasksets[openreward]>=0.1.1",
|
|
111
|
+
]
|
|
112
|
+
nemogym = [
|
|
113
|
+
"harnesses[nemogym]>=0.1.1",
|
|
114
|
+
"tasksets[nemogym]>=0.1.1",
|
|
98
115
|
]
|
|
99
116
|
renderers = [
|
|
100
117
|
"renderers>=0.1.8.dev28",
|
|
@@ -121,12 +138,53 @@ conflicts = [
|
|
|
121
138
|
{ extra = "openenv" },
|
|
122
139
|
{ group = "policy" },
|
|
123
140
|
],
|
|
141
|
+
[
|
|
142
|
+
{ extra = "nemogym" },
|
|
143
|
+
{ extra = "openenv" },
|
|
144
|
+
],
|
|
145
|
+
[
|
|
146
|
+
{ extra = "nemogym" },
|
|
147
|
+
{ group = "dev" },
|
|
148
|
+
],
|
|
149
|
+
[
|
|
150
|
+
{ extra = "nemogym" },
|
|
151
|
+
{ extra = "tasksets" },
|
|
152
|
+
],
|
|
153
|
+
[
|
|
154
|
+
{ extra = "nemogym" },
|
|
155
|
+
{ extra = "packages" },
|
|
156
|
+
],
|
|
157
|
+
[
|
|
158
|
+
{ extra = "ta" },
|
|
159
|
+
{ group = "policy" },
|
|
160
|
+
],
|
|
161
|
+
[
|
|
162
|
+
{ extra = "tasksets" },
|
|
163
|
+
{ group = "policy" },
|
|
164
|
+
],
|
|
165
|
+
[
|
|
166
|
+
{ extra = "packages" },
|
|
167
|
+
{ group = "policy" },
|
|
168
|
+
],
|
|
169
|
+
[
|
|
170
|
+
{ extra = "openreward" },
|
|
171
|
+
{ group = "policy" },
|
|
172
|
+
],
|
|
173
|
+
[
|
|
174
|
+
{ group = "dev" },
|
|
175
|
+
{ group = "policy" },
|
|
176
|
+
],
|
|
124
177
|
]
|
|
178
|
+
|
|
125
179
|
[[tool.uv.index]]
|
|
126
180
|
name = "pypi"
|
|
127
181
|
url = "https://pypi.org/simple"
|
|
128
182
|
default = true
|
|
129
183
|
|
|
184
|
+
[tool.uv.sources]
|
|
185
|
+
harnesses = { path = "packages/harnesses", editable = true }
|
|
186
|
+
tasksets = { path = "packages/tasksets", editable = true }
|
|
187
|
+
|
|
130
188
|
[tool.uv.exclude-newer-package]
|
|
131
189
|
# PrimeIntellect-published on PyPI (trusted publisher)
|
|
132
190
|
prime-tunnel = false
|
|
@@ -134,6 +192,8 @@ prime-sandboxes = false
|
|
|
134
192
|
prime-pydantic-config = false
|
|
135
193
|
renderers = false
|
|
136
194
|
openenv-core = false
|
|
195
|
+
harnesses = false
|
|
196
|
+
tasksets = false
|
|
137
197
|
|
|
138
198
|
[tool.uv.extra-build-dependencies]
|
|
139
199
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
@@ -58,8 +58,8 @@ def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
|
|
|
58
58
|
|
|
59
59
|
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
60
60
|
assert len(endpoints["gpt-5-mini"]) == 2
|
|
61
|
-
assert endpoints["gpt-5-mini"][0]
|
|
62
|
-
assert endpoints["gpt-5-mini"][1]
|
|
61
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
62
|
+
assert endpoints["gpt-5-mini"][1].base_url == "https://api.openai.com/v1"
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
@@ -75,8 +75,8 @@ def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
|
75
75
|
|
|
76
76
|
endpoints = load_endpoints(str(registry_path))
|
|
77
77
|
|
|
78
|
-
assert endpoints["gpt-5-mini"][0]
|
|
79
|
-
assert endpoints["gpt-5-mini"][0]
|
|
78
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
79
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
|
|
@@ -94,8 +94,8 @@ def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Pa
|
|
|
94
94
|
|
|
95
95
|
endpoints = load_endpoints(str(registry_path))
|
|
96
96
|
|
|
97
|
-
assert endpoints["gpt-5-mini"][0]
|
|
98
|
-
assert endpoints["gpt-5-mini"][0]
|
|
97
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
98
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
|
|
@@ -165,14 +165,10 @@ def test_load_endpoints_directory_uses_toml_and_warns_on_ignored_python(
|
|
|
165
165
|
def test_qwen3_vl_endpoint_ids_map_to_vl_models():
|
|
166
166
|
endpoints = load_endpoints("./configs/endpoints.toml")
|
|
167
167
|
|
|
168
|
-
assert endpoints["qwen3-vl-30b-i"][0]
|
|
169
|
-
assert endpoints["qwen3-vl-30b-t"][0]
|
|
170
|
-
assert
|
|
171
|
-
|
|
172
|
-
)
|
|
173
|
-
assert (
|
|
174
|
-
endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
175
|
-
)
|
|
168
|
+
assert endpoints["qwen3-vl-30b-i"][0].model == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
169
|
+
assert endpoints["qwen3-vl-30b-t"][0].model == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
170
|
+
assert endpoints["qwen3-vl-235b-i"][0].model == "qwen/qwen3-vl-235b-a22b-instruct"
|
|
171
|
+
assert endpoints["qwen3-vl-235b-t"][0].model == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
176
172
|
|
|
177
173
|
|
|
178
174
|
def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
@@ -189,7 +185,7 @@ def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
|
189
185
|
|
|
190
186
|
endpoints = load_endpoints(str(registry_path))
|
|
191
187
|
|
|
192
|
-
assert endpoints["haiku"][0]
|
|
188
|
+
assert endpoints["haiku"][0].api_client_type == "anthropic_messages"
|
|
193
189
|
|
|
194
190
|
|
|
195
191
|
def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
@@ -206,7 +202,7 @@ def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
|
206
202
|
|
|
207
203
|
endpoints = load_endpoints(str(registry_path))
|
|
208
204
|
|
|
209
|
-
assert endpoints["gpt-responses"][0]
|
|
205
|
+
assert endpoints["gpt-responses"][0].api_client_type == "openai_responses"
|
|
210
206
|
|
|
211
207
|
|
|
212
208
|
def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
@@ -223,7 +219,7 @@ def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
|
223
219
|
|
|
224
220
|
endpoints = load_endpoints(str(registry_path))
|
|
225
221
|
|
|
226
|
-
assert endpoints["proxy"][0]
|
|
222
|
+
assert endpoints["proxy"][0].extra_headers == {"X-Custom": "v1"}
|
|
227
223
|
|
|
228
224
|
|
|
229
225
|
def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
@@ -240,7 +236,7 @@ def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
|
240
236
|
|
|
241
237
|
endpoints = load_endpoints(str(registry_path))
|
|
242
238
|
|
|
243
|
-
assert endpoints["proxy"][0]
|
|
239
|
+
assert endpoints["proxy"][0].extra_headers == {"X-A": "a"}
|
|
244
240
|
|
|
245
241
|
|
|
246
242
|
def test_load_endpoints_toml_rejects_headers_and_extra_headers_together(
|
|
@@ -158,6 +158,10 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
|
|
|
158
158
|
f"{repo_root.as_posix()} && "
|
|
159
159
|
"uv pip install "
|
|
160
160
|
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
|
+
f"{(repo_root / 'packages' / 'tasksets').as_posix()} "
|
|
162
|
+
f"{(repo_root / 'packages' / 'harnesses').as_posix()} && "
|
|
163
|
+
"uv pip install "
|
|
164
|
+
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
165
|
f"{env_dir.absolute().as_posix()}"
|
|
162
166
|
)
|
|
163
167
|
try:
|
|
@@ -11,7 +11,7 @@ import pytest
|
|
|
11
11
|
|
|
12
12
|
import verifiers.scripts.eval as vf_eval
|
|
13
13
|
import verifiers.utils.eval_utils
|
|
14
|
-
from verifiers.types import GenerateOutputs
|
|
14
|
+
from verifiers.types import EndpointConfig, GenerateOutputs
|
|
15
15
|
from verifiers.utils.eval_utils import load_toml_config
|
|
16
16
|
from verifiers.utils.path_utils import get_eval_results_path
|
|
17
17
|
from verifiers.utils.save_utils import states_to_outputs
|
|
@@ -21,6 +21,10 @@ def fail_load_endpoints(*_: object) -> dict:
|
|
|
21
21
|
raise AssertionError("load_endpoints should not be called")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def endpoint(**values: object) -> EndpointConfig:
|
|
25
|
+
return EndpointConfig.model_validate(values)
|
|
26
|
+
|
|
27
|
+
|
|
24
28
|
@pytest.fixture
|
|
25
29
|
def run_cli(make_metadata, make_state, make_input):
|
|
26
30
|
def _run_cli(
|
|
@@ -322,12 +326,12 @@ def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_c
|
|
|
322
326
|
{"env_id_or_config": str(cfg)},
|
|
323
327
|
endpoints={
|
|
324
328
|
"gpt-5-mini": [
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
"
|
|
328
|
-
"
|
|
329
|
-
|
|
330
|
-
|
|
329
|
+
endpoint(
|
|
330
|
+
model="gpt-5-mini",
|
|
331
|
+
base_url="https://a.example/v1",
|
|
332
|
+
api_key_var="OPENAI_API_KEY",
|
|
333
|
+
extra_headers={"X-Reg": "r"},
|
|
334
|
+
)
|
|
331
335
|
]
|
|
332
336
|
},
|
|
333
337
|
)
|
|
@@ -350,18 +354,18 @@ def test_cli_multi_variant_preserves_per_row_registry_headers(monkeypatch, run_c
|
|
|
350
354
|
},
|
|
351
355
|
endpoints={
|
|
352
356
|
"gpt-5-mini": [
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
"
|
|
356
|
-
"
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
"
|
|
362
|
-
"
|
|
363
|
-
|
|
364
|
-
|
|
357
|
+
endpoint(
|
|
358
|
+
model="gpt-5-mini",
|
|
359
|
+
base_url="https://a.example/v1",
|
|
360
|
+
api_key_var="OPENAI_API_KEY",
|
|
361
|
+
extra_headers={"X-Row": "a"},
|
|
362
|
+
),
|
|
363
|
+
endpoint(
|
|
364
|
+
model="gpt-5-mini",
|
|
365
|
+
base_url="https://b.example/v1",
|
|
366
|
+
api_key_var="OPENAI_API_KEY",
|
|
367
|
+
extra_headers={"X-Row": "b"},
|
|
368
|
+
),
|
|
365
369
|
]
|
|
366
370
|
},
|
|
367
371
|
)
|
|
@@ -381,16 +385,16 @@ def test_cli_endpoint_alias_multi_variant_sets_multi_base_urls(monkeypatch, run_
|
|
|
381
385
|
},
|
|
382
386
|
endpoints={
|
|
383
387
|
"gpt-5-mini": [
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
"
|
|
387
|
-
"
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
"
|
|
392
|
-
"
|
|
393
|
-
|
|
388
|
+
endpoint(
|
|
389
|
+
model="gpt-5-mini",
|
|
390
|
+
base_url="https://a.example/v1",
|
|
391
|
+
api_key_var="OPENAI_API_KEY",
|
|
392
|
+
),
|
|
393
|
+
endpoint(
|
|
394
|
+
model="gpt-5-mini",
|
|
395
|
+
base_url="https://b.example/v1",
|
|
396
|
+
api_key_var="OPENAI_API_KEY",
|
|
397
|
+
),
|
|
394
398
|
]
|
|
395
399
|
},
|
|
396
400
|
)
|
|
@@ -417,11 +421,11 @@ def test_cli_model_flag_resolves_endpoint_alias_when_registry_present(
|
|
|
417
421
|
},
|
|
418
422
|
endpoints={
|
|
419
423
|
"gpt-4.1-mini": [
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
"
|
|
423
|
-
"
|
|
424
|
-
|
|
424
|
+
endpoint(
|
|
425
|
+
model="openai/gpt-4.1-mini",
|
|
426
|
+
base_url="https://alias.example/v1",
|
|
427
|
+
api_key_var="ALIAS_API_KEY",
|
|
428
|
+
)
|
|
425
429
|
]
|
|
426
430
|
},
|
|
427
431
|
)
|
|
@@ -443,12 +447,12 @@ def test_cli_model_flag_uses_endpoint_client_type_when_provided(monkeypatch, run
|
|
|
443
447
|
},
|
|
444
448
|
endpoints={
|
|
445
449
|
"haiku": [
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
"
|
|
449
|
-
"
|
|
450
|
-
|
|
451
|
-
|
|
450
|
+
endpoint(
|
|
451
|
+
model="claude-haiku-4-5",
|
|
452
|
+
base_url="https://api.anthropic.com",
|
|
453
|
+
api_key_var="ANTHROPIC_API_KEY",
|
|
454
|
+
api_client_type="anthropic_messages",
|
|
455
|
+
)
|
|
452
456
|
]
|
|
453
457
|
},
|
|
454
458
|
)
|
|
@@ -518,16 +522,16 @@ def test_cli_endpoint_alias_multi_variant_supports_mixed_keys(monkeypatch, run_c
|
|
|
518
522
|
},
|
|
519
523
|
endpoints={
|
|
520
524
|
"gpt-5-mini": [
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
"
|
|
524
|
-
"
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
"
|
|
529
|
-
"
|
|
530
|
-
|
|
525
|
+
endpoint(
|
|
526
|
+
model="gpt-5-mini",
|
|
527
|
+
base_url="https://a.example/v1",
|
|
528
|
+
api_key_var="PRIME_API_KEY",
|
|
529
|
+
),
|
|
530
|
+
endpoint(
|
|
531
|
+
model="gpt-5-mini",
|
|
532
|
+
base_url="https://b.example/v1",
|
|
533
|
+
api_key_var="OPENAI_API_KEY",
|
|
534
|
+
),
|
|
531
535
|
]
|
|
532
536
|
},
|
|
533
537
|
)
|
|
@@ -551,16 +555,16 @@ def test_cli_endpoint_id_resolves_registry_alias(monkeypatch, run_cli):
|
|
|
551
555
|
},
|
|
552
556
|
endpoints={
|
|
553
557
|
"gpt-5-mini": [
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
"
|
|
557
|
-
"
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
"
|
|
562
|
-
"
|
|
563
|
-
|
|
558
|
+
endpoint(
|
|
559
|
+
model="gpt-5-mini",
|
|
560
|
+
base_url="https://a.example/v1",
|
|
561
|
+
api_key_var="OPENAI_API_KEY",
|
|
562
|
+
),
|
|
563
|
+
endpoint(
|
|
564
|
+
model="gpt-5-mini",
|
|
565
|
+
base_url="https://b.example/v1",
|
|
566
|
+
api_key_var="OPENAI_API_KEY",
|
|
567
|
+
),
|
|
564
568
|
]
|
|
565
569
|
},
|
|
566
570
|
)
|
|
@@ -602,11 +606,11 @@ def test_cli_endpoint_id_accepts_directory_endpoints_path(monkeypatch, run_cli):
|
|
|
602
606
|
},
|
|
603
607
|
endpoints={
|
|
604
608
|
"gpt-5-mini": [
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
"
|
|
608
|
-
"
|
|
609
|
-
|
|
609
|
+
endpoint(
|
|
610
|
+
model="gpt-5-mini",
|
|
611
|
+
base_url="https://a.example/v1",
|
|
612
|
+
api_key_var="OPENAI_API_KEY",
|
|
613
|
+
)
|
|
610
614
|
]
|
|
611
615
|
},
|
|
612
616
|
)
|
|
@@ -647,11 +651,11 @@ def test_cli_endpoint_id_requires_toml_endpoints_path(monkeypatch, run_cli):
|
|
|
647
651
|
},
|
|
648
652
|
endpoints={
|
|
649
653
|
"gpt-5-mini": [
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
"
|
|
653
|
-
"
|
|
654
|
-
|
|
654
|
+
endpoint(
|
|
655
|
+
model="gpt-5-mini",
|
|
656
|
+
base_url="https://a.example/v1",
|
|
657
|
+
api_key_var="OPENAI_API_KEY",
|
|
658
|
+
)
|
|
655
659
|
]
|
|
656
660
|
},
|
|
657
661
|
)
|
|
@@ -11,14 +11,25 @@ from verifiers.scripts.gepa import (
|
|
|
11
11
|
load_gepa_toml_config,
|
|
12
12
|
resolve_gepa_config_args,
|
|
13
13
|
)
|
|
14
|
+
from verifiers.types import EndpointConfig
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def test_gepa_extra_headers_from_group_requires_consistent_variants():
|
|
17
18
|
with pytest.raises(ValueError, match="different headers"):
|
|
18
19
|
_gepa_extra_headers_from_group(
|
|
19
20
|
[
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
EndpointConfig(
|
|
22
|
+
api_key_var="K",
|
|
23
|
+
base_url="https://a.example/v1",
|
|
24
|
+
model="m",
|
|
25
|
+
extra_headers={"X-A": "1"},
|
|
26
|
+
),
|
|
27
|
+
EndpointConfig(
|
|
28
|
+
api_key_var="K",
|
|
29
|
+
base_url="https://a.example/v1",
|
|
30
|
+
model="m",
|
|
31
|
+
extra_headers={"X-A": "2"},
|
|
32
|
+
),
|
|
22
33
|
],
|
|
23
34
|
"my-alias",
|
|
24
35
|
)
|
|
@@ -27,8 +38,18 @@ def test_gepa_extra_headers_from_group_requires_consistent_variants():
|
|
|
27
38
|
def test_gepa_extra_headers_from_group_returns_first_row_dict():
|
|
28
39
|
h = _gepa_extra_headers_from_group(
|
|
29
40
|
[
|
|
30
|
-
|
|
31
|
-
|
|
41
|
+
EndpointConfig(
|
|
42
|
+
api_key_var="K",
|
|
43
|
+
base_url="https://a.example/v1",
|
|
44
|
+
model="m",
|
|
45
|
+
extra_headers={"X-A": "x"},
|
|
46
|
+
),
|
|
47
|
+
EndpointConfig(
|
|
48
|
+
api_key_var="K",
|
|
49
|
+
base_url="https://a.example/v1",
|
|
50
|
+
model="m",
|
|
51
|
+
extra_headers={"X-A": "x"},
|
|
52
|
+
),
|
|
32
53
|
],
|
|
33
54
|
"my-alias",
|
|
34
55
|
)
|
|
@@ -9,6 +9,10 @@ PACKAGE_SYMBOLS = {
|
|
|
9
9
|
"HarborTasksetConfig",
|
|
10
10
|
"MiniSWEAgent",
|
|
11
11
|
"MiniSWEAgentConfig",
|
|
12
|
+
"NeMoGymHarness",
|
|
13
|
+
"NeMoGymHarnessConfig",
|
|
14
|
+
"NeMoGymTaskset",
|
|
15
|
+
"NeMoGymTasksetConfig",
|
|
12
16
|
"OpenCode",
|
|
13
17
|
"OpenCodeConfig",
|
|
14
18
|
"Pi",
|
|
@@ -36,10 +40,10 @@ def test_package_tasksets_and_harnesses_are_not_v1_exports():
|
|
|
36
40
|
|
|
37
41
|
|
|
38
42
|
def test_v1_taskset_imports_do_not_import_textarena():
|
|
39
|
-
textarena_module = "
|
|
43
|
+
textarena_module = "tasksets.textarena"
|
|
40
44
|
sys.modules.pop(textarena_module, None)
|
|
41
45
|
|
|
42
|
-
tasksets = importlib.import_module("
|
|
46
|
+
tasksets = importlib.import_module("tasksets")
|
|
43
47
|
tasksets.__dict__.pop("TextArenaTaskset", None)
|
|
44
48
|
tasksets.__dict__.pop("TextArenaTasksetConfig", None)
|
|
45
49
|
importlib.reload(tasksets)
|
|
@@ -52,6 +56,28 @@ def test_v1_taskset_imports_do_not_import_textarena():
|
|
|
52
56
|
assert textarena_module not in sys.modules
|
|
53
57
|
|
|
54
58
|
|
|
59
|
+
def test_harness_imports_do_not_import_nemo_gym():
|
|
60
|
+
nemo_gym_module = "harnesses.nemo_gym"
|
|
61
|
+
sys.modules.pop(nemo_gym_module, None)
|
|
62
|
+
|
|
63
|
+
harnesses = importlib.import_module("harnesses")
|
|
64
|
+
harnesses.__dict__.pop("NeMoGymHarness", None)
|
|
65
|
+
harnesses.__dict__.pop("NeMoGymHarnessConfig", None)
|
|
66
|
+
importlib.reload(harnesses)
|
|
67
|
+
assert nemo_gym_module not in sys.modules
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_taskset_imports_do_not_import_nemo_gym():
|
|
71
|
+
nemo_gym_module = "tasksets.nemo_gym"
|
|
72
|
+
sys.modules.pop(nemo_gym_module, None)
|
|
73
|
+
|
|
74
|
+
tasksets = importlib.import_module("tasksets")
|
|
75
|
+
tasksets.__dict__.pop("NeMoGymTaskset", None)
|
|
76
|
+
tasksets.__dict__.pop("NeMoGymTasksetConfig", None)
|
|
77
|
+
importlib.reload(tasksets)
|
|
78
|
+
assert nemo_gym_module not in sys.modules
|
|
79
|
+
|
|
80
|
+
|
|
55
81
|
class TestImports:
|
|
56
82
|
"""Test that all public API imports work correctly.
|
|
57
83
|
This was inspired by issue #349.
|