verifiers 0.1.15.dev10__tar.gz → 0.1.15.dev11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/PKG-INFO +16 -19
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/README.md +14 -17
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/pyproject.toml +2 -2
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_imports.py +31 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_init_script.py +20 -17
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_langchain_deep_agents_wikispeedia.py +5 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_mcp_search_env.py +3 -3
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_opencode_harbor.py +9 -7
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_renderer_client.py +13 -14
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_renderer_e2e.py +28 -18
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_bfcl.py +6 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_config_extension.py +745 -267
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_example_counts.py +10 -10
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_harbor_cli.py +58 -30
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_mini_swe_agent.py +11 -10
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_rlm_swe.py +40 -45
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_runtime_lifecycle.py +98 -72
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_scoring_functions.py +1 -1
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_taskset_bindings.py +65 -64
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_textarena_taskset.py +29 -11
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wiki_search_v1.py +3 -3
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wordle_v1_env.py +11 -2
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/__init__.py +10 -46
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/renderer_client.py +12 -32
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/init.py +50 -51
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/types.py +18 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/env_utils.py +21 -4
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +41 -50
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/README.md +77 -90
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/RE_MIGRATION.md +53 -46
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/__init__.py +6 -36
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/config.py +29 -5
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/env.py +4 -26
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/harness.py +37 -36
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/harbor.py +126 -113
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/textarena.py +74 -50
- verifiers-0.1.15.dev11/verifiers/v1/taskset.py +207 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/toolset.py +2 -1
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/types.py +4 -3
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_utils.py +52 -3
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/prompt_utils.py +91 -2
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_owner_utils.py +26 -45
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_registry_utils.py +115 -0
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_utils.py +78 -0
- verifiers-0.1.15.dev10/verifiers/v1/taskset.py +0 -187
- verifiers-0.1.15.dev10/verifiers/v1/utils/taskset_utils.py +0 -90
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/.gitignore +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/LICENSE +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/command.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/configs.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/pi.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/harnesses/terminus_2.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/packages/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/artifact_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/timing_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev10
|
|
3
|
+
Version: 0.1.15.dev11
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -56,7 +56,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
|
56
56
|
Provides-Extra: openenv
|
|
57
57
|
Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
|
|
58
58
|
Provides-Extra: renderers
|
|
59
|
-
Requires-Dist: renderers>=0.1.8.
|
|
59
|
+
Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
|
|
60
60
|
Provides-Extra: rg
|
|
61
61
|
Requires-Dist: reasoning-gym; extra == 'rg'
|
|
62
62
|
Provides-Extra: rl
|
|
@@ -220,19 +220,13 @@ custom harnesses, use the v1 Taskset/Harness path:
|
|
|
220
220
|
# my_env.py
|
|
221
221
|
import verifiers as vf
|
|
222
222
|
|
|
223
|
-
@vf.reward(weight=1.0)
|
|
224
|
-
async def contains_answer(task, state) -> float:
|
|
225
|
-
return float(task["answer"] in str(state.get("completion") or ""))
|
|
226
223
|
|
|
227
224
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
228
225
|
split: str = "train"
|
|
229
226
|
|
|
230
227
|
|
|
231
|
-
class MyTaskset(vf.Taskset):
|
|
232
|
-
|
|
233
|
-
_default_rewards = (contains_answer,)
|
|
234
|
-
|
|
235
|
-
def rows(self) -> list[dict[str, object]]:
|
|
228
|
+
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
229
|
+
def load_tasks(self) -> vf.Tasks:
|
|
236
230
|
rows = [
|
|
237
231
|
{
|
|
238
232
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -243,28 +237,31 @@ class MyTaskset(vf.Taskset):
|
|
|
243
237
|
]
|
|
244
238
|
return [row for row in rows if row["split"] == self.config.split]
|
|
245
239
|
|
|
240
|
+
@vf.reward(weight=1.0)
|
|
241
|
+
async def contains_answer(self, task, state) -> float:
|
|
242
|
+
return float(task["answer"] in str(state.get("completion") or ""))
|
|
243
|
+
|
|
246
244
|
|
|
247
245
|
def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
248
|
-
assert isinstance(config, MyTasksetConfig)
|
|
249
246
|
return MyTaskset(config=config)
|
|
250
247
|
|
|
251
248
|
|
|
252
249
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
253
|
-
|
|
254
|
-
assert isinstance(taskset_config, MyTasksetConfig)
|
|
255
|
-
return vf.Env(taskset=load_taskset(taskset_config))
|
|
250
|
+
return vf.Env(taskset=vf.load_taskset(config=config.taskset))
|
|
256
251
|
```
|
|
257
252
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
258
253
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
259
|
-
Reusable taskset and harness packages live under `verifiers.v1.packages
|
|
260
|
-
|
|
261
|
-
For example, Harbor task directories can run through the bundled OpenCode CLI
|
|
254
|
+
Reusable taskset and harness packages live under `verifiers.v1.packages`. For
|
|
255
|
+
example, Harbor task directories can run through the bundled OpenCode CLI
|
|
262
256
|
harness with:
|
|
263
257
|
|
|
264
258
|
```python
|
|
259
|
+
from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
|
|
260
|
+
from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
|
|
261
|
+
|
|
265
262
|
env = vf.Env(
|
|
266
|
-
taskset=
|
|
267
|
-
harness=
|
|
263
|
+
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
264
|
+
harness=OpenCode(config=OpenCodeConfig()),
|
|
268
265
|
)
|
|
269
266
|
```
|
|
270
267
|
|
|
@@ -143,19 +143,13 @@ custom harnesses, use the v1 Taskset/Harness path:
|
|
|
143
143
|
# my_env.py
|
|
144
144
|
import verifiers as vf
|
|
145
145
|
|
|
146
|
-
@vf.reward(weight=1.0)
|
|
147
|
-
async def contains_answer(task, state) -> float:
|
|
148
|
-
return float(task["answer"] in str(state.get("completion") or ""))
|
|
149
146
|
|
|
150
147
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
151
148
|
split: str = "train"
|
|
152
149
|
|
|
153
150
|
|
|
154
|
-
class MyTaskset(vf.Taskset):
|
|
155
|
-
|
|
156
|
-
_default_rewards = (contains_answer,)
|
|
157
|
-
|
|
158
|
-
def rows(self) -> list[dict[str, object]]:
|
|
151
|
+
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
152
|
+
def load_tasks(self) -> vf.Tasks:
|
|
159
153
|
rows = [
|
|
160
154
|
{
|
|
161
155
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -166,28 +160,31 @@ class MyTaskset(vf.Taskset):
|
|
|
166
160
|
]
|
|
167
161
|
return [row for row in rows if row["split"] == self.config.split]
|
|
168
162
|
|
|
163
|
+
@vf.reward(weight=1.0)
|
|
164
|
+
async def contains_answer(self, task, state) -> float:
|
|
165
|
+
return float(task["answer"] in str(state.get("completion") or ""))
|
|
166
|
+
|
|
169
167
|
|
|
170
168
|
def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
171
|
-
assert isinstance(config, MyTasksetConfig)
|
|
172
169
|
return MyTaskset(config=config)
|
|
173
170
|
|
|
174
171
|
|
|
175
172
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
176
|
-
|
|
177
|
-
assert isinstance(taskset_config, MyTasksetConfig)
|
|
178
|
-
return vf.Env(taskset=load_taskset(taskset_config))
|
|
173
|
+
return vf.Env(taskset=vf.load_taskset(config=config.taskset))
|
|
179
174
|
```
|
|
180
175
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
181
176
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
182
|
-
Reusable taskset and harness packages live under `verifiers.v1.packages
|
|
183
|
-
|
|
184
|
-
For example, Harbor task directories can run through the bundled OpenCode CLI
|
|
177
|
+
Reusable taskset and harness packages live under `verifiers.v1.packages`. For
|
|
178
|
+
example, Harbor task directories can run through the bundled OpenCode CLI
|
|
185
179
|
harness with:
|
|
186
180
|
|
|
187
181
|
```python
|
|
182
|
+
from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
|
|
183
|
+
from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
|
|
184
|
+
|
|
188
185
|
env = vf.Env(
|
|
189
|
-
taskset=
|
|
190
|
-
harness=
|
|
186
|
+
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
187
|
+
harness=OpenCode(config=OpenCodeConfig()),
|
|
191
188
|
)
|
|
192
189
|
```
|
|
193
190
|
|
|
@@ -74,7 +74,7 @@ dev = [
|
|
|
74
74
|
"aiohttp>=3.9.0",
|
|
75
75
|
"python-dotenv>=1.0.0",
|
|
76
76
|
"nltk",
|
|
77
|
-
"renderers>=0.1.8.
|
|
77
|
+
"renderers>=0.1.8.dev28",
|
|
78
78
|
]
|
|
79
79
|
policy = [
|
|
80
80
|
"semgrep>=1.150.0",
|
|
@@ -97,7 +97,7 @@ openenv = [
|
|
|
97
97
|
"openenv-core>=0.3.0",
|
|
98
98
|
]
|
|
99
99
|
renderers = [
|
|
100
|
-
"renderers>=0.1.8.
|
|
100
|
+
"renderers>=0.1.8.dev28",
|
|
101
101
|
]
|
|
102
102
|
rl = [
|
|
103
103
|
"torch>=2.8.0,<2.9.0",
|
|
@@ -4,6 +4,37 @@ import sys
|
|
|
4
4
|
import verifiers
|
|
5
5
|
|
|
6
6
|
|
|
7
|
+
PACKAGE_SYMBOLS = {
|
|
8
|
+
"HarborTaskset",
|
|
9
|
+
"HarborTasksetConfig",
|
|
10
|
+
"MiniSWEAgent",
|
|
11
|
+
"MiniSWEAgentConfig",
|
|
12
|
+
"OpenCode",
|
|
13
|
+
"OpenCodeConfig",
|
|
14
|
+
"Pi",
|
|
15
|
+
"PiConfig",
|
|
16
|
+
"RLM",
|
|
17
|
+
"RLMConfig",
|
|
18
|
+
"Terminus2",
|
|
19
|
+
"Terminus2Config",
|
|
20
|
+
"TextArenaTaskset",
|
|
21
|
+
"TextArenaTasksetConfig",
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_package_tasksets_and_harnesses_are_not_root_exports():
|
|
26
|
+
for name in PACKAGE_SYMBOLS:
|
|
27
|
+
assert name not in verifiers.__all__
|
|
28
|
+
assert not hasattr(verifiers, name)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_package_tasksets_and_harnesses_are_not_v1_exports():
|
|
32
|
+
v1 = importlib.import_module("verifiers.v1")
|
|
33
|
+
for name in PACKAGE_SYMBOLS:
|
|
34
|
+
assert name not in v1.__all__
|
|
35
|
+
assert not hasattr(v1, name)
|
|
36
|
+
|
|
37
|
+
|
|
7
38
|
def test_v1_taskset_imports_do_not_import_textarena():
|
|
8
39
|
textarena_module = "verifiers.v1.packages.tasksets.textarena"
|
|
9
40
|
sys.modules.pop(textarena_module, None)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
import pytest
|
|
3
4
|
import verifiers as vf
|
|
4
5
|
from verifiers.scripts.init import init_environment
|
|
5
6
|
|
|
@@ -24,16 +25,19 @@ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
|
|
|
24
25
|
init_environment("bar", path=str(tmp_path), v1=True)
|
|
25
26
|
content = read_env_file(tmp_path, "bar")
|
|
26
27
|
|
|
27
|
-
assert
|
|
28
|
-
assert "
|
|
29
|
-
assert "
|
|
30
|
-
assert
|
|
31
|
-
assert
|
|
32
|
-
assert "def load_taskset(config:
|
|
33
|
-
assert "
|
|
28
|
+
assert "class BarTasksetConfig(vf.TasksetConfig):" in content
|
|
29
|
+
assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
|
|
30
|
+
assert "def load_tasks(self) -> vf.Tasks:" in content
|
|
31
|
+
assert "def load_system_prompt(self) -> vf.SystemPrompt:" in content
|
|
32
|
+
assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
|
|
33
|
+
assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
|
|
34
|
+
assert "return BarTaskset(config=config)" in content
|
|
35
|
+
assert "vf.load_taskset(config=config.taskset)" in content
|
|
34
36
|
assert "class EnvTaskset(" not in content
|
|
35
37
|
assert "_default_" not in content
|
|
36
38
|
assert "assert isinstance" not in content
|
|
39
|
+
assert 'tasks: str = "load_tasks"' not in content
|
|
40
|
+
assert 'rewards: list[str] = ["correct_answer"]' not in content
|
|
37
41
|
|
|
38
42
|
|
|
39
43
|
def test_init_v1_template_loads_with_vf_load_environment(
|
|
@@ -42,21 +46,19 @@ def test_init_v1_template_loads_with_vf_load_environment(
|
|
|
42
46
|
init_environment("loadable-v1", path=str(tmp_path), v1=True)
|
|
43
47
|
monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
|
|
44
48
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
assert isinstance(env, vf.Env)
|
|
48
|
-
assert env.taskset.rows()[0]["answer"] == "cba"
|
|
49
|
-
assert env.taskset.rewards[0].__name__ == "exact_answer"
|
|
49
|
+
with pytest.raises(RuntimeError, match="Load the system prompt"):
|
|
50
|
+
vf.load_environment("loadable-v1")
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
|
|
53
54
|
init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
|
|
54
55
|
content = read_env_file(tmp_path, "baz")
|
|
55
56
|
|
|
56
|
-
assert "class
|
|
57
|
-
assert "class
|
|
58
|
-
assert "
|
|
59
|
-
assert "
|
|
57
|
+
assert "class BazTaskset(vf.Taskset[BazTasksetConfig]):" in content
|
|
58
|
+
assert "class BazHarnessConfig(vf.HarnessConfig):" in content
|
|
59
|
+
assert "class BazHarness(vf.Harness):" in content
|
|
60
|
+
assert "def load_harness(config: BazHarnessConfig) -> BazHarness:" in content
|
|
61
|
+
assert "vf.load_harness(config=config.harness)" in content
|
|
60
62
|
|
|
61
63
|
|
|
62
64
|
def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
|
|
@@ -77,4 +79,5 @@ def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
|
|
|
77
79
|
|
|
78
80
|
assert "from .pkg_env import load_environment, load_taskset" in init_content
|
|
79
81
|
assert "__all__ = ['load_environment', 'load_taskset']" in init_content
|
|
80
|
-
assert
|
|
82
|
+
assert "class PkgEnvTaskset(vf.Taskset[PkgEnvTasksetConfig]):" in env_content
|
|
83
|
+
assert "return PkgEnvTaskset(config=config)" in env_content
|
{verifiers-0.1.15.dev10 → verifiers-0.1.15.dev11}/tests/test_langchain_deep_agents_wikispeedia.py
RENAMED
|
@@ -91,8 +91,8 @@ def test_wikispeedia_env_config_reaches_taskset_and_harness(
|
|
|
91
91
|
)
|
|
92
92
|
)
|
|
93
93
|
|
|
94
|
-
train_rows =
|
|
95
|
-
eval_rows =
|
|
94
|
+
train_rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
|
|
95
|
+
eval_rows = [env.taskset.to_task(row) for row in env.taskset.get_eval_dataset()]
|
|
96
96
|
|
|
97
97
|
assert len(train_rows) == 2
|
|
98
98
|
assert len(eval_rows) == 1
|
|
@@ -136,8 +136,8 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
|
|
|
136
136
|
)
|
|
137
137
|
)
|
|
138
138
|
|
|
139
|
-
train_rows =
|
|
140
|
-
eval_rows =
|
|
139
|
+
train_rows = [taskset.to_task(row) for row in taskset.get_dataset()]
|
|
140
|
+
eval_rows = [taskset.to_task(row) for row in taskset.get_eval_dataset()]
|
|
141
141
|
|
|
142
142
|
assert len(train_rows) == 2
|
|
143
143
|
assert len(eval_rows) == 1
|
|
@@ -218,7 +218,7 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
|
|
|
218
218
|
),
|
|
219
219
|
harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
|
|
220
220
|
)
|
|
221
|
-
task =
|
|
221
|
+
task = env.taskset.to_task(env.taskset.get_dataset()[0])
|
|
222
222
|
state = module.vf.State.for_task(task)
|
|
223
223
|
state = await env.harness.setup_state(task, state)
|
|
224
224
|
|
|
@@ -5,7 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
8
|
-
import verifiers
|
|
8
|
+
import verifiers as vf
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _load_mcp_search_module() -> Any:
|
|
@@ -54,7 +54,7 @@ def test_mcp_search_env_preserves_harness_config() -> None:
|
|
|
54
54
|
def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
|
|
55
55
|
module = _load_mcp_search_module()
|
|
56
56
|
|
|
57
|
-
rows =
|
|
57
|
+
rows = list(module.load_tasks())
|
|
58
58
|
|
|
59
59
|
assert len(rows) >= 10
|
|
60
60
|
assert len({row["answer"] for row in rows}) == len(rows)
|
|
@@ -68,7 +68,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
|
|
|
68
68
|
env = module.load_environment(
|
|
69
69
|
config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
|
|
70
70
|
)
|
|
71
|
-
rows = env.taskset.
|
|
71
|
+
rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
|
|
72
72
|
|
|
73
73
|
assert env.taskset.config.max_turns == 3
|
|
74
74
|
assert all(row["max_turns"] == 3 for row in rows)
|
|
@@ -3,7 +3,9 @@ import sys
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Any, cast
|
|
5
5
|
|
|
6
|
-
import verifiers
|
|
6
|
+
import verifiers as vf
|
|
7
|
+
from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
|
|
8
|
+
from verifiers.v1.packages.tasksets import HarborTaskset
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
def _load_opencode_module() -> Any:
|
|
@@ -31,14 +33,14 @@ def test_load_environment_uses_v1_taskset_and_harness() -> None:
|
|
|
31
33
|
env = module.load_environment(config=module.OpenCodeHarborEnvConfig())
|
|
32
34
|
|
|
33
35
|
assert isinstance(env, vf.Env)
|
|
34
|
-
assert isinstance(env.taskset,
|
|
35
|
-
assert isinstance(env.harness,
|
|
36
|
-
assert isinstance(env.harness.config,
|
|
36
|
+
assert isinstance(env.taskset, HarborTaskset)
|
|
37
|
+
assert isinstance(env.harness, OpenCode)
|
|
38
|
+
assert isinstance(env.harness.config, OpenCodeConfig)
|
|
37
39
|
assert not hasattr(module, "OpenCodeHarborHarnessConfig")
|
|
38
40
|
assert not hasattr(module, "TERMINAL_BENCH_SAMPLE_TASKS")
|
|
39
41
|
assert env.taskset.resolve_tasks_root() == Path(module.__file__).parent / "tasks"
|
|
40
42
|
assert env.harness.config.max_turns == 4
|
|
41
|
-
assert env.harness.config.disabled_tools ==
|
|
43
|
+
assert env.harness.config.disabled_tools == OpenCodeConfig().disabled_tools
|
|
42
44
|
assert "webfetch" in env.harness.config.disabled_tools
|
|
43
45
|
assert "question" in env.harness.config.disabled_tools
|
|
44
46
|
|
|
@@ -53,11 +55,11 @@ def test_load_environment_accepts_v1_taskset_and_harness_config() -> None:
|
|
|
53
55
|
|
|
54
56
|
env = module.load_environment(
|
|
55
57
|
config=module.OpenCodeHarborEnvConfig(
|
|
56
|
-
taskset=module.
|
|
58
|
+
taskset=module.HarborTasksetConfig(
|
|
57
59
|
task_names=["task-a"],
|
|
58
60
|
cpu_cores=1.5,
|
|
59
61
|
),
|
|
60
|
-
harness=module.
|
|
62
|
+
harness=module.OpenCodeConfig(
|
|
61
63
|
agent_workdir="/workspace",
|
|
62
64
|
disabled_tools=["webfetch"],
|
|
63
65
|
max_turns=2,
|
|
@@ -5,6 +5,7 @@ import pytest
|
|
|
5
5
|
|
|
6
6
|
import verifiers as vf
|
|
7
7
|
from renderers import RendererPool
|
|
8
|
+
from renderers import config_from_name
|
|
8
9
|
from renderers.base import ParsedResponse, RenderedTokens, create_renderer
|
|
9
10
|
from verifiers.clients.renderer_client import (
|
|
10
11
|
RendererClient,
|
|
@@ -24,13 +25,16 @@ from verifiers.types import (
|
|
|
24
25
|
)
|
|
25
26
|
|
|
26
27
|
|
|
27
|
-
def test_renderer_client_honors_configured_renderer_name():
|
|
28
|
+
def test_renderer_client_honors_configured_renderer_config():
|
|
29
|
+
from renderers import Qwen3VLRendererConfig
|
|
30
|
+
|
|
28
31
|
RendererClient._shared_pools.clear()
|
|
29
32
|
|
|
33
|
+
cfg = Qwen3VLRendererConfig()
|
|
30
34
|
client = object.__new__(RendererClient)
|
|
31
35
|
client._renderer = None
|
|
32
36
|
client._pool_size = 1
|
|
33
|
-
client._config = vf.ClientConfig(client_type="renderer",
|
|
37
|
+
client._config = vf.ClientConfig(client_type="renderer", renderer_config=cfg)
|
|
34
38
|
|
|
35
39
|
sentinel_pool = RendererPool.__new__(RendererPool)
|
|
36
40
|
with patch(
|
|
@@ -42,24 +46,23 @@ def test_renderer_client_honors_configured_renderer_name():
|
|
|
42
46
|
assert pool is sentinel_pool
|
|
43
47
|
create_pool_mock.assert_called_once_with(
|
|
44
48
|
"Qwen/Qwen3-VL-4B-Instruct",
|
|
45
|
-
|
|
49
|
+
cfg,
|
|
46
50
|
size=1,
|
|
47
|
-
tool_parser=None,
|
|
48
|
-
reasoning_parser=None,
|
|
49
|
-
preserve_all_thinking=False,
|
|
50
|
-
preserve_thinking_between_tool_calls=False,
|
|
51
51
|
)
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
def test_renderer_client_uses_renderer_model_name_override():
|
|
55
|
+
from renderers import Qwen3VLRendererConfig
|
|
56
|
+
|
|
55
57
|
RendererClient._shared_pools.clear()
|
|
56
58
|
|
|
59
|
+
cfg = Qwen3VLRendererConfig()
|
|
57
60
|
client = object.__new__(RendererClient)
|
|
58
61
|
client._renderer = None
|
|
59
62
|
client._pool_size = 1
|
|
60
63
|
client._config = vf.ClientConfig(
|
|
61
64
|
client_type="renderer",
|
|
62
|
-
|
|
65
|
+
renderer_config=cfg,
|
|
63
66
|
renderer_model_name="Qwen/Qwen3-VL-4B-Instruct",
|
|
64
67
|
)
|
|
65
68
|
|
|
@@ -73,12 +76,8 @@ def test_renderer_client_uses_renderer_model_name_override():
|
|
|
73
76
|
assert pool is sentinel_pool
|
|
74
77
|
create_pool_mock.assert_called_once_with(
|
|
75
78
|
"Qwen/Qwen3-VL-4B-Instruct",
|
|
76
|
-
|
|
79
|
+
cfg,
|
|
77
80
|
size=1,
|
|
78
|
-
tool_parser=None,
|
|
79
|
-
reasoning_parser=None,
|
|
80
|
-
preserve_all_thinking=False,
|
|
81
|
-
preserve_thinking_between_tool_calls=False,
|
|
82
81
|
)
|
|
83
82
|
|
|
84
83
|
|
|
@@ -524,7 +523,7 @@ def _load_tokenizer_and_renderer(model_name: str, renderer_name: str):
|
|
|
524
523
|
from transformers import AutoTokenizer
|
|
525
524
|
|
|
526
525
|
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
527
|
-
renderer = create_renderer(tokenizer,
|
|
526
|
+
renderer = create_renderer(tokenizer, config_from_name(renderer_name))
|
|
528
527
|
return tokenizer, renderer
|
|
529
528
|
|
|
530
529
|
|
|
@@ -17,6 +17,7 @@ Parametrized over five model families so each renderer's render/parse paths
|
|
|
17
17
|
are exercised. Tokenizers come from the local HF cache; no network.
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
|
+
import json
|
|
20
21
|
import logging
|
|
21
22
|
from typing import Any
|
|
22
23
|
|
|
@@ -24,7 +25,7 @@ import pytest
|
|
|
24
25
|
|
|
25
26
|
import verifiers as vf
|
|
26
27
|
from datasets import Dataset
|
|
27
|
-
from renderers import create_renderer
|
|
28
|
+
from renderers import config_from_name, create_renderer
|
|
28
29
|
from verifiers.clients.renderer_client import RendererClient, _to_renderer_message
|
|
29
30
|
from verifiers.types import Messages, State
|
|
30
31
|
|
|
@@ -83,7 +84,7 @@ def _load(model_name: str, renderer_name: str):
|
|
|
83
84
|
from transformers import AutoTokenizer
|
|
84
85
|
|
|
85
86
|
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
86
|
-
renderer = create_renderer(tokenizer,
|
|
87
|
+
renderer = create_renderer(tokenizer, config_from_name(renderer_name))
|
|
87
88
|
_renderer_cache[key] = (tokenizer, renderer)
|
|
88
89
|
return _renderer_cache[key]
|
|
89
90
|
|
|
@@ -106,6 +107,13 @@ def tokenizer_and_renderer(model_family):
|
|
|
106
107
|
# ── Scripted vLLM stand-in ───────────────────────────────────────────
|
|
107
108
|
|
|
108
109
|
|
|
110
|
+
class _ScriptedResponse:
|
|
111
|
+
"""httpx.Response stand-in: ``parse_generate_response`` reads ``.content`` as bytes."""
|
|
112
|
+
|
|
113
|
+
def __init__(self, payload: dict[str, Any]):
|
|
114
|
+
self.content = json.dumps(payload).encode()
|
|
115
|
+
|
|
116
|
+
|
|
109
117
|
class ScriptedVLLM:
|
|
110
118
|
"""Fake ``AsyncOpenAI``-compatible client serving canned
|
|
111
119
|
/inference/v1/generate responses (vllm 0.20 wire shape).
|
|
@@ -124,22 +132,24 @@ class ScriptedVLLM:
|
|
|
124
132
|
assert self._completions, "ScriptedVLLM ran out of canned completions"
|
|
125
133
|
completion_ids = self._completions.pop(0)
|
|
126
134
|
|
|
127
|
-
return
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
135
|
+
return _ScriptedResponse(
|
|
136
|
+
{
|
|
137
|
+
"request_id": f"resp-{len(self.requests)}",
|
|
138
|
+
"choices": [
|
|
139
|
+
{
|
|
140
|
+
"index": 0,
|
|
141
|
+
"token_ids": list(completion_ids),
|
|
142
|
+
"logprobs": {
|
|
143
|
+
"content": [
|
|
144
|
+
{"token": f"token_id:{tid}", "logprob": -0.1}
|
|
145
|
+
for tid in completion_ids
|
|
146
|
+
]
|
|
147
|
+
},
|
|
148
|
+
"finish_reason": "stop",
|
|
149
|
+
}
|
|
150
|
+
],
|
|
151
|
+
}
|
|
152
|
+
)
|
|
143
153
|
|
|
144
154
|
async def close(self):
|
|
145
155
|
pass
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import importlib.util
|
|
2
|
+
import sys
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from types import ModuleType
|
|
4
5
|
|
|
5
6
|
import pytest
|
|
6
7
|
|
|
7
|
-
import verifiers as
|
|
8
|
-
import verifiers.v1 as vf
|
|
8
|
+
import verifiers as vf
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def load_bfcl_module() -> ModuleType:
|
|
@@ -14,6 +14,7 @@ def load_bfcl_module() -> ModuleType:
|
|
|
14
14
|
assert spec is not None
|
|
15
15
|
assert spec.loader is not None
|
|
16
16
|
module = importlib.util.module_from_spec(spec)
|
|
17
|
+
sys.modules[spec.name] = module
|
|
17
18
|
spec.loader.exec_module(module)
|
|
18
19
|
return module
|
|
19
20
|
|
|
@@ -109,7 +110,7 @@ def test_bfcl_loader_supports_category_groups(
|
|
|
109
110
|
bfcl = load_bfcl_module()
|
|
110
111
|
seen_harness_categories = []
|
|
111
112
|
|
|
112
|
-
def
|
|
113
|
+
def fake_load_tasks(test_category: str, **kwargs: object):
|
|
113
114
|
_ = kwargs
|
|
114
115
|
return [{"question": test_category, "answer": "a"}]
|
|
115
116
|
|
|
@@ -118,7 +119,7 @@ def test_bfcl_loader_supports_category_groups(
|
|
|
118
119
|
seen_harness_categories.append(config.test_category)
|
|
119
120
|
return vf.Harness(config=config)
|
|
120
121
|
|
|
121
|
-
monkeypatch.setattr(bfcl
|
|
122
|
+
monkeypatch.setattr(bfcl, "load_tasks", fake_load_tasks)
|
|
122
123
|
monkeypatch.setattr(bfcl, "load_harness", fake_harness)
|
|
123
124
|
|
|
124
125
|
env = bfcl.load_environment(
|
|
@@ -131,7 +132,7 @@ def test_bfcl_loader_supports_category_groups(
|
|
|
131
132
|
)
|
|
132
133
|
)
|
|
133
134
|
|
|
134
|
-
assert isinstance(env,
|
|
135
|
+
assert isinstance(env, vf.EnvGroup)
|
|
135
136
|
assert env.env_names == ["simple_python", "simple_java"]
|
|
136
137
|
seen_taskset_categories = [item.taskset.config.test_category for item in env.envs]
|
|
137
138
|
assert seen_taskset_categories == ["simple_python", "simple_java"]
|