verifiers 0.1.15.dev5__tar.gz → 0.1.15.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/PKG-INFO +14 -8
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/README.md +13 -7
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_eval_cli.py +51 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_eval_display.py +16 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_eval_utils.py +16 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_langchain_deep_agents_wikispeedia.py +74 -19
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_mcp_search_env.py +5 -3
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_opencode_harbor.py +2 -2
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_path_utils.py +14 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_save_utils.py +4 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_bfcl.py +18 -10
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_config_extension.py +181 -29
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_group_reward_env.py +8 -3
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_rlm_swe.py +3 -3
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/__init__.py +1 -1
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/openai_chat_completions_client.py +3 -24
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/openai_completions_client.py +5 -2
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/environment.py +4 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/eval.py +5 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/init.py +77 -15
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/types.py +13 -8
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/types.py +4 -2
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/eval_display.py +25 -9
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/eval_utils.py +30 -16
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/path_utils.py +9 -3
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/response_utils.py +29 -3
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/save_utils.py +1 -3
- verifiers-0.1.15.dev7/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +252 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/README.md +21 -37
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/RE_MIGRATION.md +4 -4
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/config.py +66 -27
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/config_utils.py +24 -1
- verifiers-0.1.15.dev5/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -73
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/.gitignore +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/LICENSE +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_harbor_cli.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_v1_taskset_bindings.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/harness.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/command.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/configs.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/pi.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/harnesses/terminus_2.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/packages/tasksets/harbor.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/taskset.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/types.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/artifact_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/prompt_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/taskset_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/timing_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev5
|
|
3
|
+
Version: 0.1.15.dev7
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -174,6 +174,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
174
174
|
```bash
|
|
175
175
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
176
176
|
```
|
|
177
|
+
Add an explicit harness loader when the environment owns harness behavior:
|
|
178
|
+
```bash
|
|
179
|
+
prime env init my-env --with-harness
|
|
180
|
+
```
|
|
177
181
|
For OpenEnv integration, use:
|
|
178
182
|
```bash
|
|
179
183
|
prime env init my-openenv --openenv
|
|
@@ -191,7 +195,9 @@ environments/my_env/
|
|
|
191
195
|
└── README.md # Documentation
|
|
192
196
|
```
|
|
193
197
|
|
|
194
|
-
Environment modules should expose a `load_environment` function which returns an
|
|
198
|
+
Environment modules should expose a `load_environment` function which returns an
|
|
199
|
+
environment object. For simple legacy environments, this can still be a direct
|
|
200
|
+
constructor:
|
|
195
201
|
```python
|
|
196
202
|
# my_env.py
|
|
197
203
|
import verifiers as vf
|
|
@@ -223,7 +229,7 @@ def source():
|
|
|
223
229
|
async def contains_answer(task, state) -> float:
|
|
224
230
|
return float(task["answer"] in str(state.get("completion") or ""))
|
|
225
231
|
|
|
226
|
-
def load_taskset(config: vf.TasksetConfig
|
|
232
|
+
def load_taskset(config: vf.TasksetConfig):
|
|
227
233
|
return vf.Taskset(source=source, rewards=[contains_answer], config=config)
|
|
228
234
|
|
|
229
235
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
@@ -244,8 +250,8 @@ env = vf.Env(
|
|
|
244
250
|
```
|
|
245
251
|
|
|
246
252
|
The same environment package is the unit used by evals and `prime-rl`. The
|
|
247
|
-
trainer owns model, endpoint, sampling, and rollout count; v1-specific
|
|
248
|
-
|
|
253
|
+
trainer owns model, endpoint, sampling, and rollout count; v1-specific options
|
|
254
|
+
stay on the taskset or harness config that owns them:
|
|
249
255
|
|
|
250
256
|
```toml
|
|
251
257
|
# configs/rl/my-v1-env.toml
|
|
@@ -260,12 +266,12 @@ max_tokens = 4096
|
|
|
260
266
|
[[env]]
|
|
261
267
|
id = "my-env"
|
|
262
268
|
|
|
263
|
-
[env.args]
|
|
264
|
-
arg1 = "non-th-arg"
|
|
265
|
-
|
|
266
269
|
[env.harness]
|
|
267
270
|
max_turns = 1
|
|
268
271
|
|
|
272
|
+
[env.taskset]
|
|
273
|
+
split = "train"
|
|
274
|
+
|
|
269
275
|
[env.taskset.scoring.contains_answer]
|
|
270
276
|
weight = 1.0
|
|
271
277
|
```
|
|
@@ -99,6 +99,10 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
99
99
|
```bash
|
|
100
100
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
101
101
|
```
|
|
102
|
+
Add an explicit harness loader when the environment owns harness behavior:
|
|
103
|
+
```bash
|
|
104
|
+
prime env init my-env --with-harness
|
|
105
|
+
```
|
|
102
106
|
For OpenEnv integration, use:
|
|
103
107
|
```bash
|
|
104
108
|
prime env init my-openenv --openenv
|
|
@@ -116,7 +120,9 @@ environments/my_env/
|
|
|
116
120
|
└── README.md # Documentation
|
|
117
121
|
```
|
|
118
122
|
|
|
119
|
-
Environment modules should expose a `load_environment` function which returns an
|
|
123
|
+
Environment modules should expose a `load_environment` function which returns an
|
|
124
|
+
environment object. For simple legacy environments, this can still be a direct
|
|
125
|
+
constructor:
|
|
120
126
|
```python
|
|
121
127
|
# my_env.py
|
|
122
128
|
import verifiers as vf
|
|
@@ -148,7 +154,7 @@ def source():
|
|
|
148
154
|
async def contains_answer(task, state) -> float:
|
|
149
155
|
return float(task["answer"] in str(state.get("completion") or ""))
|
|
150
156
|
|
|
151
|
-
def load_taskset(config: vf.TasksetConfig
|
|
157
|
+
def load_taskset(config: vf.TasksetConfig):
|
|
152
158
|
return vf.Taskset(source=source, rewards=[contains_answer], config=config)
|
|
153
159
|
|
|
154
160
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
@@ -169,8 +175,8 @@ env = vf.Env(
|
|
|
169
175
|
```
|
|
170
176
|
|
|
171
177
|
The same environment package is the unit used by evals and `prime-rl`. The
|
|
172
|
-
trainer owns model, endpoint, sampling, and rollout count; v1-specific
|
|
173
|
-
|
|
178
|
+
trainer owns model, endpoint, sampling, and rollout count; v1-specific options
|
|
179
|
+
stay on the taskset or harness config that owns them:
|
|
174
180
|
|
|
175
181
|
```toml
|
|
176
182
|
# configs/rl/my-v1-env.toml
|
|
@@ -185,12 +191,12 @@ max_tokens = 4096
|
|
|
185
191
|
[[env]]
|
|
186
192
|
id = "my-env"
|
|
187
193
|
|
|
188
|
-
[env.args]
|
|
189
|
-
arg1 = "non-th-arg"
|
|
190
|
-
|
|
191
194
|
[env.harness]
|
|
192
195
|
max_turns = 1
|
|
193
196
|
|
|
197
|
+
[env.taskset]
|
|
198
|
+
split = "train"
|
|
199
|
+
|
|
194
200
|
[env.taskset.scoring.contains_answer]
|
|
195
201
|
weight = 1.0
|
|
196
202
|
```
|
|
@@ -13,6 +13,7 @@ import verifiers.scripts.eval as vf_eval
|
|
|
13
13
|
import verifiers.utils.eval_utils
|
|
14
14
|
from verifiers.types import GenerateOutputs
|
|
15
15
|
from verifiers.utils.eval_utils import load_toml_config
|
|
16
|
+
from verifiers.utils.path_utils import get_eval_results_path
|
|
16
17
|
from verifiers.utils.save_utils import states_to_outputs
|
|
17
18
|
|
|
18
19
|
|
|
@@ -706,6 +707,34 @@ def test_load_toml_config_multi_env():
|
|
|
706
707
|
assert result[1]["env_id"] == "env2"
|
|
707
708
|
|
|
708
709
|
|
|
710
|
+
def test_load_toml_config_duplicate_envs_accept_names():
|
|
711
|
+
"""Duplicate env ids can be labeled and configured independently."""
|
|
712
|
+
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
713
|
+
f.write(
|
|
714
|
+
'[[eval]]\nid = "env1"\nname = "env1-short"\n'
|
|
715
|
+
"[eval.args]\n"
|
|
716
|
+
'split = "short"\n\n'
|
|
717
|
+
'[[eval]]\nid = "env1"\nname = "env1-long"\n'
|
|
718
|
+
"[eval.args]\n"
|
|
719
|
+
'split = "long"\n'
|
|
720
|
+
)
|
|
721
|
+
f.flush()
|
|
722
|
+
result = load_toml_config(Path(f.name))
|
|
723
|
+
|
|
724
|
+
assert len(result) == 2
|
|
725
|
+
assert [config["env_id"] for config in result] == ["env1", "env1"]
|
|
726
|
+
assert [config["name"] for config in result] == ["env1-short", "env1-long"]
|
|
727
|
+
assert [config["env_args"]["split"] for config in result] == ["short", "long"]
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
def test_load_toml_config_rejects_global_name():
|
|
731
|
+
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
732
|
+
f.write('name = "shared-name"\n\n[[eval]]\nid = "env1"\n')
|
|
733
|
+
f.flush()
|
|
734
|
+
with pytest.raises(ValueError, match="Invalid global field"):
|
|
735
|
+
load_toml_config(Path(f.name))
|
|
736
|
+
|
|
737
|
+
|
|
709
738
|
def test_load_toml_config_with_env_args():
|
|
710
739
|
"""Multiple sections with env_args field loads correctly."""
|
|
711
740
|
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
@@ -815,6 +844,28 @@ def test_cli_multi_env_via_toml_config(monkeypatch, run_cli):
|
|
|
815
844
|
assert configs[1].env_id == "env2"
|
|
816
845
|
|
|
817
846
|
|
|
847
|
+
def test_cli_duplicate_env_names_disambiguate_result_paths(monkeypatch, run_cli):
|
|
848
|
+
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
849
|
+
f.write(
|
|
850
|
+
'[[eval]]\nid = "env1"\nname = "env1-short"\n'
|
|
851
|
+
"[eval.args]\n"
|
|
852
|
+
'split = "short"\n\n'
|
|
853
|
+
'[[eval]]\nid = "env1"\nname = "env1-long"\n'
|
|
854
|
+
"[eval.args]\n"
|
|
855
|
+
'split = "long"\n'
|
|
856
|
+
)
|
|
857
|
+
f.flush()
|
|
858
|
+
captured = run_cli(monkeypatch, {"env_id_or_config": f.name})
|
|
859
|
+
|
|
860
|
+
configs = captured["configs"]
|
|
861
|
+
assert len(configs) == 2
|
|
862
|
+
assert [config.env_id for config in configs] == ["env1", "env1"]
|
|
863
|
+
assert [config.name for config in configs] == ["env1-short", "env1-long"]
|
|
864
|
+
assert [config.env_args["split"] for config in configs] == ["short", "long"]
|
|
865
|
+
assert get_eval_results_path(configs[0]).parent.name.startswith("env1-short--")
|
|
866
|
+
assert get_eval_results_path(configs[1]).parent.name.startswith("env1-long--")
|
|
867
|
+
|
|
868
|
+
|
|
818
869
|
def test_cli_toml_ignores_cli_args(monkeypatch, run_cli):
|
|
819
870
|
"""TOML config ignores CLI args, uses defaults for unspecified values."""
|
|
820
871
|
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
@@ -11,9 +11,11 @@ def make_config(
|
|
|
11
11
|
independent_scoring: bool = False,
|
|
12
12
|
endpoint_id: str | None = None,
|
|
13
13
|
client_config: ClientConfig | None = None,
|
|
14
|
+
name: str | None = None,
|
|
14
15
|
) -> EvalConfig:
|
|
15
16
|
return EvalConfig(
|
|
16
17
|
env_id="dummy-env",
|
|
18
|
+
name=name,
|
|
17
19
|
env_args={},
|
|
18
20
|
env_dir_path="./environments",
|
|
19
21
|
endpoint_id=endpoint_id,
|
|
@@ -82,6 +84,20 @@ def test_format_client_target_uses_single_resolved_base_url() -> None:
|
|
|
82
84
|
assert EvalDisplay._format_client_target(config) == "http://localhost:8001/v1"
|
|
83
85
|
|
|
84
86
|
|
|
87
|
+
def test_display_uses_eval_name_for_duplicate_env_labels() -> None:
|
|
88
|
+
display = EvalDisplay(
|
|
89
|
+
[
|
|
90
|
+
make_config(max_concurrent=1, name="dummy-env-short"),
|
|
91
|
+
make_config(max_concurrent=1, name="dummy-env-long"),
|
|
92
|
+
]
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
rendered = render_plain(display._make_compact_env_row(0))
|
|
96
|
+
|
|
97
|
+
assert "dummy-env-short" in rendered
|
|
98
|
+
assert "dummy-env-long" not in rendered
|
|
99
|
+
|
|
100
|
+
|
|
85
101
|
def render_plain(renderable) -> str:
|
|
86
102
|
console = Console(width=100, record=True)
|
|
87
103
|
console.print(renderable)
|
|
@@ -87,6 +87,22 @@ def test_print_results_single_rollout(capsys, make_metadata, make_state, make_in
|
|
|
87
87
|
assert "r1: [0.1, 0.2, 0.3]" in captured.out
|
|
88
88
|
|
|
89
89
|
|
|
90
|
+
def test_print_results_includes_eval_name(capsys, make_metadata, make_output):
|
|
91
|
+
from verifiers.utils.eval_utils import print_results
|
|
92
|
+
|
|
93
|
+
metadata = make_metadata(env_id="env1")
|
|
94
|
+
metadata["name"] = "env1-short"
|
|
95
|
+
results = GenerateOutputs(
|
|
96
|
+
outputs=[make_output(example_id=0, reward=1.0)],
|
|
97
|
+
metadata=metadata,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
print_results(results)
|
|
101
|
+
captured = capsys.readouterr()
|
|
102
|
+
|
|
103
|
+
assert "Environment: env1-short (env1)" in captured.out
|
|
104
|
+
|
|
105
|
+
|
|
90
106
|
def test_print_results_three_rollouts(capsys, make_metadata, make_state, make_input):
|
|
91
107
|
"""Test print_results with three rollouts per example."""
|
|
92
108
|
from verifiers.utils.eval_utils import print_results
|
{verifiers-0.1.15.dev5 → verifiers-0.1.15.dev7}/tests/test_langchain_deep_agents_wikispeedia.py
RENAMED
|
@@ -57,7 +57,7 @@ def test_wikispeedia_loads_as_v1_taskset_harness(
|
|
|
57
57
|
) -> None:
|
|
58
58
|
module = load_module(monkeypatch)
|
|
59
59
|
|
|
60
|
-
env = module.load_environment(config=
|
|
60
|
+
env = module.load_environment(config=module.WikispeediaEnvConfig())
|
|
61
61
|
|
|
62
62
|
assert isinstance(env, vf.Env)
|
|
63
63
|
assert isinstance(env.taskset, vf.Taskset)
|
|
@@ -65,6 +65,43 @@ def test_wikispeedia_loads_as_v1_taskset_harness(
|
|
|
65
65
|
assert env.taskset.taskset_id == "langchain-deep-agents-wikispeedia"
|
|
66
66
|
|
|
67
67
|
|
|
68
|
+
def test_wikispeedia_env_config_reaches_taskset_and_harness(
|
|
69
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
70
|
+
) -> None:
|
|
71
|
+
module = load_module(monkeypatch)
|
|
72
|
+
wiki = make_small_wiki(module)
|
|
73
|
+
monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
|
|
74
|
+
|
|
75
|
+
env = module.load_environment(
|
|
76
|
+
config=module.WikispeediaEnvConfig(
|
|
77
|
+
taskset={
|
|
78
|
+
"train_size": 2,
|
|
79
|
+
"eval_size": 1,
|
|
80
|
+
"min_path_length": 1,
|
|
81
|
+
"max_path_length": 1,
|
|
82
|
+
"eval_target_fraction": 0.5,
|
|
83
|
+
"allow_go_back": False,
|
|
84
|
+
"links_only": True,
|
|
85
|
+
"max_turns": 7,
|
|
86
|
+
},
|
|
87
|
+
harness={
|
|
88
|
+
"max_turns": 8,
|
|
89
|
+
"timeout_seconds": 9.0,
|
|
90
|
+
},
|
|
91
|
+
)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
train_rows = list(env.taskset.source())
|
|
95
|
+
eval_rows = list(env.taskset.eval_source())
|
|
96
|
+
|
|
97
|
+
assert len(train_rows) == 2
|
|
98
|
+
assert len(eval_rows) == 1
|
|
99
|
+
assert train_rows[0]["max_turns"] == 7
|
|
100
|
+
assert env.harness.config.max_turns == 8
|
|
101
|
+
assert env.harness.config.timeout_seconds == 9.0
|
|
102
|
+
assert [tool.__name__ for tool in env.taskset.toolsets[0].tools] == ["click_link"]
|
|
103
|
+
|
|
104
|
+
|
|
68
105
|
def test_wikispeedia_rows_use_v1_task_shape(
|
|
69
106
|
monkeypatch: pytest.MonkeyPatch,
|
|
70
107
|
) -> None:
|
|
@@ -90,11 +127,13 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
|
|
|
90
127
|
wiki = make_small_wiki(module)
|
|
91
128
|
monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
|
|
92
129
|
taskset = module.load_taskset(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
130
|
+
config=module.WikispeediaTasksetConfig(
|
|
131
|
+
train_size=2,
|
|
132
|
+
eval_size=1,
|
|
133
|
+
min_path_length=1,
|
|
134
|
+
max_path_length=1,
|
|
135
|
+
eval_target_fraction=0.5,
|
|
136
|
+
)
|
|
98
137
|
)
|
|
99
138
|
|
|
100
139
|
train_rows = list(taskset.source())
|
|
@@ -114,8 +153,12 @@ def test_wikispeedia_efficiency_weight_uses_fresh_reward_wrapper(
|
|
|
114
153
|
wiki = make_small_wiki(module)
|
|
115
154
|
monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
|
|
116
155
|
|
|
117
|
-
weighted = module.load_taskset(
|
|
118
|
-
|
|
156
|
+
weighted = module.load_taskset(
|
|
157
|
+
config=module.WikispeediaTasksetConfig(efficiency_weight=0.5)
|
|
158
|
+
)
|
|
159
|
+
plain = module.load_taskset(
|
|
160
|
+
config=module.WikispeediaTasksetConfig(efficiency_weight=0.0)
|
|
161
|
+
)
|
|
119
162
|
|
|
120
163
|
assert any(fn.__name__ == "path_efficiency" for fn in weighted.rewards)
|
|
121
164
|
assert any(fn is module.path_efficiency for fn in plain.metrics)
|
|
@@ -127,13 +170,17 @@ def test_wikispeedia_taskset_owns_navigation_tools(
|
|
|
127
170
|
) -> None:
|
|
128
171
|
module = load_module(monkeypatch)
|
|
129
172
|
|
|
130
|
-
taskset = module.load_taskset(
|
|
173
|
+
taskset = module.load_taskset(
|
|
174
|
+
config=module.WikispeediaTasksetConfig(allow_go_back=True)
|
|
175
|
+
)
|
|
131
176
|
names = [tool.__name__ for tool in taskset.toolsets[0].tools]
|
|
132
|
-
no_back = module.load_taskset(
|
|
177
|
+
no_back = module.load_taskset(
|
|
178
|
+
config=module.WikispeediaTasksetConfig(allow_go_back=False)
|
|
179
|
+
)
|
|
133
180
|
|
|
134
181
|
assert names == ["click_link", "go_back"]
|
|
135
182
|
assert [tool.__name__ for tool in no_back.toolsets[0].tools] == ["click_link"]
|
|
136
|
-
assert module.load_harness().toolsets == []
|
|
183
|
+
assert module.load_harness(config=module.WikispeediaHarnessConfig()).toolsets == []
|
|
137
184
|
|
|
138
185
|
|
|
139
186
|
def test_wikispeedia_system_prompt_matches_available_tools(
|
|
@@ -141,8 +188,12 @@ def test_wikispeedia_system_prompt_matches_available_tools(
|
|
|
141
188
|
) -> None:
|
|
142
189
|
module = load_module(monkeypatch)
|
|
143
190
|
|
|
144
|
-
with_back = module.load_taskset(
|
|
145
|
-
|
|
191
|
+
with_back = module.load_taskset(
|
|
192
|
+
config=module.WikispeediaTasksetConfig(allow_go_back=True)
|
|
193
|
+
)
|
|
194
|
+
without_back = module.load_taskset(
|
|
195
|
+
config=module.WikispeediaTasksetConfig(allow_go_back=False)
|
|
196
|
+
)
|
|
146
197
|
|
|
147
198
|
assert "go_back" in with_back.system_prompt[0]["content"]
|
|
148
199
|
assert "go_back" not in without_back.system_prompt[0]["content"]
|
|
@@ -156,12 +207,16 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
|
|
|
156
207
|
module = load_module(monkeypatch)
|
|
157
208
|
wiki = make_small_wiki(module)
|
|
158
209
|
monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki)
|
|
159
|
-
env =
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
210
|
+
env = vf.Env(
|
|
211
|
+
taskset=module.load_taskset(
|
|
212
|
+
config=module.WikispeediaTasksetConfig(
|
|
213
|
+
train_size=2,
|
|
214
|
+
eval_size=1,
|
|
215
|
+
min_path_length=1,
|
|
216
|
+
max_path_length=1,
|
|
217
|
+
)
|
|
218
|
+
),
|
|
219
|
+
harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
|
|
165
220
|
)
|
|
166
221
|
task = module.vf.Task(list(env.taskset.source())[0]).freeze()
|
|
167
222
|
state = module.vf.State.for_task(task)
|
|
@@ -26,7 +26,9 @@ def _load_mcp_search_module() -> Any:
|
|
|
26
26
|
def test_mcp_search_env_is_v1_only() -> None:
|
|
27
27
|
module = _load_mcp_search_module()
|
|
28
28
|
|
|
29
|
-
env = module.load_environment(
|
|
29
|
+
env = module.load_environment(
|
|
30
|
+
config=module.MCPSearchEnvConfig(taskset={"max_turns": 4})
|
|
31
|
+
)
|
|
30
32
|
|
|
31
33
|
assert isinstance(env, vf.Env)
|
|
32
34
|
assert isinstance(env.taskset, vf.Taskset)
|
|
@@ -40,7 +42,7 @@ def test_mcp_search_env_is_v1_only() -> None:
|
|
|
40
42
|
def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
|
|
41
43
|
module = _load_mcp_search_module()
|
|
42
44
|
|
|
43
|
-
rows = module.load_taskset().rows()
|
|
45
|
+
rows = module.load_taskset(config=module.MCPSearchTasksetConfig()).rows()
|
|
44
46
|
|
|
45
47
|
assert len(rows) >= 10
|
|
46
48
|
assert len({row["answer"] for row in rows}) == len(rows)
|
|
@@ -52,7 +54,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
|
|
|
52
54
|
module = _load_mcp_search_module()
|
|
53
55
|
|
|
54
56
|
env = module.load_environment(
|
|
55
|
-
config=
|
|
57
|
+
config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
|
|
56
58
|
)
|
|
57
59
|
rows = env.taskset.rows()
|
|
58
60
|
|
|
@@ -28,7 +28,7 @@ def _load_opencode_module() -> Any:
|
|
|
28
28
|
def test_load_environment_uses_v1_taskset_and_harness() -> None:
|
|
29
29
|
module = _load_opencode_module()
|
|
30
30
|
|
|
31
|
-
env = module.load_environment(config=
|
|
31
|
+
env = module.load_environment(config=module.OpenCodeHarborEnvConfig())
|
|
32
32
|
|
|
33
33
|
assert isinstance(env, vf.Env)
|
|
34
34
|
assert isinstance(env.taskset, vf.HarborTaskset)
|
|
@@ -52,7 +52,7 @@ def test_load_environment_accepts_v1_taskset_and_harness_config() -> None:
|
|
|
52
52
|
module = _load_opencode_module()
|
|
53
53
|
|
|
54
54
|
env = module.load_environment(
|
|
55
|
-
config=
|
|
55
|
+
config=module.OpenCodeHarborEnvConfig(
|
|
56
56
|
taskset={
|
|
57
57
|
"task_names": ["task-a"],
|
|
58
58
|
"cpu_cores": 1.5,
|
|
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
from verifiers.utils.path_utils import (
|
|
5
5
|
find_latest_incomplete_eval_results_path,
|
|
6
|
+
get_eval_runs_dir,
|
|
6
7
|
is_valid_eval_results_path,
|
|
7
8
|
)
|
|
8
9
|
|
|
@@ -69,6 +70,19 @@ def test_find_latest_incomplete_eval_results_path_returns_none_when_no_match(
|
|
|
69
70
|
assert result is None
|
|
70
71
|
|
|
71
72
|
|
|
73
|
+
def test_get_eval_runs_dir_uses_name_as_result_label(tmp_path: Path):
|
|
74
|
+
runs_dir = get_eval_runs_dir(
|
|
75
|
+
env_id="dummy-env",
|
|
76
|
+
name="dummy-env-short",
|
|
77
|
+
model="openai/gpt-4.1-mini",
|
|
78
|
+
output_dir=str(tmp_path / "outputs"),
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
assert runs_dir == (
|
|
82
|
+
tmp_path / "outputs" / "evals" / "dummy-env-short--openai--gpt-4.1-mini"
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
72
86
|
def test_is_valid_eval_results_path_requires_files(tmp_path: Path):
|
|
73
87
|
run_dir = tmp_path / "run"
|
|
74
88
|
run_dir.mkdir()
|
|
@@ -32,6 +32,7 @@ from verifiers.utils.save_utils import (
|
|
|
32
32
|
make_serializable,
|
|
33
33
|
save_new_outputs,
|
|
34
34
|
states_to_outputs,
|
|
35
|
+
truncate_malformed_trailing_line,
|
|
35
36
|
validate_resume_metadata,
|
|
36
37
|
)
|
|
37
38
|
from verifiers.utils.usage_utils import StateUsageTracker, response_usage_tokens
|
|
@@ -488,6 +489,9 @@ class TestSaveNewOutputs:
|
|
|
488
489
|
"\n".join(lines + [malformed_trailing_line]), encoding="utf-8"
|
|
489
490
|
)
|
|
490
491
|
|
|
492
|
+
# Caller drops the partial trailing row before appending so the new
|
|
493
|
+
# row lands on a valid JSONL boundary.
|
|
494
|
+
truncate_malformed_trailing_line(outputs_path)
|
|
491
495
|
save_new_outputs(
|
|
492
496
|
[{"example_id": 3, "label": "row-3"}],
|
|
493
497
|
results_path,
|
|
@@ -75,12 +75,12 @@ def test_bfcl_public_loader_is_v1_only(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
|
75
75
|
seen_taskset_config: vf.TasksetConfig | None = None
|
|
76
76
|
seen_harness_config: vf.HarnessConfig | None = None
|
|
77
77
|
|
|
78
|
-
def fake_taskset(config: vf.TasksetConfig
|
|
78
|
+
def fake_taskset(config: vf.TasksetConfig) -> vf.Taskset:
|
|
79
79
|
nonlocal seen_taskset_config
|
|
80
80
|
seen_taskset_config = config
|
|
81
81
|
return vf.Taskset(source=[], config=config)
|
|
82
82
|
|
|
83
|
-
def fake_harness(config: vf.HarnessConfig
|
|
83
|
+
def fake_harness(config: vf.HarnessConfig) -> vf.Harness:
|
|
84
84
|
nonlocal seen_harness_config
|
|
85
85
|
seen_harness_config = config
|
|
86
86
|
return vf.Harness(config=config)
|
|
@@ -89,9 +89,13 @@ def test_bfcl_public_loader_is_v1_only(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
|
89
89
|
monkeypatch.setattr(bfcl, "load_harness", fake_harness)
|
|
90
90
|
|
|
91
91
|
env = bfcl.load_environment(
|
|
92
|
-
config=
|
|
93
|
-
|
|
94
|
-
|
|
92
|
+
config=bfcl.BFCLEnvConfig(
|
|
93
|
+
taskset=bfcl.BFCLTasksetConfig(
|
|
94
|
+
test_category="simple_python",
|
|
95
|
+
examples_per_category=0,
|
|
96
|
+
),
|
|
97
|
+
harness=bfcl.BFCLHarnessConfig(),
|
|
98
|
+
)
|
|
95
99
|
)
|
|
96
100
|
|
|
97
101
|
assert isinstance(env, vf.Env)
|
|
@@ -110,12 +114,12 @@ def test_bfcl_loader_supports_category_groups(
|
|
|
110
114
|
seen_taskset_categories = []
|
|
111
115
|
seen_harness_categories = []
|
|
112
116
|
|
|
113
|
-
def fake_taskset(config: vf.TasksetConfig
|
|
117
|
+
def fake_taskset(config: vf.TasksetConfig) -> vf.Taskset:
|
|
114
118
|
assert isinstance(config, bfcl.BFCLTasksetConfig)
|
|
115
119
|
seen_taskset_categories.append(config.test_category)
|
|
116
120
|
return vf.Taskset(source=[{"question": "q", "answer": "a"}], config=config)
|
|
117
121
|
|
|
118
|
-
def fake_harness(config: vf.HarnessConfig
|
|
122
|
+
def fake_harness(config: vf.HarnessConfig) -> vf.Harness:
|
|
119
123
|
assert isinstance(config, bfcl.BFCLHarnessConfig)
|
|
120
124
|
seen_harness_categories.append(config.test_category)
|
|
121
125
|
return vf.Harness(config=config)
|
|
@@ -124,9 +128,13 @@ def test_bfcl_loader_supports_category_groups(
|
|
|
124
128
|
monkeypatch.setattr(bfcl, "load_harness", fake_harness)
|
|
125
129
|
|
|
126
130
|
env = bfcl.load_environment(
|
|
127
|
-
config=
|
|
128
|
-
|
|
129
|
-
|
|
131
|
+
config=bfcl.BFCLEnvConfig(
|
|
132
|
+
taskset=bfcl.BFCLTasksetConfig(
|
|
133
|
+
test_categories=["simple_python", "simple_java"],
|
|
134
|
+
examples_per_category=0,
|
|
135
|
+
),
|
|
136
|
+
harness=bfcl.BFCLHarnessConfig(),
|
|
137
|
+
)
|
|
130
138
|
)
|
|
131
139
|
|
|
132
140
|
assert isinstance(env, root_vf.EnvGroup)
|