verifiers 0.1.15.dev14__tar.gz → 0.1.15.dev15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/PKG-INFO +14 -8
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/README.md +13 -7
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_eval_cli.py +23 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_init_script.py +12 -10
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_config_extension.py +243 -2
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/__init__.py +4 -1
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/eval.py +7 -6
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/init.py +9 -12
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/env_utils.py +85 -23
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/eval_utils.py +20 -7
- verifiers-0.1.15.dev15/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +186 -0
- verifiers-0.1.15.dev15/verifiers/v1/README.md +333 -0
- verifiers-0.1.15.dev15/verifiers/v1/RE_MIGRATION.md +420 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/__init__.py +2 -1
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/harness.py +4 -3
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/prompt_utils.py +69 -26
- verifiers-0.1.15.dev14/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -279
- verifiers-0.1.15.dev14/verifiers/v1/README.md +0 -1716
- verifiers-0.1.15.dev14/verifiers/v1/RE_MIGRATION.md +0 -490
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/.gitignore +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/LICENSE +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_harbor_cli.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_nemo_gym_harness.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_openenv_taskset.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_openreward_taskset.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_taskset_bindings.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_v1_textarena_taskset.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_wiki_search_v1.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_wordle_v1_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/types.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/artifact.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/config.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/model.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/program.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/runtime_handles.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/sandbox.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/taskset.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/types.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/taskset_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/toolset_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev14 → verifiers-0.1.15.dev15}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev14
|
|
3
|
+
Version: 0.1.15.dev15
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -234,7 +234,7 @@ import verifiers as vf
|
|
|
234
234
|
|
|
235
235
|
|
|
236
236
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
237
|
-
|
|
237
|
+
system_prompt: vf.SystemPrompt = "Reverse text exactly."
|
|
238
238
|
|
|
239
239
|
|
|
240
240
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
@@ -247,7 +247,7 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
|
247
247
|
"max_turns": 1,
|
|
248
248
|
}
|
|
249
249
|
]
|
|
250
|
-
return [row for row in rows if row["split"] ==
|
|
250
|
+
return [row for row in rows if row["split"] == split]
|
|
251
251
|
|
|
252
252
|
@vf.reward(weight=1.0)
|
|
253
253
|
async def contains_answer(self, task, state) -> float:
|
|
@@ -259,14 +259,20 @@ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
|
259
259
|
|
|
260
260
|
|
|
261
261
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
262
|
-
|
|
262
|
+
"""Loader pattern for all Taskset/Harness environments."""
|
|
263
|
+
return vf.Env(
|
|
264
|
+
taskset=vf.load_taskset(config=config.taskset),
|
|
265
|
+
harness=vf.load_harness(config=config.harness),
|
|
266
|
+
)
|
|
263
267
|
```
|
|
264
|
-
|
|
268
|
+
The child loader annotation defines the taskset config shape; root
|
|
269
|
+
`load_environment` stays typed as `vf.EnvConfig`. See
|
|
265
270
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
266
271
|
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
267
272
|
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
268
|
-
`verifiers[tasksets]
|
|
269
|
-
task directories can run through the bundled OpenCode CLI
|
|
273
|
+
`verifiers[tasksets]`, `verifiers[harnesses]`, and backend-specific extras. For
|
|
274
|
+
example, Harbor task directories can run through the bundled OpenCode CLI
|
|
275
|
+
harness with:
|
|
270
276
|
|
|
271
277
|
```python
|
|
272
278
|
from harnesses import OpenCode, OpenCodeConfig
|
|
@@ -299,7 +305,7 @@ id = "my-env"
|
|
|
299
305
|
max_turns = 1
|
|
300
306
|
|
|
301
307
|
[env.taskset]
|
|
302
|
-
|
|
308
|
+
system_prompt = "Reverse text exactly."
|
|
303
309
|
|
|
304
310
|
[env.taskset.scoring.contains_answer]
|
|
305
311
|
weight = 1.0
|
|
@@ -145,7 +145,7 @@ import verifiers as vf
|
|
|
145
145
|
|
|
146
146
|
|
|
147
147
|
class MyTasksetConfig(vf.TasksetConfig):
|
|
148
|
-
|
|
148
|
+
system_prompt: vf.SystemPrompt = "Reverse text exactly."
|
|
149
149
|
|
|
150
150
|
|
|
151
151
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
@@ -158,7 +158,7 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
|
158
158
|
"max_turns": 1,
|
|
159
159
|
}
|
|
160
160
|
]
|
|
161
|
-
return [row for row in rows if row["split"] ==
|
|
161
|
+
return [row for row in rows if row["split"] == split]
|
|
162
162
|
|
|
163
163
|
@vf.reward(weight=1.0)
|
|
164
164
|
async def contains_answer(self, task, state) -> float:
|
|
@@ -170,14 +170,20 @@ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
|
|
|
170
170
|
|
|
171
171
|
|
|
172
172
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
173
|
-
|
|
173
|
+
"""Loader pattern for all Taskset/Harness environments."""
|
|
174
|
+
return vf.Env(
|
|
175
|
+
taskset=vf.load_taskset(config=config.taskset),
|
|
176
|
+
harness=vf.load_harness(config=config.harness),
|
|
177
|
+
)
|
|
174
178
|
```
|
|
175
|
-
|
|
179
|
+
The child loader annotation defines the taskset config shape; root
|
|
180
|
+
`load_environment` stays typed as `vf.EnvConfig`. See
|
|
176
181
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
177
182
|
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
178
183
|
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
179
|
-
`verifiers[tasksets]
|
|
180
|
-
task directories can run through the bundled OpenCode CLI
|
|
184
|
+
`verifiers[tasksets]`, `verifiers[harnesses]`, and backend-specific extras. For
|
|
185
|
+
example, Harbor task directories can run through the bundled OpenCode CLI
|
|
186
|
+
harness with:
|
|
181
187
|
|
|
182
188
|
```python
|
|
183
189
|
from harnesses import OpenCode, OpenCodeConfig
|
|
@@ -210,7 +216,7 @@ id = "my-env"
|
|
|
210
216
|
max_turns = 1
|
|
211
217
|
|
|
212
218
|
[env.taskset]
|
|
213
|
-
|
|
219
|
+
system_prompt = "Reverse text exactly."
|
|
214
220
|
|
|
215
221
|
[env.taskset.scoring.contains_answer]
|
|
216
222
|
weight = 1.0
|
|
@@ -994,6 +994,29 @@ def test_load_toml_config_with_args_taskset_harness():
|
|
|
994
994
|
assert "harness" not in result[0]
|
|
995
995
|
|
|
996
996
|
|
|
997
|
+
def test_load_toml_config_allows_taskset_id_without_env_id():
|
|
998
|
+
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
999
|
+
f.write(
|
|
1000
|
+
"[[eval]]\n"
|
|
1001
|
+
"[eval.taskset]\n"
|
|
1002
|
+
'id = "tasksets.harbor"\n'
|
|
1003
|
+
"num_examples = 10\n\n"
|
|
1004
|
+
"[eval.harness]\n"
|
|
1005
|
+
'id = "harnesses.opencode"\n'
|
|
1006
|
+
"max_turns = 5\n"
|
|
1007
|
+
)
|
|
1008
|
+
f.flush()
|
|
1009
|
+
result = load_toml_config(Path(f.name))
|
|
1010
|
+
|
|
1011
|
+
assert result[0]["env_id"] == "tasksets.harbor"
|
|
1012
|
+
assert result[0]["env_args"] == {
|
|
1013
|
+
"config": {
|
|
1014
|
+
"taskset": {"id": "tasksets.harbor", "num_examples": 10},
|
|
1015
|
+
"harness": {"id": "harnesses.opencode", "max_turns": 5},
|
|
1016
|
+
},
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
|
|
997
1020
|
def test_load_toml_config_missing_env_section():
|
|
998
1021
|
"""TOML without [[eval]] section raises ValueError."""
|
|
999
1022
|
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
@@ -27,16 +27,18 @@ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
|
|
|
27
27
|
|
|
28
28
|
assert "class BarTasksetConfig(vf.TasksetConfig):" in content
|
|
29
29
|
assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
|
|
30
|
-
assert 'def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:' in content
|
|
31
30
|
assert (
|
|
32
|
-
"
|
|
31
|
+
'system_prompt: vf.SystemPrompt = "Replace this with the system prompt for bar."'
|
|
33
32
|
in content
|
|
34
33
|
)
|
|
34
|
+
assert 'def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:' in content
|
|
35
|
+
assert "def load_system_prompt" not in content
|
|
35
36
|
assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
|
|
36
37
|
assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
|
|
37
38
|
assert "return BarTaskset(config=config)" in content
|
|
38
39
|
assert "taskset=vf.load_taskset(config=config.taskset)" in content
|
|
39
|
-
assert "
|
|
40
|
+
assert '"""Loader pattern for all Taskset/Harness environments."""' in content
|
|
41
|
+
assert "harness=vf.load_harness(config=config.harness)" in content
|
|
40
42
|
assert "class EnvTaskset(" not in content
|
|
41
43
|
assert "_default_" not in content
|
|
42
44
|
assert 'tasks: str = "load_tasks"' not in content
|
|
@@ -49,8 +51,10 @@ def test_init_v1_template_loads_with_vf_load_environment(
|
|
|
49
51
|
init_environment("loadable-v1", path=str(tmp_path), v1=True)
|
|
50
52
|
monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
|
|
51
53
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
env = vf.load_environment("loadable-v1")
|
|
55
|
+
|
|
56
|
+
with pytest.raises(RuntimeError, match="Load tasks"):
|
|
57
|
+
env.get_dataset()
|
|
54
58
|
|
|
55
59
|
|
|
56
60
|
def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
|
|
@@ -92,16 +96,14 @@ def test_init_openenv_writes_v1_taskset_template(tmp_path: Path) -> None:
|
|
|
92
96
|
content = read_env_file(tmp_path, "openenv-sample")
|
|
93
97
|
pyproject = (tmp_path / "openenv_sample" / "pyproject.toml").read_text()
|
|
94
98
|
|
|
95
|
-
assert
|
|
96
|
-
"from tasksets.openenv import OpenEnvTaskset, OpenEnvTasksetConfig" in content
|
|
97
|
-
)
|
|
99
|
+
assert "from tasksets import OpenEnvTaskset, OpenEnvTasksetConfig" in content
|
|
98
100
|
assert (
|
|
99
101
|
"def load_taskset(config: OpenEnvTasksetConfig) -> OpenEnvTaskset:" in content
|
|
100
102
|
)
|
|
101
103
|
assert "taskset=vf.load_taskset(config=config.taskset)" in content
|
|
102
|
-
assert "harness=vf.
|
|
104
|
+
assert "harness=vf.load_harness(config=config.harness)" in content
|
|
103
105
|
assert "vf.OpenEnvEnv" not in content
|
|
104
|
-
assert '"tasksets>=0.1.1"' in pyproject
|
|
106
|
+
assert '"tasksets[openenv]>=0.1.1"' in pyproject
|
|
105
107
|
|
|
106
108
|
|
|
107
109
|
def test_init_openenv_multifile_exports_taskset_loader(tmp_path: Path) -> None:
|
|
@@ -1670,18 +1670,151 @@ def test_system_prompt_direct_string_can_contain_colon() -> None:
|
|
|
1670
1670
|
|
|
1671
1671
|
|
|
1672
1672
|
@pytest.mark.asyncio
|
|
1673
|
-
async def
|
|
1673
|
+
async def test_harness_concats_multiple_system_prompt_sources_by_default() -> None:
|
|
1674
1674
|
taskset = make_taskset(system_prompt="taskset sys")
|
|
1675
1675
|
harness = make_harness(
|
|
1676
1676
|
program={"fn": ref("config_program")}, system_prompt="harness sys"
|
|
1677
1677
|
)
|
|
1678
1678
|
Env(taskset=taskset, harness=harness)
|
|
1679
1679
|
task = next(iter(taskset))
|
|
1680
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1681
|
+
|
|
1682
|
+
assert state["system_prompt"] == [
|
|
1683
|
+
{"role": "system", "content": "harness sys"},
|
|
1684
|
+
{"role": "system", "content": "taskset sys"},
|
|
1685
|
+
]
|
|
1686
|
+
|
|
1687
|
+
|
|
1688
|
+
@pytest.mark.asyncio
|
|
1689
|
+
async def test_task_system_prompt_overrides_taskset_side_at_runtime() -> None:
|
|
1690
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1691
|
+
harness = make_harness(program={"fn": ref("config_program")})
|
|
1692
|
+
Env(taskset=taskset, harness=harness)
|
|
1693
|
+
task = Task(
|
|
1694
|
+
{
|
|
1695
|
+
"prompt": [{"role": "user", "content": "hi"}],
|
|
1696
|
+
"system_prompt": "task sys",
|
|
1697
|
+
}
|
|
1698
|
+
).freeze()
|
|
1699
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1700
|
+
|
|
1701
|
+
assert state["system_prompt"] == [{"role": "system", "content": "task sys"}]
|
|
1702
|
+
|
|
1703
|
+
|
|
1704
|
+
@pytest.mark.asyncio
|
|
1705
|
+
async def test_task_override_is_resolved_before_harness_concat() -> None:
|
|
1706
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1707
|
+
harness = make_harness(
|
|
1708
|
+
program={"fn": ref("config_program")}, system_prompt="harness sys"
|
|
1709
|
+
)
|
|
1710
|
+
Env(taskset=taskset, harness=harness)
|
|
1711
|
+
task = Task(
|
|
1712
|
+
{
|
|
1713
|
+
"prompt": [{"role": "user", "content": "hi"}],
|
|
1714
|
+
"system_prompt": "task sys",
|
|
1715
|
+
}
|
|
1716
|
+
).freeze()
|
|
1717
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1680
1718
|
|
|
1681
|
-
|
|
1719
|
+
assert state["system_prompt"] == [
|
|
1720
|
+
{"role": "system", "content": "harness sys"},
|
|
1721
|
+
{"role": "system", "content": "task sys"},
|
|
1722
|
+
]
|
|
1723
|
+
|
|
1724
|
+
|
|
1725
|
+
@pytest.mark.asyncio
|
|
1726
|
+
async def test_system_prompt_strategy_can_concat_taskset_side_first() -> None:
|
|
1727
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1728
|
+
harness = make_harness(
|
|
1729
|
+
program={"fn": ref("config_program")},
|
|
1730
|
+
system_prompt="harness sys",
|
|
1731
|
+
system_prompt_strategy="TH",
|
|
1732
|
+
)
|
|
1733
|
+
Env(taskset=taskset, harness=harness)
|
|
1734
|
+
task = next(iter(taskset))
|
|
1735
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1736
|
+
|
|
1737
|
+
assert state["system_prompt"] == [
|
|
1738
|
+
{"role": "system", "content": "taskset sys"},
|
|
1739
|
+
{"role": "system", "content": "harness sys"},
|
|
1740
|
+
]
|
|
1741
|
+
|
|
1742
|
+
|
|
1743
|
+
@pytest.mark.asyncio
|
|
1744
|
+
async def test_harness_can_reject_multiple_system_prompt_sides() -> None:
|
|
1745
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1746
|
+
harness = make_harness(
|
|
1747
|
+
program={"fn": ref("config_program")},
|
|
1748
|
+
system_prompt="harness sys",
|
|
1749
|
+
system_prompt_strategy="REJECT",
|
|
1750
|
+
)
|
|
1751
|
+
Env(taskset=taskset, harness=harness)
|
|
1752
|
+
task = next(iter(taskset))
|
|
1753
|
+
|
|
1754
|
+
with pytest.raises(ValueError, match="Multiple system_prompt sides"):
|
|
1682
1755
|
await harness.setup_state(task, State.for_task(task))
|
|
1683
1756
|
|
|
1684
1757
|
|
|
1758
|
+
@pytest.mark.asyncio
|
|
1759
|
+
async def test_system_prompt_side_selection_uses_resolved_taskset_side() -> None:
|
|
1760
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1761
|
+
harness = make_harness(
|
|
1762
|
+
program={"fn": ref("config_program")},
|
|
1763
|
+
system_prompt="harness sys",
|
|
1764
|
+
system_prompt_strategy="T_OR_H",
|
|
1765
|
+
)
|
|
1766
|
+
Env(taskset=taskset, harness=harness)
|
|
1767
|
+
task = Task(
|
|
1768
|
+
{
|
|
1769
|
+
"prompt": [{"role": "user", "content": "hi"}],
|
|
1770
|
+
"system_prompt": "task sys",
|
|
1771
|
+
}
|
|
1772
|
+
).freeze()
|
|
1773
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1774
|
+
|
|
1775
|
+
assert state["system_prompt"] == [{"role": "system", "content": "task sys"}]
|
|
1776
|
+
|
|
1777
|
+
|
|
1778
|
+
@pytest.mark.asyncio
|
|
1779
|
+
async def test_system_prompt_side_selection_can_prefer_harness() -> None:
|
|
1780
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1781
|
+
harness = make_harness(
|
|
1782
|
+
program={"fn": ref("config_program")},
|
|
1783
|
+
system_prompt="harness sys",
|
|
1784
|
+
system_prompt_strategy="H_OR_T",
|
|
1785
|
+
)
|
|
1786
|
+
Env(taskset=taskset, harness=harness)
|
|
1787
|
+
task = next(iter(taskset))
|
|
1788
|
+
state = await harness.setup_state(task, State.for_task(task))
|
|
1789
|
+
|
|
1790
|
+
assert state["system_prompt"] == [{"role": "system", "content": "harness sys"}]
|
|
1791
|
+
|
|
1792
|
+
|
|
1793
|
+
@pytest.mark.asyncio
|
|
1794
|
+
async def test_system_prompt_strategy_can_select_exact_sides() -> None:
|
|
1795
|
+
taskset = make_taskset(system_prompt="taskset sys")
|
|
1796
|
+
task = Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze()
|
|
1797
|
+
|
|
1798
|
+
harness_t = make_harness(
|
|
1799
|
+
program={"fn": ref("config_program")},
|
|
1800
|
+
system_prompt="harness sys",
|
|
1801
|
+
system_prompt_strategy="T",
|
|
1802
|
+
)
|
|
1803
|
+
Env(taskset=taskset, harness=harness_t)
|
|
1804
|
+
state_t = await harness_t.setup_state(task, State.for_task(task))
|
|
1805
|
+
|
|
1806
|
+
harness_h = make_harness(
|
|
1807
|
+
program={"fn": ref("config_program")},
|
|
1808
|
+
system_prompt="harness sys",
|
|
1809
|
+
system_prompt_strategy="H",
|
|
1810
|
+
)
|
|
1811
|
+
Env(taskset=taskset, harness=harness_h)
|
|
1812
|
+
state_h = await harness_h.setup_state(task, State.for_task(task))
|
|
1813
|
+
|
|
1814
|
+
assert state_t["system_prompt"] == [{"role": "system", "content": "taskset sys"}]
|
|
1815
|
+
assert state_h["system_prompt"] == [{"role": "system", "content": "harness sys"}]
|
|
1816
|
+
|
|
1817
|
+
|
|
1685
1818
|
@pytest.mark.asyncio
|
|
1686
1819
|
async def test_task_max_turns_overrides_harness_default() -> None:
|
|
1687
1820
|
harness = make_harness(max_turns=9)
|
|
@@ -2823,6 +2956,114 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
2823
2956
|
assert configured.taskset.get_dataset()[0]["answer"] == "configured:train"
|
|
2824
2957
|
|
|
2825
2958
|
|
|
2959
|
+
def test_load_environment_composes_component_package_without_root_loader(
|
|
2960
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
2961
|
+
) -> None:
|
|
2962
|
+
module_name = "component_only_taskset_package"
|
|
2963
|
+
module = types.ModuleType(module_name)
|
|
2964
|
+
|
|
2965
|
+
class LocalTasksetConfig(TasksetConfig):
|
|
2966
|
+
answer: str = "configured"
|
|
2967
|
+
|
|
2968
|
+
class LocalTaskset(Taskset[LocalTasksetConfig]):
|
|
2969
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
2970
|
+
return [{"prompt": [], "answer": f"{split}:{self.config.answer}"}]
|
|
2971
|
+
|
|
2972
|
+
def load_taskset(config: LocalTasksetConfig) -> LocalTaskset:
|
|
2973
|
+
return LocalTaskset(config=config)
|
|
2974
|
+
|
|
2975
|
+
module.load_taskset = load_taskset
|
|
2976
|
+
monkeypatch.setitem(sys.modules, module_name, module)
|
|
2977
|
+
|
|
2978
|
+
env = vf.load_environment(
|
|
2979
|
+
"component-only-taskset-package",
|
|
2980
|
+
config={
|
|
2981
|
+
"taskset": {"answer": "composed"},
|
|
2982
|
+
"harness": {"max_turns": 3},
|
|
2983
|
+
},
|
|
2984
|
+
)
|
|
2985
|
+
|
|
2986
|
+
assert env.taskset.get_dataset()[0]["answer"] == "train:composed"
|
|
2987
|
+
assert type(env.harness) is Harness
|
|
2988
|
+
assert env.harness.config.max_turns == 3
|
|
2989
|
+
|
|
2990
|
+
|
|
2991
|
+
def test_load_environment_delegates_missing_child_loaders_by_config_id(
|
|
2992
|
+
monkeypatch: pytest.MonkeyPatch,
|
|
2993
|
+
) -> None:
|
|
2994
|
+
env_module = types.ModuleType("thin_env_package")
|
|
2995
|
+
exec(
|
|
2996
|
+
"""
|
|
2997
|
+
import verifiers as vf
|
|
2998
|
+
|
|
2999
|
+
|
|
3000
|
+
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
3001
|
+
return vf.Env(
|
|
3002
|
+
taskset=vf.load_taskset(config=config.taskset),
|
|
3003
|
+
harness=vf.load_harness(config=config.harness),
|
|
3004
|
+
)
|
|
3005
|
+
""",
|
|
3006
|
+
env_module.__dict__,
|
|
3007
|
+
)
|
|
3008
|
+
taskset_module = types.ModuleType("external_taskset_pkg")
|
|
3009
|
+
exec(
|
|
3010
|
+
"""
|
|
3011
|
+
import verifiers as vf
|
|
3012
|
+
|
|
3013
|
+
|
|
3014
|
+
class ExternalTasksetConfig(vf.TasksetConfig):
|
|
3015
|
+
answer: str = "external"
|
|
3016
|
+
|
|
3017
|
+
|
|
3018
|
+
class ExternalTaskset(vf.Taskset[ExternalTasksetConfig]):
|
|
3019
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
3020
|
+
return [{"prompt": [], "answer": f"{split}:{self.config.answer}"}]
|
|
3021
|
+
|
|
3022
|
+
|
|
3023
|
+
def load_taskset(config: ExternalTasksetConfig) -> ExternalTaskset:
|
|
3024
|
+
return ExternalTaskset(config=config)
|
|
3025
|
+
""",
|
|
3026
|
+
taskset_module.__dict__,
|
|
3027
|
+
)
|
|
3028
|
+
harness_module = types.ModuleType("external_harness_pkg")
|
|
3029
|
+
exec(
|
|
3030
|
+
"""
|
|
3031
|
+
import verifiers as vf
|
|
3032
|
+
|
|
3033
|
+
|
|
3034
|
+
class ExternalHarnessConfig(vf.HarnessConfig):
|
|
3035
|
+
mode: str = "default"
|
|
3036
|
+
|
|
3037
|
+
|
|
3038
|
+
class ExternalHarness(vf.Harness[ExternalHarnessConfig]):
|
|
3039
|
+
pass
|
|
3040
|
+
|
|
3041
|
+
|
|
3042
|
+
def load_harness(config: ExternalHarnessConfig) -> ExternalHarness:
|
|
3043
|
+
return ExternalHarness(config=config)
|
|
3044
|
+
""",
|
|
3045
|
+
harness_module.__dict__,
|
|
3046
|
+
)
|
|
3047
|
+
monkeypatch.setitem(sys.modules, "thin_env_package", env_module)
|
|
3048
|
+
monkeypatch.setitem(
|
|
3049
|
+
sys.modules, "empty_env_package", types.ModuleType("empty_env_package")
|
|
3050
|
+
)
|
|
3051
|
+
monkeypatch.setitem(sys.modules, "external_taskset_pkg", taskset_module)
|
|
3052
|
+
monkeypatch.setitem(sys.modules, "external_harness_pkg", harness_module)
|
|
3053
|
+
|
|
3054
|
+
config = {
|
|
3055
|
+
"taskset": {"id": "external-taskset-pkg", "answer": "delegated"},
|
|
3056
|
+
"harness": {"id": "external-harness-pkg", "mode": "custom"},
|
|
3057
|
+
}
|
|
3058
|
+
for env_id in ("thin-env-package", "empty-env-package"):
|
|
3059
|
+
env = vf.load_environment(env_id, config=config)
|
|
3060
|
+
|
|
3061
|
+
assert env.taskset.get_dataset()[0]["answer"] == "train:delegated"
|
|
3062
|
+
assert type(env.taskset).__name__ == "ExternalTaskset"
|
|
3063
|
+
assert type(env.harness).__name__ == "ExternalHarness"
|
|
3064
|
+
assert env.harness.config.mode == "custom"
|
|
3065
|
+
|
|
3066
|
+
|
|
2826
3067
|
def test_load_environment_coerces_base_env_config_with_factory_annotations(
|
|
2827
3068
|
monkeypatch: pytest.MonkeyPatch,
|
|
2828
3069
|
) -> None:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = "0.1.15.
|
|
1
|
+
__version__ = "0.1.15.dev15"
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import os
|
|
@@ -95,6 +95,7 @@ __all__ = [
|
|
|
95
95
|
"SandboxConfig",
|
|
96
96
|
"SystemPrompt",
|
|
97
97
|
"SystemPromptConfig",
|
|
98
|
+
"SystemPromptStrategy",
|
|
98
99
|
"Toolset",
|
|
99
100
|
"ToolLike",
|
|
100
101
|
"ToolsetConfig",
|
|
@@ -232,6 +233,7 @@ _LAZY_IMPORTS = {
|
|
|
232
233
|
"SignalConfig": "verifiers.v1:SignalConfig",
|
|
233
234
|
"SystemPrompt": "verifiers.v1:SystemPrompt",
|
|
234
235
|
"SystemPromptConfig": "verifiers.v1:SystemPromptConfig",
|
|
236
|
+
"SystemPromptStrategy": "verifiers.v1:SystemPromptStrategy",
|
|
235
237
|
"ToolLike": "verifiers.v1:ToolLike",
|
|
236
238
|
"Toolset": "verifiers.v1:Toolset",
|
|
237
239
|
"ToolsetConfig": "verifiers.v1:ToolsetConfig",
|
|
@@ -343,6 +345,7 @@ if TYPE_CHECKING:
|
|
|
343
345
|
SignalConfig,
|
|
344
346
|
SystemPrompt,
|
|
345
347
|
SystemPromptConfig,
|
|
348
|
+
SystemPromptStrategy,
|
|
346
349
|
Task,
|
|
347
350
|
Tasks,
|
|
348
351
|
Taskset,
|
|
@@ -327,11 +327,12 @@ def apply_env_config_cli_overrides(
|
|
|
327
327
|
|
|
328
328
|
module = import_env_module(env_id)
|
|
329
329
|
env_load_func = getattr(module, "load_environment", None)
|
|
330
|
+
config_type: type[EnvConfig] | None
|
|
330
331
|
if env_load_func is None:
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
332
|
+
config_type = EnvConfig
|
|
333
|
+
else:
|
|
334
|
+
sig = inspect.signature(env_load_func)
|
|
335
|
+
config_type = env_config_annotation(env_load_func, sig)
|
|
335
336
|
if config_type is None:
|
|
336
337
|
raise ValueError(
|
|
337
338
|
"Taskset/harness CLI overrides require a v1 loader shaped as "
|
|
@@ -339,7 +340,8 @@ def apply_env_config_cli_overrides(
|
|
|
339
340
|
)
|
|
340
341
|
|
|
341
342
|
merged_env_args = dict(env_args)
|
|
342
|
-
|
|
343
|
+
base_config_data = explicit_config_data(merged_env_args.get("config", {}))
|
|
344
|
+
child_types = env_config_child_types(module, config_type, base_config_data)
|
|
343
345
|
base_config = load_env_config(
|
|
344
346
|
module,
|
|
345
347
|
config_type,
|
|
@@ -356,7 +358,6 @@ def apply_env_config_cli_overrides(
|
|
|
356
358
|
except ConfigFileError as exc:
|
|
357
359
|
raise ValueError(f"Invalid taskset/harness override: {exc}") from exc
|
|
358
360
|
|
|
359
|
-
base_config_data = explicit_config_data(merged_env_args.get("config", {}))
|
|
360
361
|
override_config_data = explicit_config_data(config)
|
|
361
362
|
merged_env_args["config"] = merge_config_data(
|
|
362
363
|
base_config_data,
|
|
@@ -120,7 +120,7 @@ version = "0.1.0"
|
|
|
120
120
|
requires-python = ">=3.10"
|
|
121
121
|
dependencies = [
|
|
122
122
|
"verifiers>={vf.__version__}",
|
|
123
|
-
"tasksets>=0.1.1",
|
|
123
|
+
"tasksets[openenv]>=0.1.1",
|
|
124
124
|
]
|
|
125
125
|
|
|
126
126
|
[build-system]
|
|
@@ -160,13 +160,10 @@ import verifiers as vf
|
|
|
160
160
|
|
|
161
161
|
|
|
162
162
|
class {taskset_config_name}(vf.TasksetConfig):
|
|
163
|
-
|
|
163
|
+
system_prompt: vf.SystemPrompt = "Replace this with the system prompt for {env_id_dash}."
|
|
164
164
|
|
|
165
165
|
|
|
166
166
|
class {taskset_name}(vf.Taskset[{taskset_config_name}]):
|
|
167
|
-
def load_system_prompt(self, config: {taskset_config_name}) -> vf.SystemPrompt:
|
|
168
|
-
raise NotImplementedError("Load the system prompt for {env_id_dash}.")
|
|
169
|
-
|
|
170
167
|
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
171
168
|
raise NotImplementedError("Load tasks for {env_id_dash}.")
|
|
172
169
|
|
|
@@ -180,9 +177,10 @@ def load_taskset(config: {taskset_config_name}) -> {taskset_name}:
|
|
|
180
177
|
|
|
181
178
|
|
|
182
179
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
180
|
+
\"\"\"Loader pattern for all Taskset/Harness environments.\"\"\"
|
|
183
181
|
return vf.Env(
|
|
184
182
|
taskset=vf.load_taskset(config=config.taskset),
|
|
185
|
-
harness=vf.
|
|
183
|
+
harness=vf.load_harness(config=config.harness),
|
|
186
184
|
)
|
|
187
185
|
"""
|
|
188
186
|
|
|
@@ -191,13 +189,10 @@ import verifiers as vf
|
|
|
191
189
|
|
|
192
190
|
|
|
193
191
|
class {taskset_config_name}(vf.TasksetConfig):
|
|
194
|
-
|
|
192
|
+
system_prompt: vf.SystemPrompt = "Replace this with the system prompt for {env_id_dash}."
|
|
195
193
|
|
|
196
194
|
|
|
197
195
|
class {taskset_name}(vf.Taskset[{taskset_config_name}]):
|
|
198
|
-
def load_system_prompt(self, config: {taskset_config_name}) -> vf.SystemPrompt:
|
|
199
|
-
raise NotImplementedError("Load the system prompt for {env_id_dash}.")
|
|
200
|
-
|
|
201
196
|
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
202
197
|
raise NotImplementedError("Load tasks for {env_id_dash}.")
|
|
203
198
|
|
|
@@ -223,6 +218,7 @@ def load_harness(config: {harness_config_name}) -> {harness_name}:
|
|
|
223
218
|
|
|
224
219
|
|
|
225
220
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
221
|
+
\"\"\"Loader pattern for all Taskset/Harness environments.\"\"\"
|
|
226
222
|
return vf.Env(
|
|
227
223
|
taskset=vf.load_taskset(config=config.taskset),
|
|
228
224
|
harness=vf.load_harness(config=config.harness),
|
|
@@ -231,7 +227,7 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
231
227
|
|
|
232
228
|
OPENENV_ENVIRONMENT_TEMPLATE = """\
|
|
233
229
|
import verifiers as vf
|
|
234
|
-
from tasksets
|
|
230
|
+
from tasksets import OpenEnvTaskset, OpenEnvTasksetConfig
|
|
235
231
|
|
|
236
232
|
|
|
237
233
|
def load_taskset(config: OpenEnvTasksetConfig) -> OpenEnvTaskset:
|
|
@@ -239,9 +235,10 @@ def load_taskset(config: OpenEnvTasksetConfig) -> OpenEnvTaskset:
|
|
|
239
235
|
|
|
240
236
|
|
|
241
237
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
238
|
+
\"\"\"Loader pattern for all Taskset/Harness environments.\"\"\"
|
|
242
239
|
return vf.Env(
|
|
243
240
|
taskset=vf.load_taskset(config=config.taskset),
|
|
244
|
-
harness=vf.
|
|
241
|
+
harness=vf.load_harness(config=config.harness),
|
|
245
242
|
)
|
|
246
243
|
"""
|
|
247
244
|
|