verifiers 0.1.15.dev4__tar.gz → 0.1.15.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/PKG-INFO +1 -1
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_harbor_cli.py +57 -1
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/__init__.py +4 -1
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/README.md +4 -2
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/RE_MIGRATION.md +4 -2
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/__init__.py +2 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/config.py +8 -3
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/__init__.py +2 -0
- verifiers-0.1.15.dev5/verifiers/v1/packages/harnesses/terminus_2.py +286 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/tasksets/harbor.py +17 -15
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/task.py +3 -1
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/.gitignore +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/LICENSE +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_config_extension.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_v1_taskset_bindings.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/types.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/harness.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/command.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/configs.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/pi.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/packages/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/taskset.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/types.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/artifact_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/prompt_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/taskset_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/timing_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev4
|
|
3
|
+
Version: 0.1.15.dev5
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -9,10 +9,18 @@ from uuid import uuid4
|
|
|
9
9
|
|
|
10
10
|
import pytest
|
|
11
11
|
|
|
12
|
+
import verifiers as root_vf
|
|
12
13
|
import verifiers.v1 as vf
|
|
13
14
|
from verifiers.v1.packages.harnesses.pi import pi_mcp_json, pi_models_json
|
|
15
|
+
from verifiers.v1.packages.harnesses.terminus_2 import (
|
|
16
|
+
DEFAULT_API_BASE_URL,
|
|
17
|
+
DEFAULT_HARBOR_PACKAGE,
|
|
18
|
+
DEFAULT_MODEL_NAME,
|
|
19
|
+
Terminus2,
|
|
20
|
+
terminus_2_agent_script,
|
|
21
|
+
)
|
|
14
22
|
from verifiers.v1.packages.tasksets.harbor import harbor_reward
|
|
15
|
-
from verifiers.v1.utils.program_utils import merge_task_program
|
|
23
|
+
from verifiers.v1.utils.program_utils import merge_task_program, merge_task_sandbox
|
|
16
24
|
|
|
17
25
|
|
|
18
26
|
def write_harbor_task(root: Path, name: str = "task-a") -> Path:
|
|
@@ -85,6 +93,13 @@ def test_harbor_taskset_loads_package_tasks_with_program_patch(
|
|
|
85
93
|
assert task["sandbox"]["memory_gb"] == 2.0
|
|
86
94
|
assert task["sandbox"]["disk_size_gb"] == 8.0
|
|
87
95
|
assert task["sandbox"]["command_timeout"] == 600
|
|
96
|
+
assert "network_access" not in task["sandbox"]
|
|
97
|
+
assert (
|
|
98
|
+
merge_task_sandbox({"network_access": False, "scope": "rollout"}, task)[
|
|
99
|
+
"network_access"
|
|
100
|
+
]
|
|
101
|
+
is False
|
|
102
|
+
)
|
|
88
103
|
assert task["harbor"]["test_timeout"] == 300.0
|
|
89
104
|
assert task["program"]["files"] == {
|
|
90
105
|
"/task/instruction.md": {"task": "instruction"},
|
|
@@ -200,6 +215,8 @@ def test_packaged_harbor_and_opencode_imports_are_reexported() -> None:
|
|
|
200
215
|
assert vf.OpenCode is OpenCode
|
|
201
216
|
assert vf.OpenCodeConfig is OpenCodeConfig
|
|
202
217
|
assert vf.Pi is Pi
|
|
218
|
+
assert vf.Terminus2 is Terminus2
|
|
219
|
+
assert root_vf.Terminus2 is Terminus2
|
|
203
220
|
assert vf.HarborTaskset is HarborTaskset
|
|
204
221
|
|
|
205
222
|
|
|
@@ -254,6 +271,45 @@ def test_pi_harness_writes_intercepted_model_and_mcp_config() -> None:
|
|
|
254
271
|
assert mcp["mcpServers"]["verifiers-tools"]["command"] == "python3"
|
|
255
272
|
|
|
256
273
|
|
|
274
|
+
def test_terminus_2_harness_builds_sandbox_program() -> None:
|
|
275
|
+
harness = vf.Terminus2(
|
|
276
|
+
system_prompt="extra system prompt",
|
|
277
|
+
agent_workdir="/workspace",
|
|
278
|
+
max_turns=7,
|
|
279
|
+
python_version="3.12",
|
|
280
|
+
)
|
|
281
|
+
program = cast(dict[str, object], harness.program)
|
|
282
|
+
command = cast(list[object], program["command"])
|
|
283
|
+
setup = cast(str, program["setup"])
|
|
284
|
+
files = cast(dict[str, object], program["files"])
|
|
285
|
+
artifacts = cast(dict[str, object], program["artifacts"])
|
|
286
|
+
env = cast(dict[str, object], program.get("env", {}))
|
|
287
|
+
|
|
288
|
+
assert isinstance(harness, vf.Harness)
|
|
289
|
+
assert "/terminus_2/instruction.md" in files
|
|
290
|
+
assert "/terminus_2/system_prompt.txt" in files
|
|
291
|
+
assert "apt-get -o Acquire::Retries=3 update" in setup
|
|
292
|
+
assert "apt-get -o Acquire::Retries=3 install" in setup
|
|
293
|
+
assert "git" not in setup
|
|
294
|
+
assert "terminus_2_log" in artifacts
|
|
295
|
+
assert "OPENAI_MODEL" not in env
|
|
296
|
+
|
|
297
|
+
run_script = cast(str, command[2])
|
|
298
|
+
assert "TERMINUS_2_WORKDIR=/workspace" in run_script
|
|
299
|
+
assert f"--with {DEFAULT_HARBOR_PACKAGE}" in run_script
|
|
300
|
+
assert "git+https://github.com" not in run_script
|
|
301
|
+
assert "max_turns=7" in run_script
|
|
302
|
+
|
|
303
|
+
script = terminus_2_agent_script(max_turns=7)
|
|
304
|
+
compile(script, "terminus_2_agent.py", "exec")
|
|
305
|
+
assert DEFAULT_MODEL_NAME in script
|
|
306
|
+
assert DEFAULT_API_BASE_URL in script
|
|
307
|
+
assert "OPENAI_MODEL" not in script
|
|
308
|
+
assert "PRIME_API_KEY" in script
|
|
309
|
+
assert "async def prepare_logs_for_host(self) -> None" in script
|
|
310
|
+
assert "max_turns=7" in script
|
|
311
|
+
|
|
312
|
+
|
|
257
313
|
def test_task_program_merges_into_command_program_without_collisions() -> None:
|
|
258
314
|
harness = vf.Harness(
|
|
259
315
|
program={
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = "0.1.15.dev4"
|
|
1
|
+
__version__ = "0.1.15.dev5"
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import os
|
|
@@ -93,6 +93,7 @@ __all__ = [
|
|
|
93
93
|
"Pi",
|
|
94
94
|
"RLM",
|
|
95
95
|
"RLMConfig",
|
|
96
|
+
"Terminus2",
|
|
96
97
|
"Environment",
|
|
97
98
|
"MultiTurnEnv",
|
|
98
99
|
"SingleTurnEnv",
|
|
@@ -217,6 +218,7 @@ _LAZY_IMPORTS = {
|
|
|
217
218
|
"Pi": "verifiers.v1:Pi",
|
|
218
219
|
"RLM": "verifiers.v1:RLM",
|
|
219
220
|
"RLMConfig": "verifiers.v1:RLMConfig",
|
|
221
|
+
"Terminus2": "verifiers.v1:Terminus2",
|
|
220
222
|
"get_messages": "verifiers.v1:get_messages",
|
|
221
223
|
"add_metric": "verifiers.v1:add_metric",
|
|
222
224
|
"add_reward": "verifiers.v1:add_reward",
|
|
@@ -311,6 +313,7 @@ if TYPE_CHECKING:
|
|
|
311
313
|
ProgramConfig,
|
|
312
314
|
RLM,
|
|
313
315
|
RLMConfig,
|
|
316
|
+
Terminus2,
|
|
314
317
|
SandboxConfig,
|
|
315
318
|
Task,
|
|
316
319
|
TaskRow,
|
|
@@ -537,8 +537,8 @@ signature.
|
|
|
537
537
|
Reusable CLI programs should be packaged as `Harness` subclasses. Package
|
|
538
538
|
implementations live under `verifiers.v1.packages` while the v1 API stabilizes,
|
|
539
539
|
and are re-exported from `verifiers.v1` for normal use. `OpenCode`, `Pi`,
|
|
540
|
-
`MiniSWEAgent`, and `RLM` are bundled `Harness` leaf wrappers for common
|
|
541
|
-
coding-agent CLIs.
|
|
540
|
+
`MiniSWEAgent`, `Terminus2`, and `RLM` are bundled `Harness` leaf wrappers for
|
|
541
|
+
common coding-agent CLIs.
|
|
542
542
|
|
|
543
543
|
```python
|
|
544
544
|
import verifiers as vf
|
|
@@ -560,6 +560,8 @@ endpoint and, when tools are enabled, installs the Pi MCP adapter and writes a
|
|
|
560
560
|
project `.mcp.json`. Neither side needs to know the other's private fields.
|
|
561
561
|
`MiniSWEAgent` owns mini-swe-agent installation, config layering, endpoint env,
|
|
562
562
|
and log/trajectory artifacts.
|
|
563
|
+
`Terminus2` owns Harbor Terminus agent installation, endpoint env, and log
|
|
564
|
+
artifacts.
|
|
563
565
|
`RLM` follows the same boundary for recursive LLM runs: `HarborTaskset` owns
|
|
564
566
|
the task directory and tests, while `RLM` owns RLM installation, optional skill
|
|
565
567
|
upload to `/rlm/skills`, endpoint wiring, and trajectory filtering.
|
|
@@ -548,8 +548,8 @@ Use this for:
|
|
|
548
548
|
Migration:
|
|
549
549
|
|
|
550
550
|
1. Use `vf.HarborTaskset` for Harbor-format task directories.
|
|
551
|
-
2. Use `vf.OpenCode()`, `vf.Pi()`, `vf.MiniSWEAgent()`, or `vf.RLM()` for the
|
|
552
|
-
command harness.
|
|
551
|
+
2. Use `vf.OpenCode()`, `vf.Pi()`, `vf.MiniSWEAgent()`, `vf.Terminus2()`, or
|
|
552
|
+
`vf.RLM()` for the command harness.
|
|
553
553
|
3. Put task-owned uploads and sandbox overrides on the taskset.
|
|
554
554
|
4. Keep scoring as reward/metric functions on the taskset.
|
|
555
555
|
|
|
@@ -580,6 +580,8 @@ Gotchas:
|
|
|
580
580
|
adapter setup, and log artifacts.
|
|
581
581
|
- `MiniSWEAgent` owns mini-swe-agent installation, config layering, endpoint
|
|
582
582
|
env, and log/trajectory artifacts.
|
|
583
|
+
- `Terminus2` owns Harbor Terminus agent installation, endpoint env, and log
|
|
584
|
+
artifacts.
|
|
583
585
|
- `RLM` owns RLM installation, optional `/task/rlm-skills` upload, endpoint
|
|
584
586
|
wiring, and trajectory filtering.
|
|
585
587
|
- `task.program` is the merge point for task-owned program files/env/setup.
|
|
@@ -41,6 +41,7 @@ from .packages.harnesses import (
|
|
|
41
41
|
Pi,
|
|
42
42
|
RLM,
|
|
43
43
|
RLMConfig,
|
|
44
|
+
Terminus2,
|
|
44
45
|
)
|
|
45
46
|
from .utils.scoring_utils import (
|
|
46
47
|
add_metric,
|
|
@@ -97,6 +98,7 @@ __all__ = [
|
|
|
97
98
|
"ProgramConfig",
|
|
98
99
|
"RLM",
|
|
99
100
|
"RLMConfig",
|
|
101
|
+
"Terminus2",
|
|
100
102
|
"SandboxConfig",
|
|
101
103
|
"State",
|
|
102
104
|
"Task",
|
|
@@ -367,15 +367,20 @@ class EnvConfig(Config):
|
|
|
367
367
|
return base
|
|
368
368
|
|
|
369
369
|
|
|
370
|
-
def sandbox_config_mapping(value: object | None) -> ConfigData | None:
|
|
370
|
+
def sandbox_config_mapping(
|
|
371
|
+
value: object | None, *, fill_defaults: bool = True
|
|
372
|
+
) -> ConfigData | None:
|
|
371
373
|
if value is None:
|
|
372
374
|
return None
|
|
373
375
|
if isinstance(value, SandboxConfig):
|
|
374
|
-
return value.model_dump(exclude_none=True)
|
|
376
|
+
return value.model_dump(exclude_none=True, exclude_unset=not fill_defaults)
|
|
375
377
|
if isinstance(value, Mapping):
|
|
376
378
|
mapping = cast(ConfigMap, value)
|
|
377
379
|
prefer = mapping.get("prefer")
|
|
378
380
|
if prefer is not None and prefer != "program":
|
|
379
381
|
raise ValueError("sandbox.prefer must be 'program'.")
|
|
380
|
-
return SandboxConfig.from_config(mapping).model_dump(exclude_none=True)
|
382
|
+
sandbox = SandboxConfig.from_config(mapping).model_dump(exclude_none=True)
|
|
383
|
+
if fill_defaults:
|
|
384
|
+
return sandbox
|
|
385
|
+
return {key: sandbox[key] for key in mapping if key in sandbox}
|
|
381
386
|
raise TypeError("Sandbox config must be a mapping.")
|
|
@@ -3,6 +3,7 @@ from .mini_swe_agent import MiniSWEAgent
|
|
|
3
3
|
from .opencode import OpenCode
|
|
4
4
|
from .pi import Pi
|
|
5
5
|
from .rlm import RLM
|
|
6
|
+
from .terminus_2 import Terminus2
|
|
6
7
|
|
|
7
8
|
__all__ = [
|
|
8
9
|
"MiniSWEAgent",
|
|
@@ -11,4 +12,5 @@ __all__ = [
|
|
|
11
12
|
"Pi",
|
|
12
13
|
"RLM",
|
|
13
14
|
"RLMConfig",
|
|
15
|
+
"Terminus2",
|
|
14
16
|
]
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import shlex
|
|
2
|
+
from pathlib import PurePosixPath
|
|
3
|
+
|
|
4
|
+
from typing_extensions import Unpack
|
|
5
|
+
|
|
6
|
+
from .command import HarnessKwargs, command_program, command_sandbox
|
|
7
|
+
from ...config import SandboxConfig
|
|
8
|
+
from ...harness import Harness
|
|
9
|
+
from ...utils.prompt_utils import (
|
|
10
|
+
state_system_prompt_text,
|
|
11
|
+
task_text as task_instruction_text,
|
|
12
|
+
)
|
|
13
|
+
from ...types import ConfigMap, ProgramMap, ProgramOptionMap, ProgramValue, PromptInput
|
|
14
|
+
|
|
15
|
+
DEFAULT_AGENT_WORKDIR = "/app"
|
|
16
|
+
DEFAULT_INSTRUCTION_PATH = "/terminus_2/instruction.md"
|
|
17
|
+
DEFAULT_SYSTEM_PROMPT_PATH = "/terminus_2/system_prompt.txt"
|
|
18
|
+
DEFAULT_LOG_PATH = "/logs/agent/terminus_2.log"
|
|
19
|
+
DEFAULT_HARBOR_PACKAGE = "harbor==0.6.6"
|
|
20
|
+
DEFAULT_PYTHON_VERSION = "3.12"
|
|
21
|
+
DEFAULT_MODEL_NAME = "openai/gpt-4.1-mini"
|
|
22
|
+
DEFAULT_API_BASE_URL = "https://api.pinference.ai/api/v1"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Terminus2(Harness):
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
*,
|
|
29
|
+
agent_workdir: str = DEFAULT_AGENT_WORKDIR,
|
|
30
|
+
instruction_path: str = DEFAULT_INSTRUCTION_PATH,
|
|
31
|
+
system_prompt_path: str = DEFAULT_SYSTEM_PROMPT_PATH,
|
|
32
|
+
log_path: str = DEFAULT_LOG_PATH,
|
|
33
|
+
harbor_package: str = DEFAULT_HARBOR_PACKAGE,
|
|
34
|
+
python_version: str = DEFAULT_PYTHON_VERSION,
|
|
35
|
+
model_name: str = DEFAULT_MODEL_NAME,
|
|
36
|
+
api_base_url: str = DEFAULT_API_BASE_URL,
|
|
37
|
+
system_prompt: PromptInput | None = None,
|
|
38
|
+
sandbox: bool | ConfigMap | SandboxConfig = True,
|
|
39
|
+
program: ProgramMap | None = None,
|
|
40
|
+
max_turns: int | None = 4,
|
|
41
|
+
**kwargs: Unpack[HarnessKwargs],
|
|
42
|
+
):
|
|
43
|
+
files: dict[str, ProgramValue] = {
|
|
44
|
+
instruction_path: task_instruction_text,
|
|
45
|
+
}
|
|
46
|
+
if system_prompt is not None:
|
|
47
|
+
files[system_prompt_path] = state_system_prompt_text
|
|
48
|
+
artifacts: ProgramOptionMap = {
|
|
49
|
+
"terminus_2_log": {
|
|
50
|
+
"path": log_path,
|
|
51
|
+
"format": "text",
|
|
52
|
+
"optional": True,
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
command = [
|
|
56
|
+
"bash",
|
|
57
|
+
"-lc",
|
|
58
|
+
build_terminus_2_run_script(
|
|
59
|
+
agent_workdir=agent_workdir,
|
|
60
|
+
instruction_path=instruction_path,
|
|
61
|
+
system_prompt_path=system_prompt_path
|
|
62
|
+
if system_prompt is not None
|
|
63
|
+
else None,
|
|
64
|
+
log_path=log_path,
|
|
65
|
+
harbor_package=harbor_package,
|
|
66
|
+
python_version=python_version,
|
|
67
|
+
model_name=model_name,
|
|
68
|
+
api_base_url=api_base_url,
|
|
69
|
+
max_turns=max_turns,
|
|
70
|
+
),
|
|
71
|
+
]
|
|
72
|
+
super().__init__(
|
|
73
|
+
program=command_program(
|
|
74
|
+
command=command,
|
|
75
|
+
sandbox=sandbox,
|
|
76
|
+
files=files,
|
|
77
|
+
setup=build_terminus_2_install_script(),
|
|
78
|
+
artifacts=artifacts,
|
|
79
|
+
program=program,
|
|
80
|
+
),
|
|
81
|
+
sandbox=command_sandbox(sandbox),
|
|
82
|
+
system_prompt=system_prompt,
|
|
83
|
+
max_turns=max_turns,
|
|
84
|
+
**kwargs,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def build_terminus_2_install_script() -> str:
|
|
89
|
+
return """\
|
|
90
|
+
set -e
|
|
91
|
+
apt-get -o Acquire::Retries=3 update -qq
|
|
92
|
+
apt-get -o Acquire::Retries=3 install -y -qq curl ca-certificates > /dev/null 2>&1
|
|
93
|
+
if ! command -v uv >/dev/null 2>&1; then
|
|
94
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
95
|
+
fi
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def build_terminus_2_run_script(
|
|
100
|
+
*,
|
|
101
|
+
agent_workdir: str = DEFAULT_AGENT_WORKDIR,
|
|
102
|
+
instruction_path: str = DEFAULT_INSTRUCTION_PATH,
|
|
103
|
+
system_prompt_path: str | None = DEFAULT_SYSTEM_PROMPT_PATH,
|
|
104
|
+
log_path: str = DEFAULT_LOG_PATH,
|
|
105
|
+
harbor_package: str = DEFAULT_HARBOR_PACKAGE,
|
|
106
|
+
python_version: str = DEFAULT_PYTHON_VERSION,
|
|
107
|
+
model_name: str = DEFAULT_MODEL_NAME,
|
|
108
|
+
api_base_url: str = DEFAULT_API_BASE_URL,
|
|
109
|
+
max_turns: int | None = 4,
|
|
110
|
+
) -> str:
|
|
111
|
+
log_dir = str(PurePosixPath(log_path).parent)
|
|
112
|
+
agent_script = terminus_2_agent_script(
|
|
113
|
+
instruction_path=instruction_path,
|
|
114
|
+
system_prompt_path=system_prompt_path,
|
|
115
|
+
log_dir=log_dir,
|
|
116
|
+
model_name=model_name,
|
|
117
|
+
api_base_url=api_base_url,
|
|
118
|
+
max_turns=max_turns,
|
|
119
|
+
)
|
|
120
|
+
return f"""\
|
|
121
|
+
set -eo pipefail
|
|
122
|
+
export PATH="$HOME/.local/bin:$PATH"
|
|
123
|
+
|
|
124
|
+
TERMINUS_2_WORKDIR="${{AGENT_WORKDIR:-}}"
|
|
125
|
+
if [[ -z "$TERMINUS_2_WORKDIR" ]]; then
|
|
126
|
+
TERMINUS_2_WORKDIR={shlex.quote(agent_workdir)}
|
|
127
|
+
fi
|
|
128
|
+
export AGENT_WORKDIR="$TERMINUS_2_WORKDIR"
|
|
129
|
+
|
|
130
|
+
mkdir -p {shlex.quote(log_dir)} "$TERMINUS_2_WORKDIR"
|
|
131
|
+
cd "$TERMINUS_2_WORKDIR"
|
|
132
|
+
uv --no-config run --no-project --quiet \
|
|
133
|
+
--python {shlex.quote(python_version)} \
|
|
134
|
+
--with {shlex.quote(harbor_package)} \
|
|
135
|
+
python - <<'PY' 2>&1 | tee -a {shlex.quote(log_path)}
|
|
136
|
+
{agent_script}
|
|
137
|
+
PY
|
|
138
|
+
"""
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def terminus_2_agent_script(
|
|
142
|
+
*,
|
|
143
|
+
instruction_path: str = DEFAULT_INSTRUCTION_PATH,
|
|
144
|
+
system_prompt_path: str | None = DEFAULT_SYSTEM_PROMPT_PATH,
|
|
145
|
+
log_dir: str = "/logs/agent",
|
|
146
|
+
model_name: str = DEFAULT_MODEL_NAME,
|
|
147
|
+
api_base_url: str = DEFAULT_API_BASE_URL,
|
|
148
|
+
max_turns: int | None = 4,
|
|
149
|
+
) -> str:
|
|
150
|
+
system_prompt_block = ""
|
|
151
|
+
if system_prompt_path is not None:
|
|
152
|
+
system_prompt_block = f"""\
|
|
153
|
+
system_prompt_path = Path({system_prompt_path!r})
|
|
154
|
+
if system_prompt_path.exists() and system_prompt_path.stat().st_size > 0:
|
|
155
|
+
instruction = system_prompt_path.read_text() + "\\n\\n" + instruction
|
|
156
|
+
"""
|
|
157
|
+
return f"""\
|
|
158
|
+
from __future__ import annotations
|
|
159
|
+
|
|
160
|
+
import asyncio
|
|
161
|
+
import logging
|
|
162
|
+
import os
|
|
163
|
+
import shutil
|
|
164
|
+
import subprocess
|
|
165
|
+
from pathlib import Path
|
|
166
|
+
|
|
167
|
+
from harbor.agents.terminus_2 import Terminus2
|
|
168
|
+
from harbor.environments.base import BaseEnvironment, ExecResult
|
|
169
|
+
from harbor.models.agent.context import AgentContext
|
|
170
|
+
from harbor.models.environment_type import EnvironmentType
|
|
171
|
+
from harbor.models.trial.paths import TrialPaths
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class LocalEnvironment(BaseEnvironment):
|
|
175
|
+
def __init__(self, workdir: Path, logs_dir: Path):
|
|
176
|
+
self.workdir = workdir
|
|
177
|
+
self.trial_paths = TrialPaths(trial_dir=logs_dir)
|
|
178
|
+
self.trial_paths.mkdir()
|
|
179
|
+
self.default_user = None
|
|
180
|
+
self.session_id = "local"
|
|
181
|
+
self.logger = logging.getLogger(__name__)
|
|
182
|
+
|
|
183
|
+
@staticmethod
|
|
184
|
+
def type() -> EnvironmentType:
|
|
185
|
+
return EnvironmentType.DOCKER
|
|
186
|
+
|
|
187
|
+
@property
|
|
188
|
+
def is_mounted(self) -> bool:
|
|
189
|
+
return True
|
|
190
|
+
|
|
191
|
+
@property
|
|
192
|
+
def supports_gpus(self) -> bool:
|
|
193
|
+
return False
|
|
194
|
+
|
|
195
|
+
@property
|
|
196
|
+
def can_disable_internet(self) -> bool:
|
|
197
|
+
return False
|
|
198
|
+
|
|
199
|
+
def _validate_definition(self):
|
|
200
|
+
pass
|
|
201
|
+
|
|
202
|
+
async def start(self, force_build: bool) -> None:
|
|
203
|
+
pass
|
|
204
|
+
|
|
205
|
+
async def stop(self, delete: bool):
|
|
206
|
+
pass
|
|
207
|
+
|
|
208
|
+
async def prepare_logs_for_host(self) -> None:
|
|
209
|
+
pass
|
|
210
|
+
|
|
211
|
+
async def upload_file(self, source_path, target_path):
|
|
212
|
+
shutil.copy(source_path, target_path)
|
|
213
|
+
|
|
214
|
+
async def upload_dir(self, source_dir, target_dir):
|
|
215
|
+
shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
|
|
216
|
+
|
|
217
|
+
async def download_file(self, source_path, target_path):
|
|
218
|
+
shutil.copy(source_path, target_path)
|
|
219
|
+
|
|
220
|
+
async def download_dir(self, source_dir, target_dir):
|
|
221
|
+
shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
|
|
222
|
+
|
|
223
|
+
async def exec(
|
|
224
|
+
self,
|
|
225
|
+
command: str,
|
|
226
|
+
cwd: str | None = None,
|
|
227
|
+
env: dict | None = None,
|
|
228
|
+
timeout_sec: int | None = None,
|
|
229
|
+
user: str | int | None = None,
|
|
230
|
+
) -> ExecResult:
|
|
231
|
+
del user
|
|
232
|
+
try:
|
|
233
|
+
result = subprocess.run(
|
|
234
|
+
command,
|
|
235
|
+
shell=True,
|
|
236
|
+
cwd=cwd or str(self.workdir),
|
|
237
|
+
env={{**os.environ, **(env or {{}})}},
|
|
238
|
+
capture_output=True,
|
|
239
|
+
text=True,
|
|
240
|
+
timeout=timeout_sec,
|
|
241
|
+
)
|
|
242
|
+
except subprocess.TimeoutExpired:
|
|
243
|
+
return ExecResult(stdout="", stderr="Command timed out", return_code=124)
|
|
244
|
+
return ExecResult(
|
|
245
|
+
stdout=result.stdout,
|
|
246
|
+
stderr=result.stderr,
|
|
247
|
+
return_code=result.returncode,
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
async def main() -> None:
|
|
252
|
+
workdir = Path(os.environ.get("AGENT_WORKDIR") or {DEFAULT_AGENT_WORKDIR!r})
|
|
253
|
+
logs_dir = Path({log_dir!r})
|
|
254
|
+
instruction = Path({instruction_path!r}).read_text()
|
|
255
|
+
{system_prompt_block} env = LocalEnvironment(workdir=workdir, logs_dir=logs_dir)
|
|
256
|
+
if "OPENAI_API_KEY" not in os.environ and "PRIME_API_KEY" in os.environ:
|
|
257
|
+
os.environ["OPENAI_API_KEY"] = os.environ["PRIME_API_KEY"]
|
|
258
|
+
api_base = os.environ.get("OPENAI_BASE_URL") or {api_base_url!r}
|
|
259
|
+
agent = Terminus2(
|
|
260
|
+
logs_dir=logs_dir,
|
|
261
|
+
model_name={model_name!r},
|
|
262
|
+
api_base=api_base,
|
|
263
|
+
max_turns={max_turns!r},
|
|
264
|
+
)
|
|
265
|
+
await agent.setup(env)
|
|
266
|
+
await agent.run(instruction, env, AgentContext())
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
asyncio.run(main())
|
|
270
|
+
"""
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
__all__ = [
|
|
274
|
+
"DEFAULT_AGENT_WORKDIR",
|
|
275
|
+
"DEFAULT_API_BASE_URL",
|
|
276
|
+
"DEFAULT_HARBOR_PACKAGE",
|
|
277
|
+
"DEFAULT_INSTRUCTION_PATH",
|
|
278
|
+
"DEFAULT_LOG_PATH",
|
|
279
|
+
"DEFAULT_MODEL_NAME",
|
|
280
|
+
"DEFAULT_PYTHON_VERSION",
|
|
281
|
+
"DEFAULT_SYSTEM_PROMPT_PATH",
|
|
282
|
+
"Terminus2",
|
|
283
|
+
"build_terminus_2_install_script",
|
|
284
|
+
"build_terminus_2_run_script",
|
|
285
|
+
"terminus_2_agent_script",
|
|
286
|
+
]
|
|
@@ -199,6 +199,22 @@ class HarborTaskset(Taskset):
|
|
|
199
199
|
verifier_config = config.get("verifier", {}) or {}
|
|
200
200
|
instruction = instruction_path.read_text().strip()
|
|
201
201
|
task_remote_dir = self.task_dir.rstrip("/") or "/task"
|
|
202
|
+
sandbox = {
|
|
203
|
+
"image": environment.get("docker_image") or self.docker_image,
|
|
204
|
+
"cpu_cores": parse_number(environment.get("cpus"), self.cpu_cores),
|
|
205
|
+
"memory_gb": parse_gb(environment.get("memory"), self.memory_gb),
|
|
206
|
+
"disk_size_gb": parse_gb(environment.get("storage"), self.disk_size_gb),
|
|
207
|
+
"timeout_minutes": self.timeout_minutes,
|
|
208
|
+
"command_timeout": int(
|
|
209
|
+
parse_number(
|
|
210
|
+
agent_config.get("timeout_sec"), self.agent_timeout_seconds
|
|
211
|
+
)
|
|
212
|
+
),
|
|
213
|
+
"workdir": self.workdir,
|
|
214
|
+
"scope": self.scope,
|
|
215
|
+
}
|
|
216
|
+
if "allow_internet" in environment:
|
|
217
|
+
sandbox["network_access"] = bool(environment["allow_internet"])
|
|
202
218
|
return {
|
|
203
219
|
"example_id": index,
|
|
204
220
|
"task_name": task_dir.name,
|
|
@@ -206,21 +222,7 @@ class HarborTaskset(Taskset):
|
|
|
206
222
|
"task_toml": task_toml_path.read_text(),
|
|
207
223
|
"task_dir": str(task_dir),
|
|
208
224
|
"prompt": [{"role": "user", "content": instruction}],
|
|
209
|
-
"sandbox": {
|
|
210
|
-
"image": environment.get("docker_image") or self.docker_image,
|
|
211
|
-
"cpu_cores": parse_number(environment.get("cpus"), self.cpu_cores),
|
|
212
|
-
"memory_gb": parse_gb(environment.get("memory"), self.memory_gb),
|
|
213
|
-
"disk_size_gb": parse_gb(environment.get("storage"), self.disk_size_gb),
|
|
214
|
-
"network_access": bool(environment.get("allow_internet", True)),
|
|
215
|
-
"timeout_minutes": self.timeout_minutes,
|
|
216
|
-
"command_timeout": int(
|
|
217
|
-
parse_number(
|
|
218
|
-
agent_config.get("timeout_sec"), self.agent_timeout_seconds
|
|
219
|
-
)
|
|
220
|
-
),
|
|
221
|
-
"workdir": self.workdir,
|
|
222
|
-
"scope": self.scope,
|
|
223
|
-
},
|
|
225
|
+
"sandbox": sandbox,
|
|
224
226
|
"program": {
|
|
225
227
|
"files": {
|
|
226
228
|
f"{task_remote_dir}/instruction.md": {"task": "instruction"},
|
|
@@ -37,7 +37,9 @@ class Task(dict):
|
|
|
37
37
|
if "sandbox" in self and not isinstance(self["sandbox"], Mapping):
|
|
38
38
|
raise TypeError("task.sandbox must be a mapping.")
|
|
39
39
|
if "sandbox" in self:
|
|
40
|
-
super().__setitem__("sandbox", sandbox_config_mapping(self["sandbox"]))
|
|
40
|
+
super().__setitem__(
|
|
41
|
+
"sandbox", sandbox_config_mapping(self["sandbox"], fill_defaults=False)
|
|
42
|
+
)
|
|
41
43
|
if "program" in self and not isinstance(self["program"], Mapping):
|
|
42
44
|
raise TypeError("task.program must be a mapping.")
|
|
43
45
|
if "max_turns" in self and (
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{verifiers-0.1.15.dev4 → verifiers-0.1.15.dev5}/tests/test_langchain_deep_agents_wikispeedia.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|