verifiers 0.1.12.dev4__tar.gz → 0.1.12.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/PKG-INFO +1 -1
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/pyproject.toml +9 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_cli.py +32 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/__init__.py +1 -1
- verifiers-0.1.12.dev6/verifiers/cli/commands/eval.py +21 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/README.md +2 -2
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/__init__.py +35 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/opencode.py +265 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/prompt.txt +12 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/harnesses/rlm.py +50 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/__init__.py +39 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +3 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +239 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +367 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +13 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +375 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +3 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +354 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/math/__init__.py +3 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/math/math_task.py +161 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +15 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +36 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +42 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +396 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +230 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +413 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +634 -0
- verifiers-0.1.12.dev6/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +67 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/eval.py +72 -30
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_router.py +3 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_server.py +1 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/env_worker.py +2 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/eval_utils.py +14 -1
- verifiers-0.1.12.dev4/verifiers/cli/commands/eval.py +0 -7
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/.gitignore +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/LICENSE +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/AGENTS.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/conftest.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_build_script.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_config.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_env_group.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_env_server.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_environment.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_envs.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_imports.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_logging.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/decorators.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/harbor_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/errors.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/types.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.12.dev4
|
|
3
|
+
Version: 0.1.12.dev6
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -197,6 +197,15 @@ redundant-cast = "ignore"
|
|
|
197
197
|
[tool.ty.src]
|
|
198
198
|
exclude = ["environments"]
|
|
199
199
|
|
|
200
|
+
[[tool.ty.overrides]]
|
|
201
|
+
include = ["verifiers/envs/experimental/composable/tasksets/**"]
|
|
202
|
+
|
|
203
|
+
[tool.ty.overrides.rules]
|
|
204
|
+
unresolved-import = "ignore"
|
|
205
|
+
invalid-method-override = "ignore"
|
|
206
|
+
invalid-assignment = "ignore"
|
|
207
|
+
not-iterable = "ignore"
|
|
208
|
+
|
|
200
209
|
[tool.coverage.run]
|
|
201
210
|
source = ["verifiers"]
|
|
202
211
|
omit = [
|
|
@@ -1061,6 +1061,38 @@ def test_ablation_global_defaults_apply():
|
|
|
1061
1061
|
assert all(c["num_examples"] == 100 for c in configs)
|
|
1062
1062
|
|
|
1063
1063
|
|
|
1064
|
+
def test_ablation_endpoint_id_override_removes_global_model():
    """An ablation-level ``endpoint_id`` must displace the global ``model``."""
    # Global section sets ``model``; the ablation block sets ``endpoint_id``.
    toml_text = (
        'model = "gpt-4.1-mini"\n\n'
        '[[ablation]]\nenv_id = "my-env"\nendpoint_id = "proxy"\n\n'
        "[ablation.sweep]\n"
        "temperature = [0.0]\n"
    )
    with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as tmp:
        tmp.write(toml_text)
        # Flush so the loader sees the content while the handle is still open.
        tmp.flush()
        configs = load_toml_config(Path(tmp.name))

    assert len(configs) == 1
    resolved = configs[0]
    assert resolved["endpoint_id"] == "proxy"
    assert "model" not in resolved
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
def test_ablation_swept_model_override_removes_global_endpoint_id():
    """A swept ``model`` must displace the global ``endpoint_id``."""
    # Global section sets ``endpoint_id``; the sweep supplies a single ``model``.
    toml_text = (
        'endpoint_id = "proxy"\n\n'
        '[[ablation]]\nenv_id = "my-env"\n\n'
        "[ablation.sweep]\n"
        'model = ["gpt-4.1-mini"]\n'
    )
    with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as tmp:
        tmp.write(toml_text)
        # Flush so the loader sees the content while the handle is still open.
        tmp.flush()
        configs = load_toml_config(Path(tmp.name))

    assert len(configs) == 1
    resolved = configs[0]
    assert resolved["model"] == "gpt-4.1-mini"
    assert "endpoint_id" not in resolved
|
|
1094
|
+
|
|
1095
|
+
|
|
1064
1096
|
def test_ablation_with_eval_blocks():
|
|
1065
1097
|
"""Ablation and eval blocks can coexist."""
|
|
1066
1098
|
with tempfile.NamedTemporaryFile(suffix=".toml", delete=False, mode="w") as f:
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Evaluation command module for external hosts."""
|
|
2
|
+
|
|
3
|
+
from verifiers.scripts.eval import (
|
|
4
|
+
build_extra_headers,
|
|
5
|
+
build_parser,
|
|
6
|
+
main,
|
|
7
|
+
merge_sampling_args,
|
|
8
|
+
parse_args,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"build_extra_headers",
|
|
13
|
+
"build_parser",
|
|
14
|
+
"merge_sampling_args",
|
|
15
|
+
"parse_args",
|
|
16
|
+
"main",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
if __name__ == "__main__":
|
|
21
|
+
main()
|
{verifiers-0.1.12.dev4 → verifiers-0.1.12.dev6}/verifiers/envs/experimental/composable/README.md
RENAMED
|
@@ -19,8 +19,8 @@ Separates **what to solve** (the task) from **how to solve it** (the agent) by r
|
|
|
19
19
|
## Usage
|
|
20
20
|
|
|
21
21
|
```python
|
|
22
|
-
from
|
|
23
|
-
from
|
|
22
|
+
from verifiers.envs.experimental.composable.tasksets.swe.r2e_gym import R2EGymTaskSet
|
|
23
|
+
from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
|
|
24
24
|
from verifiers.envs.experimental.composable import ComposableEnv
|
|
25
25
|
|
|
26
26
|
# Create a taskset
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from verifiers.envs.experimental.composable.harnesses.rlm import (
|
|
2
|
+
DEFAULT_RLM_MAX_TURNS,
|
|
3
|
+
DEFAULT_RLM_REPO_URL,
|
|
4
|
+
DEFAULT_RLM_TOOLS,
|
|
5
|
+
build_install_script as build_rlm_install_script,
|
|
6
|
+
build_run_command as build_rlm_run_command,
|
|
7
|
+
rlm_harness,
|
|
8
|
+
)
|
|
9
|
+
from verifiers.envs.experimental.composable.harnesses.opencode import (
|
|
10
|
+
DEFAULT_DISABLED_TOOLS,
|
|
11
|
+
DEFAULT_RELEASE_SHA256,
|
|
12
|
+
DEFAULT_SYSTEM_PROMPT,
|
|
13
|
+
OPENCODE_INSTALL_SCRIPT,
|
|
14
|
+
build_install_script as build_opencode_install_script,
|
|
15
|
+
build_opencode_config,
|
|
16
|
+
build_opencode_run_command,
|
|
17
|
+
opencode_harness,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"rlm_harness",
|
|
22
|
+
"build_rlm_install_script",
|
|
23
|
+
"build_rlm_run_command",
|
|
24
|
+
"DEFAULT_RLM_REPO_URL",
|
|
25
|
+
"DEFAULT_RLM_TOOLS",
|
|
26
|
+
"DEFAULT_RLM_MAX_TURNS",
|
|
27
|
+
"opencode_harness",
|
|
28
|
+
"build_opencode_install_script",
|
|
29
|
+
"build_opencode_config",
|
|
30
|
+
"build_opencode_run_command",
|
|
31
|
+
"OPENCODE_INSTALL_SCRIPT",
|
|
32
|
+
"DEFAULT_DISABLED_TOOLS",
|
|
33
|
+
"DEFAULT_RELEASE_SHA256",
|
|
34
|
+
"DEFAULT_SYSTEM_PROMPT",
|
|
35
|
+
]
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""OpenCode harness configuration.
|
|
2
|
+
|
|
3
|
+
Provides install script, config generation, and run command templates
|
|
4
|
+
that are shared across all OpenCode-based environments (SWE, Lean, Math, etc.).
|
|
5
|
+
|
|
6
|
+
Usage::
|
|
7
|
+
|
|
8
|
+
from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
|
|
9
|
+
harness = opencode_harness(system_prompt="You are a coding agent...")
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import shlex
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
# ── Defaults ─────────────────────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
DEFAULT_RELEASE_REPO = "PrimeIntellect-ai/opencode"
|
|
21
|
+
DEFAULT_RELEASE_VERSION = "1.1.63-rl1"
|
|
22
|
+
DEFAULT_RELEASE_SHA256 = (
|
|
23
|
+
"17104d601b8bf6fd03dd46a6de055b422414b9ada524fe085b09683f455ccac1"
|
|
24
|
+
)
|
|
25
|
+
DEFAULT_SYSTEM_PROMPT = (Path(__file__).parent / "prompt.txt").read_text()
|
|
26
|
+
|
|
27
|
+
DEFAULT_DISABLED_TOOLS = [
|
|
28
|
+
"apply_patch",
|
|
29
|
+
"write",
|
|
30
|
+
"multiedit",
|
|
31
|
+
"glob",
|
|
32
|
+
"todowrite",
|
|
33
|
+
"todoread",
|
|
34
|
+
"websearch",
|
|
35
|
+
"task",
|
|
36
|
+
"batch",
|
|
37
|
+
"list",
|
|
38
|
+
"read",
|
|
39
|
+
"question",
|
|
40
|
+
"webfetch",
|
|
41
|
+
"grep",
|
|
42
|
+
"plan_exit",
|
|
43
|
+
"plan_enter",
|
|
44
|
+
"lsp",
|
|
45
|
+
"codesearch",
|
|
46
|
+
"skill",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ── Install script ───────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def build_install_script(
    release_repo: str = DEFAULT_RELEASE_REPO,
    release_version: str = DEFAULT_RELEASE_VERSION,
    release_sha256: str = DEFAULT_RELEASE_SHA256,
    install_ripgrep: bool = True,
) -> str:
    """Build the shell script that installs OpenCode in a sandbox.

    The generated script installs ``curl`` and ``tar`` via ``apt-get``,
    downloads the release tarball matching the machine architecture from
    GitHub, verifies its SHA-256 digest, and installs the binary to
    ``$HOME/.opencode/bin/opencode``.

    Args:
        release_repo: GitHub ``owner/name`` slug hosting the release.
        release_version: Release version; a leading ``v`` is stripped by the
            script and re-added when the download URL is formed.
        release_sha256: Expected hex SHA-256 digest of the downloaded tarball.
        install_ripgrep: When true, also attempt to install ``ripgrep``
            (best effort: a failure does not abort the script).

    Returns:
        The shell script text.
    """
    rg_install = (
        "apt-get install -y -qq ripgrep > /dev/null 2>&1 || true"
        if install_ripgrep
        else ""
    )
    # ``sha256sum -c`` reads lines of the form "<digest> <mode><path>", where
    # the mode character is a space for text mode, hence the TWO spaces
    # between the digest and the path. The single-space form is not accepted
    # by every implementation.
    sha256_check = f'echo "{release_sha256}  /tmp/opencode.tar.gz" | sha256sum -c -'
    return f"""\
set -e
apt-get update -qq && apt-get install -y -qq curl tar > /dev/null 2>&1
{rg_install}

OPENCODE_RELEASE_REPO="{release_repo}"
OPENCODE_RELEASE_VERSION="{release_version}"

case "$(uname -m)" in
x86_64) OPENCODE_ARCH=x64 ;;
aarch64|arm64) OPENCODE_ARCH=arm64 ;;
*) echo "Unsupported architecture: $(uname -m)"; exit 1 ;;
esac

OPENCODE_ASSET="opencode-linux-$OPENCODE_ARCH.tar.gz"
OPENCODE_RELEASE_TAG="${{OPENCODE_RELEASE_VERSION#v}}"
OPENCODE_RELEASE_URL="https://github.com/$OPENCODE_RELEASE_REPO/releases/download/v$OPENCODE_RELEASE_TAG/$OPENCODE_ASSET"

mkdir -p "$HOME/.opencode/bin"
curl -fsSL "$OPENCODE_RELEASE_URL" -o /tmp/opencode.tar.gz
{sha256_check}
tar -xzf /tmp/opencode.tar.gz -C /tmp
install -m 755 /tmp/opencode "$HOME/.opencode/bin/opencode"
echo "OpenCode installed successfully"
"""
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ── Config generation ────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def build_opencode_config(
    disabled_tools: list[str] | None = None,
    system_prompt_path: str | None = None,
    disable_compaction: bool = True,
    provider_key: str = "${OPENAI_MODEL%%/*}",
    provider_display_name: str | None = None,
    model_id: str = "$OPENAI_MODEL",
    model_key: str = "${OPENAI_MODEL##*/}",
    model_display_name: str | None = None,
    provider_timeout_ms: int = 3_600_000,
) -> str:
    """Generate opencode.json config content.

    Args:
        disabled_tools: Tool names to switch off for the ``build`` agent.
        system_prompt_path: Path referenced as ``{file:<path>}`` for the
            ``build`` agent prompt; omitted when falsy.
        disable_compaction: When true, add a ``compaction`` section with
            ``auto`` and ``prune`` both set to false.
        provider_key: Key of the single provider entry.
        provider_display_name: Provider ``name``; falls back to
            ``provider_key``.
        model_id: Value of the top-level ``model`` field.
        model_key: Key of the single model entry under the provider.
        model_display_name: Model ``name``; falls back to ``model_key``.
        provider_timeout_ms: Provider ``timeout`` option.

    Returns:
        The config serialized as JSON with two-space indentation. The default
        placeholders contain shell syntax and are emitted verbatim.
    """
    model_entry = {
        "name": model_display_name or model_key,
        "modalities": {"input": ["text", "image"], "output": ["text"]},
        "interleaved": {"field": "reasoning_content"},
    }
    provider_entry = {
        "npm": "@ai-sdk/openai-compatible",
        "name": provider_display_name or provider_key,
        "options": {
            "baseURL": "$OPENAI_BASE_URL",
            "apiKey": "intercepted",
            "timeout": provider_timeout_ms,
        },
        "models": {model_key: model_entry},
    }
    settings: dict = {
        "${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json",
        "provider": {provider_key: provider_entry},
        "model": model_id,
    }

    if disable_compaction:
        settings["compaction"] = {"auto": False, "prune": False}

    # Only emit an "agent" section when there is something to put in it.
    build_agent: dict = {}
    if system_prompt_path:
        build_agent["prompt"] = "".join(("{file:", system_prompt_path, "}"))
    if disabled_tools:
        build_agent["tools"] = dict.fromkeys(disabled_tools, False)
    if build_agent:
        settings["agent"] = {"build": build_agent}

    return json.dumps(settings, indent=2)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ── Run command ──────────────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def build_opencode_run_command(
    agent_workdir: str = "/app",
    prompt_path: str = "/opencode/prompt.txt",
    log_path: str = "/opencode/logs.txt",
    disabled_tools: list[str] | None = None,
    system_prompt_path: str | None = None,
    disable_compaction: bool = True,
    allow_git: bool = False,
    provider_key: str = "${OPENAI_MODEL%%/*}",
    provider_display_name: str | None = None,
    model_id: str = "$OPENAI_MODEL",
    model_key: str = "${OPENAI_MODEL##*/}",
    model_display_name: str | None = None,
    provider_timeout_ms: int = 3_600_000,
) -> str:
    """Build the shell command that configures and runs OpenCode.

    The generated script writes the JSON config to
    ``~/.config/opencode/opencode.json``, changes into ``agent_workdir`` and
    pipes the file at ``prompt_path`` into ``opencode run``, copying combined
    stdout/stderr to ``log_path`` via ``tee``.

    Args:
        agent_workdir: Directory created (if missing) and used as the
            working directory.
        prompt_path: File whose content is fed to ``opencode run`` on stdin.
        log_path: File that receives the combined output.
        allow_git: Exported as ``ALLOW_GIT=1`` when true, else ``ALLOW_GIT=0``.

    All remaining arguments are forwarded unchanged to
    ``build_opencode_config``.

    Returns:
        A ``bash -lc '<script>'`` command string with the script shell-quoted.
    """
    rendered_config = build_opencode_config(
        disabled_tools=disabled_tools,
        system_prompt_path=system_prompt_path,
        disable_compaction=disable_compaction,
        provider_key=provider_key,
        provider_display_name=provider_display_name,
        model_id=model_id,
        model_key=model_key,
        model_display_name=model_display_name,
        provider_timeout_ms=provider_timeout_ms,
    )
    git_flag = "1" if allow_git else "0"

    script_lines = [
        "set -eo pipefail",
        "",
        'export PATH="$HOME/.opencode/bin:$PATH"',
        "export OPENCODE_DISABLE_FILETIME_CHECK=true",
        f"export ALLOW_GIT={git_flag}",
        "",
        f"mkdir -p ~/.config/opencode /logs/agent {agent_workdir}",
        "",
        # Holds a literal "$" so it is available when the heredoc below is
        # expanded by the shell.
        "SCHEMA_DOLLAR='$'",
        "",
        # The heredoc delimiter is unquoted, so the shell performs variable
        # expansion on the config text as it is written out.
        "cat > ~/.config/opencode/opencode.json << EOFCONFIG",
        rendered_config,
        "EOFCONFIG",
        "",
        f"cd {agent_workdir}",
        f"cat {prompt_path} | opencode run 2>&1 | tee {log_path}",
    ]
    script = "\n".join(script_lines) + "\n"
    return "bash -lc " + shlex.quote(script)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ── Convenience: pre-built install script ────────────────────────────────
|
|
198
|
+
|
|
199
|
+
OPENCODE_INSTALL_SCRIPT = build_install_script()
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# ── Harness factory ──────────────────────────────────────────────────────
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def opencode_harness(
    system_prompt: str | None = DEFAULT_SYSTEM_PROMPT,
    task_system_prompt: str | None = None,
    disabled_tools: list[str] | None = None,
    agent_workdir: str = "/app",
    allow_git: bool = False,
    disable_compaction: bool = True,
    release_repo: str = DEFAULT_RELEASE_REPO,
    release_version: str = DEFAULT_RELEASE_VERSION,
    release_sha256: str = DEFAULT_RELEASE_SHA256,
    instruction_path: str = "/opencode/prompt.txt",
    system_prompt_path: str = "/opencode/system.txt",
    log_path: str = "/opencode/logs.txt",
    provider_key: str = "${OPENAI_MODEL%%/*}",
    provider_display_name: str | None = None,
    model_id: str = "$OPENAI_MODEL",
    model_key: str = "${OPENAI_MODEL##*/}",
    model_display_name: str | None = None,
    provider_timeout_ms: int = 3_600_000,
):
    """Create a Harness configured for OpenCode.

    Usage::

        from verifiers.envs.experimental.composable.harnesses.opencode import opencode_harness
        harness = opencode_harness(system_prompt="You are a coding agent...")

    Args:
        system_prompt: Base system prompt; a falsy value means "no base prompt".
        task_system_prompt: Optional extra prompt. When truthy it is appended
            to ``system_prompt`` after a newline, or used alone if there is
            no base prompt.
        disabled_tools, disable_compaction, allow_git, agent_workdir,
        provider_key, provider_display_name, model_id, model_key,
        model_display_name, provider_timeout_ms: Forwarded unchanged to
            ``build_opencode_run_command``.
        release_repo, release_version, release_sha256: Forwarded unchanged to
            ``build_install_script``.
        instruction_path: Passed to the run command as ``prompt_path`` and to
            the Harness as ``instruction_path``.
        system_prompt_path: Always passed to the Harness; passed to the run
            command only when the merged system prompt is truthy.
        log_path: Passed to both the run command and the Harness.

    Returns:
        The constructed ``Harness`` instance.
    """
    from verifiers.envs.experimental.composable import Harness

    # Merge the task-specific prompt into the base prompt. When there is no
    # task prompt, system_prompt is left exactly as it was passed in.
    if task_system_prompt:
        system_prompt = (
            f"{system_prompt}\n{task_system_prompt}"
            if system_prompt
            else task_system_prompt
        )

    installer = build_install_script(
        release_repo=release_repo,
        release_version=release_version,
        release_sha256=release_sha256,
    )

    # The run command only receives a system prompt path when there is a
    # prompt to go with it.
    run_system_prompt_path = system_prompt_path if system_prompt else None
    launcher = build_opencode_run_command(
        agent_workdir=agent_workdir,
        prompt_path=instruction_path,
        log_path=log_path,
        disabled_tools=disabled_tools,
        system_prompt_path=run_system_prompt_path,
        disable_compaction=disable_compaction,
        allow_git=allow_git,
        provider_key=provider_key,
        provider_display_name=provider_display_name,
        model_id=model_id,
        model_key=model_key,
        model_display_name=model_display_name,
        provider_timeout_ms=provider_timeout_ms,
    )

    return Harness(
        install_script=installer,
        run_command=launcher,
        system_prompt=system_prompt,
        instruction_path=instruction_path,
        system_prompt_path=system_prompt_path,
        log_path=log_path,
    )
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
You are OpenCode, the best coding agent on the planet.
|
|
2
|
+
|
|
3
|
+
You are an interactive CLI tool that helps users with tasks. Use the instructions below and the tools available to you to assist the user.
|
|
4
|
+
|
|
5
|
+
# Tone and style
|
|
6
|
+
- Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
|
|
7
|
+
- Your output will be displayed on a command line interface. Your responses should be short and concise. You can use GitHub-flavored Markdown for formatting, and it will be rendered in a monospace font using the CommonMark specification.
|
|
8
|
+
- Output text to communicate with the user; all text you output outside of tool use is displayed to the user. Only use tools to complete tasks. Never use tools like bash or code comments as means to communicate with the user during the session.
|
|
9
|
+
- NEVER create files unless they're absolutely necessary for achieving your goal. ALWAYS prefer editing an existing file to creating a new one. This includes markdown files.
|
|
10
|
+
|
|
11
|
+
# Professional objectivity
|
|
12
|
+
Prioritize technical accuracy and truthfulness over validating the user's beliefs. Focus on facts and problem-solving, providing direct, objective technical info without any unnecessary superlatives, praise, or emotional validation. It is best for the user if OpenCode honestly applies the same rigorous standards to all ideas and disagrees when necessary, even if it may not be what the user wants to hear. Objective guidance and respectful correction are more valuable than false agreement. Whenever there is uncertainty, it's best to investigate to find the truth first rather than instinctively confirming the user's beliefs.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""RLM agent harness: install script, run command, and harness factory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import shlex
|
|
6
|
+
|
|
7
|
+
from verifiers.envs.experimental.composable import Harness
|
|
8
|
+
|
|
9
|
+
# Default RLM repository location, written as host/path without a URL scheme.
DEFAULT_RLM_REPO_URL = "github.com/PrimeIntellect-ai/rlm.git"
# NOTE(review): the next two defaults are not referenced in the visible part
# of this module; presumably they are consumed elsewhere. Confirm before
# relying on them.
DEFAULT_RLM_TOOLS = "bash,edit"
DEFAULT_RLM_MAX_TURNS = 100
# File whose contents the run command exports as RLM_APPEND_TO_SYSTEM_PROMPT;
# also handed to the Harness as its system_prompt_path.
DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/task/append_to_system_prompt.txt"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_install_script(rlm_repo_url: str = DEFAULT_RLM_REPO_URL) -> str:
    """Build the shell command that downloads and runs the RLM installer.

    Args:
        rlm_repo_url: GitHub repository location such as
            ``github.com/org/repo.git``. A leading ``https://`` or
            ``http://`` scheme and a trailing ``.git`` are both tolerated.

    Returns:
        A shell snippet that fetches ``main/install.sh`` from the matching
        ``raw.githubusercontent.com`` location (``curl`` first, ``wget`` as a
        fallback), saves it to ``/tmp/rlm-install.sh`` and runs it with bash.
        The snippet references ``${GH_TOKEN}``, which the shell expands when
        the command runs.
    """
    repo = rlm_repo_url
    # The URL template below supplies its own scheme; strip one if the caller
    # passed a full URL so the result is not "https://...@https://...".
    for scheme in ("https://", "http://"):
        if repo.startswith(scheme):
            repo = repo[len(scheme):]
            break

    # Rewrite only the host (first occurrence), never a later path segment
    # that happens to contain "github.com".
    raw_base = repo.removesuffix(".git").replace(
        "github.com", "raw.githubusercontent.com", 1
    )

    # Double quotes keep the ${GH_TOKEN} expansion but stop the shell from
    # word-splitting or globbing the expanded URL.
    url = f'"https://${{GH_TOKEN}}@{raw_base}/main/install.sh"'
    return f"(curl -fsSL {url} || wget -qO- {url}) > /tmp/rlm-install.sh && bash /tmp/rlm-install.sh"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def build_run_command(
    instruction_path: str = "/task/instruction.md",
    workdir: str = "/testbed",
) -> str:
    """Build the ``bash -lc`` command that launches ``rlm`` on a task.

    The generated script exports ``RLM_MODEL`` from ``$OPENAI_MODEL``, sets a
    placeholder ``OPENAI_API_KEY``, and loads the optional append-to-system-
    prompt file into ``RLM_APPEND_TO_SYSTEM_PROMPT`` (empty if the file is
    missing). It then changes into ``workdir`` and runs ``rlm`` with the
    contents of ``instruction_path`` as its single argument.

    Args:
        instruction_path: File whose contents become the ``rlm`` prompt.
        workdir: Directory to ``cd`` into before running ``rlm``.

    Returns:
        A single shell command string of the form ``bash -lc '<script>'``.
    """
    # Quote every interpolated path so spaces or shell metacharacters in a
    # caller-supplied value cannot change the meaning of the script. For the
    # default paths shlex.quote() returns the input unchanged.
    append_prompt_file = shlex.quote(DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH)
    task_dir = shlex.quote(workdir)
    instruction_file = shlex.quote(instruction_path)

    script = f"""\
set -eo pipefail
export RLM_MODEL=$OPENAI_MODEL
export OPENAI_API_KEY=intercepted
export RLM_APPEND_TO_SYSTEM_PROMPT="$(cat {append_prompt_file} 2>/dev/null || true)"
cd {task_dir}
rlm "$(cat {instruction_file})"
"""
    return f"bash -lc {shlex.quote(script)}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def rlm_harness(
    workdir: str = "/testbed",
    instruction_path: str = "/task/instruction.md",
    rlm_repo_url: str = DEFAULT_RLM_REPO_URL,
    append_to_system_prompt: str | None = None,
) -> Harness:
    """Create a Harness that installs and runs the RLM agent.

    Args:
        workdir: Directory the run command changes into before starting rlm.
        instruction_path: File holding the task instruction; used by the run
            command and also passed to the Harness.
        rlm_repo_url: Repository location handed to ``build_install_script``.
        append_to_system_prompt: Optional text passed to the Harness as its
            ``system_prompt``. The Harness is given
            ``DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH`` as the matching path,
            which is the file the run command reads into
            ``RLM_APPEND_TO_SYSTEM_PROMPT``.

    Returns:
        The constructed ``Harness`` instance.
    """
    installer = build_install_script(rlm_repo_url=rlm_repo_url)
    launcher = build_run_command(instruction_path=instruction_path, workdir=workdir)
    return Harness(
        install_script=installer,
        run_command=launcher,
        system_prompt=append_to_system_prompt,
        system_prompt_path=DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH,
        instruction_path=instruction_path,
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from verifiers.envs.experimental.composable.tasksets.swe.swe_tasksets import (
|
|
2
|
+
make_multiswe_taskset,
|
|
3
|
+
make_openswe_taskset,
|
|
4
|
+
make_r2e_taskset,
|
|
5
|
+
make_swe_taskset,
|
|
6
|
+
make_swebench_taskset,
|
|
7
|
+
)
|
|
8
|
+
from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
|
|
9
|
+
LEAN_SYSTEM_PROMPT,
|
|
10
|
+
LeanTaskSet,
|
|
11
|
+
)
|
|
12
|
+
from verifiers.envs.experimental.composable.tasksets.math.math_task import MathTaskSet
|
|
13
|
+
from verifiers.envs.experimental.composable.tasksets.cp.cp_task import (
|
|
14
|
+
CPRubric,
|
|
15
|
+
CPTaskSet,
|
|
16
|
+
)
|
|
17
|
+
from verifiers.envs.experimental.composable.tasksets.harbor.harbor import (
|
|
18
|
+
HarborDatasetRubric,
|
|
19
|
+
HarborDatasetTaskSet,
|
|
20
|
+
HarborRubric,
|
|
21
|
+
HarborTaskSet,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Public names re-exported by this package, grouped by the submodule they
# are imported from above.
__all__ = [
    # swe.swe_tasksets
    "make_swe_taskset",
    "make_r2e_taskset",
    "make_swebench_taskset",
    "make_multiswe_taskset",
    "make_openswe_taskset",
    # lean.lean_task
    "LeanTaskSet",
    "LEAN_SYSTEM_PROMPT",
    # math.math_task
    "MathTaskSet",
    # cp.cp_task
    "CPTaskSet",
    "CPRubric",
    # harbor.harbor
    "HarborTaskSet",
    "HarborDatasetTaskSet",
    "HarborRubric",
    "HarborDatasetRubric",
]
|