verifiers 0.1.12.dev1__tar.gz → 0.1.12.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/PKG-INFO +4 -3
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/pyproject.toml +3 -2
- verifiers-0.1.12.dev2/tests/test_composable_env.py +200 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_rlm_env.py +432 -69
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_sandbox_mixin.py +7 -48
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_tui_info_formatting.py +58 -16
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/__init__.py +1 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_client.py +5 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/openai_chat_completions_token_client.py +109 -92
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/openai_completions_client.py +7 -1
- verifiers-0.1.12.dev2/verifiers/envs/experimental/__init__.py +28 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/cli_agent_env.py +31 -5
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/README.md +151 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/__init__.py +17 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/composable_env.py +202 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/harness.py +58 -0
- verifiers-0.1.12.dev2/verifiers/envs/experimental/composable/task.py +362 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_env.py +0 -2
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/rlm_env.py +661 -413
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/sandbox_mixin.py +11 -36
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/tui.py +887 -235
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/server/env_server.py +2 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/server/env_worker.py +2 -1
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/process_utils.py +15 -0
- verifiers-0.1.12.dev1/verifiers/envs/experimental/__init__.py +0 -3
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/.gitignore +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/LICENSE +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/conftest.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_build_script.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_client_config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_env_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_env_server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_environment.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_envs.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_imports.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_logging.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/decorators.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/harbor_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/errors.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/types.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.12.dev1 → verifiers-0.1.12.dev2}/verifiers/utils/version_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.12.dev1
|
|
3
|
+
Version: 0.1.12.dev2
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -34,12 +34,13 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
|
34
34
|
Requires-Dist: numpy
|
|
35
35
|
Requires-Dist: openai-agents>=0.0.7
|
|
36
36
|
Requires-Dist: openai>=1.108.1
|
|
37
|
-
Requires-Dist: prime-sandboxes>=0.2.
|
|
38
|
-
Requires-Dist: prime-tunnel>=0.1.
|
|
37
|
+
Requires-Dist: prime-sandboxes>=0.2.19
|
|
38
|
+
Requires-Dist: prime-tunnel>=0.1.5
|
|
39
39
|
Requires-Dist: pydantic>=2.11.9
|
|
40
40
|
Requires-Dist: pyzmq>=27.1.0
|
|
41
41
|
Requires-Dist: requests
|
|
42
42
|
Requires-Dist: rich
|
|
43
|
+
Requires-Dist: setproctitle>=1.3.0
|
|
43
44
|
Requires-Dist: tenacity>=8.5.0
|
|
44
45
|
Requires-Dist: textual
|
|
45
46
|
Requires-Dist: tomli; python_version < '3.11'
|
|
@@ -37,8 +37,8 @@ dependencies = [
|
|
|
37
37
|
"nest-asyncio>=1.6.0", # for jupyter notebooks
|
|
38
38
|
"openai>=1.108.1",
|
|
39
39
|
"openai-agents>=0.0.7",
|
|
40
|
-
"prime-tunnel>=0.1.
|
|
41
|
-
"prime-sandboxes>=0.2.
|
|
40
|
+
"prime-tunnel>=0.1.5",
|
|
41
|
+
"prime-sandboxes>=0.2.19",
|
|
42
42
|
"pydantic>=2.11.9",
|
|
43
43
|
"requests",
|
|
44
44
|
"rich",
|
|
@@ -51,6 +51,7 @@ dependencies = [
|
|
|
51
51
|
"pyzmq>=27.1.0",
|
|
52
52
|
"msgpack>=1.1.2",
|
|
53
53
|
"aiolimiter>=1.2.1",
|
|
54
|
+
"setproctitle>=1.3.0",
|
|
54
55
|
]
|
|
55
56
|
|
|
56
57
|
[dependency-groups]
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Tests for the composable architecture: Task, TaskSet, SandboxTaskSet, SandboxSpec."""
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import verifiers as vf
|
|
6
|
+
from verifiers.envs.experimental.composable import (
|
|
7
|
+
ComposableEnv,
|
|
8
|
+
Harness,
|
|
9
|
+
SandboxSpec,
|
|
10
|
+
SandboxTaskSet,
|
|
11
|
+
Task,
|
|
12
|
+
TaskSet,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# ── Mock Rubrics ──────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MockSandboxRubric(vf.Rubric):
|
|
20
|
+
def __init__(self, **kwargs):
|
|
21
|
+
super().__init__(**kwargs)
|
|
22
|
+
self.add_reward_func(self.solved)
|
|
23
|
+
|
|
24
|
+
async def solved(self, state, **kwargs) -> float:
|
|
25
|
+
return 1.0 if state.get("test_output") == "PASS" else 0.0
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MockMathRubric(vf.Rubric):
|
|
29
|
+
def __init__(self, **kwargs):
|
|
30
|
+
super().__init__(**kwargs)
|
|
31
|
+
self.add_reward_func(self.correct)
|
|
32
|
+
|
|
33
|
+
async def correct(self, state, **kwargs) -> float:
|
|
34
|
+
return 1.0 if state.get("info", {}).get("id") == 0 else 0.0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Mock TaskSets ───────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class MockSandboxTaskSet(SandboxTaskSet):
|
|
41
|
+
"""SandboxTaskSet for testing."""
|
|
42
|
+
|
|
43
|
+
def get_instruction(self, info):
|
|
44
|
+
return f"Fix bug #{info.get('id', 0)}"
|
|
45
|
+
|
|
46
|
+
def get_sandbox_spec(self, info):
|
|
47
|
+
return SandboxSpec(image="python:3.11-slim", cpu_cores=2, memory_gb=2)
|
|
48
|
+
|
|
49
|
+
def get_rubric(self):
|
|
50
|
+
return MockSandboxRubric()
|
|
51
|
+
|
|
52
|
+
def get_workdir(self, info):
|
|
53
|
+
return "/testbed"
|
|
54
|
+
|
|
55
|
+
def get_env_vars(self):
|
|
56
|
+
return {"FOO": "bar"}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MockTaskSet(TaskSet):
|
|
60
|
+
"""Plain TaskSet (no sandbox) for testing."""
|
|
61
|
+
|
|
62
|
+
def get_instruction(self, info):
|
|
63
|
+
return info.get("question", "")
|
|
64
|
+
|
|
65
|
+
def get_rubric(self):
|
|
66
|
+
return MockMathRubric()
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _make_dataset(n=3):
|
|
70
|
+
from datasets import Dataset
|
|
71
|
+
|
|
72
|
+
return Dataset.from_dict(
|
|
73
|
+
{
|
|
74
|
+
"info": [{"id": i, "question": f"q{i}"} for i in range(n)],
|
|
75
|
+
"answer": ["" for _ in range(n)],
|
|
76
|
+
}
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ── SandboxSpec ─────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_sandbox_spec_defaults():
|
|
84
|
+
spec = SandboxSpec()
|
|
85
|
+
assert spec.image == "python:3.11-slim"
|
|
86
|
+
assert spec.cpu_cores == 4
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_sandbox_spec_custom():
|
|
90
|
+
spec = SandboxSpec(image="lean-tactic:v4.27", gpu_count=1)
|
|
91
|
+
assert spec.image == "lean-tactic:v4.27"
|
|
92
|
+
assert spec.gpu_count == 1
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ── Task from SandboxTaskSet ───────────────────────────────────────────
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_task_sandbox_spec():
|
|
99
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
100
|
+
task = ts[0]
|
|
101
|
+
assert isinstance(task, Task)
|
|
102
|
+
assert task.sandbox_spec is not None
|
|
103
|
+
assert task.sandbox_spec.image == "python:3.11-slim"
|
|
104
|
+
assert task.sandbox_spec.cpu_cores == 2
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_task_image():
|
|
108
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
109
|
+
task = ts[0]
|
|
110
|
+
assert task.image == "python:3.11-slim"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_task_workdir():
|
|
114
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
115
|
+
task = ts[0]
|
|
116
|
+
assert task.workdir == "/testbed"
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def test_task_repr_sandbox():
|
|
120
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
121
|
+
task = ts[0]
|
|
122
|
+
assert "python:3.11-slim" in repr(task)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ── Task from plain TaskSet ────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_task_no_sandbox():
|
|
129
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
130
|
+
task = ts[0]
|
|
131
|
+
assert task.sandbox_spec is None
|
|
132
|
+
assert task.image is None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def test_task_repr_no_sandbox():
|
|
136
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
137
|
+
task = ts[0]
|
|
138
|
+
assert "no sandbox" in repr(task)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ── TaskSet ─────────────────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_taskset_isinstance():
|
|
145
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="math")
|
|
146
|
+
assert not isinstance(ts, SandboxTaskSet)
|
|
147
|
+
|
|
148
|
+
ts2 = MockSandboxTaskSet(dataset=_make_dataset(), name="swe")
|
|
149
|
+
assert isinstance(ts2, SandboxTaskSet)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_taskset_len():
|
|
153
|
+
ts = MockTaskSet(dataset=_make_dataset(5), name="test")
|
|
154
|
+
assert len(ts) == 5
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def test_taskset_iter():
|
|
158
|
+
ts = MockTaskSet(dataset=_make_dataset(3), name="test")
|
|
159
|
+
tasks = list(ts)
|
|
160
|
+
assert len(tasks) == 3
|
|
161
|
+
assert all(isinstance(t, Task) for t in tasks)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def test_taskset_filter():
|
|
165
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
166
|
+
filtered = ts.filter(lambda ex: ex["info"]["id"] < 3)
|
|
167
|
+
assert len(filtered) == 3
|
|
168
|
+
assert isinstance(filtered, MockSandboxTaskSet)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_taskset_take():
|
|
172
|
+
ts = MockSandboxTaskSet(dataset=_make_dataset(5), name="test")
|
|
173
|
+
taken = ts.take(2)
|
|
174
|
+
assert len(taken) == 2
|
|
175
|
+
assert isinstance(taken, MockSandboxTaskSet)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def test_taskset_repr():
|
|
179
|
+
ts = MockTaskSet(dataset=_make_dataset(), name="mytest")
|
|
180
|
+
assert "mytest" in repr(ts)
|
|
181
|
+
assert "3" in repr(ts)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@pytest.mark.asyncio
|
|
185
|
+
async def test_composable_env_exports_task_workdir():
|
|
186
|
+
taskset = MockSandboxTaskSet(dataset=_make_dataset(), name="test")
|
|
187
|
+
env = ComposableEnv(
|
|
188
|
+
taskset=taskset,
|
|
189
|
+
harness=Harness(run_command="true"),
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
env_vars = await env.build_env_vars(
|
|
193
|
+
{
|
|
194
|
+
"info": {"id": 0},
|
|
195
|
+
"interception_base_url": "https://test.trycloudflare.com/v1",
|
|
196
|
+
}
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
assert env_vars["AGENT_WORKDIR"] == "/testbed"
|
|
200
|
+
assert env_vars["FOO"] == "bar"
|