verifiers 0.1.11.dev0__tar.gz → 0.1.11.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/PKG-INFO +8 -8
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/README.md +7 -7
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/conftest.py +6 -0
- verifiers-0.1.11.dev1/tests/test_env_crash_recovery.py +237 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rlm_env.py +572 -7
- verifiers-0.1.11.dev1/tests/test_rollout_gateway_env.py +350 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_save_utils.py +205 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/AGENTS.md +1 -1
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/__init__.py +6 -1
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/openai_chat_completions_client.py +64 -21
- verifiers-0.1.11.dev1/verifiers/clients/openai_chat_completions_token_client.py +236 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/env_group.py +0 -6
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/environment.py +41 -26
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/README.md +3 -1
- verifiers-0.1.11.dev1/verifiers/envs/experimental/__init__.py +4 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/cli_agent_env.py +27 -12
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/rlm_env.py +333 -55
- verifiers-0.1.11.dev1/verifiers/envs/experimental/rollout_gateway_mixin.py +397 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/browser_env.py +7 -1
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +60 -44
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/openenv_env.py +7 -1
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/textarena_env.py +33 -11
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/errors.py +6 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/eval.py +101 -33
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/gepa.py +1 -1
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/types.py +5 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/async_utils.py +2 -6
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/eval_utils.py +26 -2
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/logging_utils.py +44 -6
- verifiers-0.1.11.dev1/verifiers/utils/metric_utils.py +69 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/response_utils.py +5 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/save_utils.py +11 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/worker_utils.py +15 -32
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/client/env_client.py +22 -2
- verifiers-0.1.11.dev1/verifiers/workers/client/zmq_env_client.py +408 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/server/env_server.py +64 -29
- verifiers-0.1.11.dev1/verifiers/workers/server/zmq_env_server.py +246 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/types.py +21 -0
- verifiers-0.1.11.dev0/verifiers/clients/openai_chat_completions_token_client.py +0 -67
- verifiers-0.1.11.dev0/verifiers/envs/experimental/__init__.py +0 -3
- verifiers-0.1.11.dev0/verifiers/utils/token_utils.py +0 -174
- verifiers-0.1.11.dev0/verifiers/workers/client/zmq_env_client.py +0 -198
- verifiers-0.1.11.dev0/verifiers/workers/server/zmq_env_server.py +0 -148
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/.gitignore +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/LICENSE +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/pyproject.toml +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/AGENTS.md +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/README.md +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_build_script.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_config.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_env_group.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_environment.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_envs.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_gepa_cli.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_imports.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_logging.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_message_utils_audio.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rlm_env_sandbox.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rubric.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/decorators.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/harbor_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.11.dev0 → verifiers-0.1.11.dev1}/verifiers/workers/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.11.dev0
|
|
3
|
+
Version: 0.1.11.dev1
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -106,7 +106,7 @@ Verifiers: Environments for LLM Reinforcement Learning
|
|
|
106
106
|
|
|
107
107
|
- [01/08/26] v0.1.9 is released, featuring a number of new experimental environment class types, monitor rubrics for automatic metric collection, improved workspace setup flow, improved error handling, bug fixes, and a documentation overhaul.
|
|
108
108
|
- [11/19/25] v0.1.8 is released, featuring a major refactor of the rollout system to use trajectory-based tracking for token-in token-out training across turns, as well as support for truncated or branching rollouts.
|
|
109
|
-
- [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl], a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
|
|
109
|
+
- [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl](https://github.com/PrimeIntellect-ai/prime-rl), a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
|
|
110
110
|
- [10/27/25] A new iteration of the Prime Intellect [Environments Program](https://docs.google.com/spreadsheets/d/13UDfRDjgIZXsMI2s9-Lmn8KSMMsgk2_zsfju6cx_pNU/edit?gid=0#gid=0) is live!
|
|
111
111
|
|
|
112
112
|
|
|
@@ -229,17 +229,17 @@ prime eval run primeintellect/math-python
|
|
|
229
229
|
|
|
230
230
|
## Documentation
|
|
231
231
|
|
|
232
|
-
**[Environments](environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
|
|
232
|
+
**[Environments](docs/environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
|
|
233
233
|
|
|
234
|
-
**[Evaluation](evaluation.md)** - Evaluate models using your environments.
|
|
234
|
+
**[Evaluation](docs/evaluation.md)** - Evaluate models using your environments.
|
|
235
235
|
|
|
236
|
-
**[Training](training.md)** — Train models in your environments with reinforcement learning.
|
|
236
|
+
**[Training](docs/training.md)** — Train models in your environments with reinforcement learning.
|
|
237
237
|
|
|
238
|
-
**[Development](development.md)** — Contributing to verifiers
|
|
238
|
+
**[Development](docs/development.md)** — Contributing to verifiers
|
|
239
239
|
|
|
240
|
-
**[API Reference](reference.md)** — Understanding the API and data structures
|
|
240
|
+
**[API Reference](docs/reference.md)** — Understanding the API and data structures
|
|
241
241
|
|
|
242
|
-
**[FAQs](faqs.md)** - Other frequently asked questions.
|
|
242
|
+
**[FAQs](docs/faqs.md)** - Other frequently asked questions.
|
|
243
243
|
|
|
244
244
|
|
|
245
245
|
## Citation
|
|
@@ -36,7 +36,7 @@ Verifiers: Environments for LLM Reinforcement Learning
|
|
|
36
36
|
|
|
37
37
|
- [01/08/26] v0.1.9 is released, featuring a number of new experimental environment class types, monitor rubrics for automatic metric collection, improved workspace setup flow, improved error handling, bug fixes, and a documentation overhaul.
|
|
38
38
|
- [11/19/25] v0.1.8 is released, featuring a major refactor of the rollout system to use trajectory-based tracking for token-in token-out training across turns, as well as support for truncated or branching rollouts.
|
|
39
|
-
- [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl], a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
|
|
39
|
+
- [11/07/25] Verifiers v0.1.7 is released! This includes an improved quickstart configuration for training with [prime-rl](https://github.com/PrimeIntellect-ai/prime-rl), a new included "nano" trainer (`vf.RLTrainer`, replacing `vf.GRPOTrainer`), and a number of bug fixes and improvements to the documentation.
|
|
40
40
|
- [10/27/25] A new iteration of the Prime Intellect [Environments Program](https://docs.google.com/spreadsheets/d/13UDfRDjgIZXsMI2s9-Lmn8KSMMsgk2_zsfju6cx_pNU/edit?gid=0#gid=0) is live!
|
|
41
41
|
|
|
42
42
|
|
|
@@ -159,17 +159,17 @@ prime eval run primeintellect/math-python
|
|
|
159
159
|
|
|
160
160
|
## Documentation
|
|
161
161
|
|
|
162
|
-
**[Environments](environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
|
|
162
|
+
**[Environments](docs/environments.md)** — Create datasets, rubrics, and custom multi-turn interaction protocols.
|
|
163
163
|
|
|
164
|
-
**[Evaluation](evaluation.md)** - Evaluate models using your environments.
|
|
164
|
+
**[Evaluation](docs/evaluation.md)** - Evaluate models using your environments.
|
|
165
165
|
|
|
166
|
-
**[Training](training.md)** — Train models in your environments with reinforcement learning.
|
|
166
|
+
**[Training](docs/training.md)** — Train models in your environments with reinforcement learning.
|
|
167
167
|
|
|
168
|
-
**[Development](development.md)** — Contributing to verifiers
|
|
168
|
+
**[Development](docs/development.md)** — Contributing to verifiers
|
|
169
169
|
|
|
170
|
-
**[API Reference](reference.md)** — Understanding the API and data structures
|
|
170
|
+
**[API Reference](docs/reference.md)** — Understanding the API and data structures
|
|
171
171
|
|
|
172
|
-
**[FAQs](faqs.md)** - Other frequently asked questions.
|
|
172
|
+
**[FAQs](docs/faqs.md)** - Other frequently asked questions.
|
|
173
173
|
|
|
174
174
|
|
|
175
175
|
## Citation
|
|
@@ -554,6 +554,9 @@ def make_metadata() -> Callable[..., GenerateMetadata]:
|
|
|
554
554
|
time_ms: float = 0.0,
|
|
555
555
|
avg_reward: float = 0.0,
|
|
556
556
|
avg_metrics: dict[str, float] = {},
|
|
557
|
+
pass_at_k: dict[str, float] = {},
|
|
558
|
+
pass_all_k: dict[str, float] = {},
|
|
559
|
+
pass_threshold: float = 0.5,
|
|
557
560
|
usage: dict[str, float] | None = None,
|
|
558
561
|
version_info: dict | None = None,
|
|
559
562
|
state_columns: list[str] = ["foo"],
|
|
@@ -579,6 +582,9 @@ def make_metadata() -> Callable[..., GenerateMetadata]:
|
|
|
579
582
|
time_ms=time_ms,
|
|
580
583
|
avg_reward=avg_reward,
|
|
581
584
|
avg_metrics=avg_metrics,
|
|
585
|
+
pass_at_k=pass_at_k,
|
|
586
|
+
pass_all_k=pass_all_k,
|
|
587
|
+
pass_threshold=pass_threshold,
|
|
582
588
|
usage=usage,
|
|
583
589
|
version_info=version_info,
|
|
584
590
|
state_columns=state_columns,
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Tests for environment server crash detection and recovery."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
from unittest.mock import patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from verifiers.workers.client.zmq_env_client import ZMQEnvClient
|
|
10
|
+
from verifiers.workers.types import (
|
|
11
|
+
HealthRequest,
|
|
12
|
+
HealthResponse,
|
|
13
|
+
PendingRequest,
|
|
14
|
+
ServerState,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestStateTransitions:
|
|
19
|
+
"""Tests for health-check-driven state transitions (via dedicated thread callbacks)."""
|
|
20
|
+
|
|
21
|
+
@pytest.mark.asyncio
|
|
22
|
+
async def test_startup_to_healthy_to_unhealthy(self):
|
|
23
|
+
"""Callbacks drive STARTUP → HEALTHY → UNHEALTHY via healthy_event."""
|
|
24
|
+
client = ZMQEnvClient(
|
|
25
|
+
address="tcp://127.0.0.1:5555",
|
|
26
|
+
health_check_interval=0, # disable auto thread
|
|
27
|
+
)
|
|
28
|
+
client.loop = asyncio.get_running_loop()
|
|
29
|
+
|
|
30
|
+
assert client.server_state == ServerState.STARTUP
|
|
31
|
+
assert not client.healthy_event.is_set()
|
|
32
|
+
|
|
33
|
+
# STARTUP → HEALTHY
|
|
34
|
+
client.on_became_healthy(ServerState.STARTUP)
|
|
35
|
+
assert client.server_state == ServerState.HEALTHY
|
|
36
|
+
assert client.healthy_event.is_set()
|
|
37
|
+
|
|
38
|
+
# HEALTHY → UNHEALTHY (after 5 consecutive failures)
|
|
39
|
+
client.on_became_unhealthy(5)
|
|
40
|
+
await asyncio.sleep(0.1) # let _do_cancel_pending run
|
|
41
|
+
assert client.server_state == ServerState.UNHEALTHY
|
|
42
|
+
assert not client.healthy_event.is_set()
|
|
43
|
+
|
|
44
|
+
await client.close()
|
|
45
|
+
|
|
46
|
+
@pytest.mark.asyncio
|
|
47
|
+
async def test_unhealthy_cancels_pending_with_server_error(self):
|
|
48
|
+
"""HEALTHY → UNHEALTHY transition cancels pending requests with ServerError."""
|
|
49
|
+
client = ZMQEnvClient(
|
|
50
|
+
address="tcp://127.0.0.1:5555",
|
|
51
|
+
health_check_interval=0, # disable auto thread
|
|
52
|
+
)
|
|
53
|
+
client.loop = asyncio.get_running_loop()
|
|
54
|
+
|
|
55
|
+
# Start in HEALTHY state
|
|
56
|
+
client.server_state = ServerState.HEALTHY
|
|
57
|
+
client.healthy_event.set()
|
|
58
|
+
|
|
59
|
+
# Add a pending request
|
|
60
|
+
future = asyncio.Future()
|
|
61
|
+
async with client.pending_lock:
|
|
62
|
+
client.pending_requests["test_req"] = PendingRequest(
|
|
63
|
+
request_id="test_req",
|
|
64
|
+
request=HealthRequest(),
|
|
65
|
+
submitted_at=time.time(),
|
|
66
|
+
timeout=10.0,
|
|
67
|
+
future=future,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# Trigger UNHEALTHY
|
|
71
|
+
client.on_became_unhealthy(5)
|
|
72
|
+
await asyncio.sleep(0.1) # let _do_cancel_pending run
|
|
73
|
+
|
|
74
|
+
assert future.done()
|
|
75
|
+
assert len(client.pending_requests) == 0
|
|
76
|
+
with pytest.raises(RuntimeError, match="unhealthy"):
|
|
77
|
+
future.result()
|
|
78
|
+
|
|
79
|
+
await client.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class TestRetryOnServerError:
|
|
83
|
+
"""Tests for send_request retry after ServerError."""
|
|
84
|
+
|
|
85
|
+
@pytest.mark.asyncio
|
|
86
|
+
async def test_retry_after_recovery(self):
|
|
87
|
+
"""ServerError → wait for healthy_event → retry succeeds."""
|
|
88
|
+
client = ZMQEnvClient(
|
|
89
|
+
address="tcp://127.0.0.1:5555",
|
|
90
|
+
health_check_interval=0,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
attempt_count = 0
|
|
94
|
+
|
|
95
|
+
async def mock_send(*args, **kwargs):
|
|
96
|
+
nonlocal attempt_count
|
|
97
|
+
attempt_count += 1
|
|
98
|
+
|
|
99
|
+
if attempt_count == 1:
|
|
100
|
+
# First attempt: simulate server crash
|
|
101
|
+
async def fail_then_recover():
|
|
102
|
+
await asyncio.sleep(0.1)
|
|
103
|
+
await client.cancel_all_pending("Connection lost")
|
|
104
|
+
await asyncio.sleep(0.1)
|
|
105
|
+
client.healthy_event.set()
|
|
106
|
+
|
|
107
|
+
asyncio.create_task(fail_then_recover())
|
|
108
|
+
else:
|
|
109
|
+
# Second attempt: succeed
|
|
110
|
+
async def succeed():
|
|
111
|
+
await asyncio.sleep(0.05)
|
|
112
|
+
req_id = list(client.pending_requests.keys())[0]
|
|
113
|
+
pending = client.pending_requests.get(req_id)
|
|
114
|
+
if pending and not pending.future.done():
|
|
115
|
+
pending.future.set_result(
|
|
116
|
+
HealthResponse(success=True).model_dump()
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
asyncio.create_task(succeed())
|
|
120
|
+
|
|
121
|
+
with (
|
|
122
|
+
patch.object(client.socket, "connect"),
|
|
123
|
+
patch.object(client.socket, "send_multipart", new=mock_send),
|
|
124
|
+
):
|
|
125
|
+
await client.ensure_started()
|
|
126
|
+
response = await client.send_request(
|
|
127
|
+
HealthRequest(), HealthResponse, timeout=5.0
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
assert attempt_count == 2
|
|
131
|
+
assert response.success
|
|
132
|
+
|
|
133
|
+
await client.close()
|
|
134
|
+
|
|
135
|
+
@pytest.mark.asyncio
|
|
136
|
+
async def test_recovery_timeout(self):
|
|
137
|
+
"""ServerError + no recovery within timeout → TimeoutError."""
|
|
138
|
+
client = ZMQEnvClient(
|
|
139
|
+
address="tcp://127.0.0.1:5555",
|
|
140
|
+
health_check_interval=0,
|
|
141
|
+
recovery_timeout=0.5,
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
async def mock_send(*args, **kwargs):
|
|
145
|
+
async def fail():
|
|
146
|
+
await asyncio.sleep(0.05)
|
|
147
|
+
await client.cancel_all_pending("Connection lost")
|
|
148
|
+
|
|
149
|
+
asyncio.create_task(fail())
|
|
150
|
+
|
|
151
|
+
with (
|
|
152
|
+
patch.object(client.socket, "connect"),
|
|
153
|
+
patch.object(client.socket, "send_multipart", new=mock_send),
|
|
154
|
+
):
|
|
155
|
+
await client.ensure_started()
|
|
156
|
+
|
|
157
|
+
with pytest.raises(TimeoutError, match="did not recover"):
|
|
158
|
+
await client.send_request(HealthRequest(), HealthResponse, timeout=5.0)
|
|
159
|
+
|
|
160
|
+
await client.close()
|
|
161
|
+
|
|
162
|
+
@pytest.mark.asyncio
|
|
163
|
+
async def test_no_retry_on_runtime_error(self):
|
|
164
|
+
"""Plain RuntimeError propagates immediately without retry."""
|
|
165
|
+
client = ZMQEnvClient(
|
|
166
|
+
address="tcp://127.0.0.1:5555",
|
|
167
|
+
health_check_interval=0,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
attempt_count = 0
|
|
171
|
+
|
|
172
|
+
async def mock_send(*args, **kwargs):
|
|
173
|
+
nonlocal attempt_count
|
|
174
|
+
attempt_count += 1
|
|
175
|
+
|
|
176
|
+
async def fail():
|
|
177
|
+
await asyncio.sleep(0.05)
|
|
178
|
+
req_id = list(client.pending_requests.keys())[0]
|
|
179
|
+
pending = client.pending_requests.get(req_id)
|
|
180
|
+
if pending and not pending.future.done():
|
|
181
|
+
pending.future.set_exception(RuntimeError("Bad request"))
|
|
182
|
+
|
|
183
|
+
asyncio.create_task(fail())
|
|
184
|
+
|
|
185
|
+
with (
|
|
186
|
+
patch.object(client.socket, "connect"),
|
|
187
|
+
patch.object(client.socket, "send_multipart", new=mock_send),
|
|
188
|
+
):
|
|
189
|
+
await client.ensure_started()
|
|
190
|
+
|
|
191
|
+
with pytest.raises(RuntimeError, match="Bad request"):
|
|
192
|
+
await client.send_request(HealthRequest(), HealthResponse, timeout=5.0)
|
|
193
|
+
|
|
194
|
+
assert attempt_count == 1
|
|
195
|
+
|
|
196
|
+
await client.close()
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class TestWaitForServerStartup:
|
|
200
|
+
"""Tests for event-based wait_for_server_startup."""
|
|
201
|
+
|
|
202
|
+
@pytest.mark.asyncio
|
|
203
|
+
async def test_delayed_startup(self):
|
|
204
|
+
"""Startup succeeds when health thread detects server after a delay."""
|
|
205
|
+
client = ZMQEnvClient(
|
|
206
|
+
address="tcp://127.0.0.1:5555",
|
|
207
|
+
health_check_interval=0, # disable auto thread
|
|
208
|
+
)
|
|
209
|
+
client.loop = asyncio.get_running_loop()
|
|
210
|
+
|
|
211
|
+
# Simulate health thread detecting server after a delay
|
|
212
|
+
async def simulate_health_thread():
|
|
213
|
+
await asyncio.sleep(0.2)
|
|
214
|
+
client.on_became_healthy(ServerState.STARTUP)
|
|
215
|
+
|
|
216
|
+
asyncio.create_task(simulate_health_thread())
|
|
217
|
+
|
|
218
|
+
with patch.object(client.socket, "connect"):
|
|
219
|
+
await client.wait_for_server_startup(timeout=3.0)
|
|
220
|
+
|
|
221
|
+
assert client.healthy_event.is_set()
|
|
222
|
+
|
|
223
|
+
await client.close()
|
|
224
|
+
|
|
225
|
+
@pytest.mark.asyncio
|
|
226
|
+
async def test_startup_timeout(self):
|
|
227
|
+
"""Startup raises TimeoutError when server never becomes healthy."""
|
|
228
|
+
client = ZMQEnvClient(
|
|
229
|
+
address="tcp://127.0.0.1:5555",
|
|
230
|
+
health_check_interval=0, # disable auto thread
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
with patch.object(client.socket, "connect"):
|
|
234
|
+
with pytest.raises(TimeoutError, match="did not become healthy"):
|
|
235
|
+
await client.wait_for_server_startup(timeout=0.5)
|
|
236
|
+
|
|
237
|
+
await client.close()
|