verifiers 0.1.10.dev2__tar.gz → 0.1.10.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/.gitignore +1 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/PKG-INFO +20 -17
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/README.md +15 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/pyproject.toml +15 -23
- verifiers-0.1.10.dev4/tests/test_client_config.py +52 -0
- verifiers-0.1.10.dev4/tests/test_endpoint_registry.py +177 -0
- verifiers-0.1.10.dev4/tests/test_environment_extra.py +615 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_envs.py +12 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_eval_cli.py +374 -3
- verifiers-0.1.10.dev4/tests/test_eval_display.py +80 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_eval_utils.py +20 -0
- verifiers-0.1.10.dev4/tests/test_gepa_cli.py +89 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_imports.py +1 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_install_utils.py +3 -2
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_math_rubric.py +8 -15
- verifiers-0.1.10.dev4/tests/test_opencode_harbor.py +57 -0
- verifiers-0.1.10.dev4/tests/test_path_utils.py +89 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_sandbox_env.py +0 -6
- verifiers-0.1.10.dev4/tests/test_sandbox_mixin.py +351 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_save_utils.py +203 -0
- verifiers-0.1.10.dev4/tests/test_setup_script.py +103 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_tool_env.py +4 -1
- verifiers-0.1.10.dev4/tests/test_tui_info_formatting.py +41 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/__init__.py +44 -28
- verifiers-0.1.10.dev4/verifiers/cli/__init__.py +1 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/__init__.py +1 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/build.py +7 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/eval.py +7 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/gepa.py +7 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/init.py +7 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/install.py +7 -0
- verifiers-0.1.10.dev4/verifiers/cli/commands/setup.py +9 -0
- verifiers-0.1.10.dev4/verifiers/cli/plugins/__init__.py +5 -0
- verifiers-0.1.10.dev4/verifiers/cli/plugins/prime.py +97 -0
- verifiers-0.1.10.dev4/verifiers/cli/tui.py +9 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/environment.py +360 -140
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/README.md +8 -0
- verifiers-0.1.10.dev4/verifiers/envs/experimental/__init__.py +3 -0
- verifiers-0.1.10.dev4/verifiers/envs/experimental/cli_agent_env.py +422 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/harbor_env.py +55 -51
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/mcp_env.py +7 -1
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/rlm_env.py +325 -100
- verifiers-0.1.10.dev4/verifiers/envs/experimental/sandbox_mixin.py +241 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/README.md +66 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/__init__.py +0 -6
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +4 -2
- verifiers-0.1.10.dev4/verifiers/envs/integrations/openenv_env.py +1169 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/textarena_env.py +9 -5
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/sandbox_env.py +11 -62
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/tool_env.py +6 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/adapter.py +12 -8
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/display.py +3 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/gepa_utils.py +11 -7
- verifiers-0.1.10.dev4/verifiers/rl/README.md +15 -0
- verifiers-0.1.10.dev4/verifiers/rl/__init__.py +11 -0
- verifiers-0.1.10.dev4/verifiers/rl/inference/__init__.py +11 -0
- verifiers-0.1.10.dev4/verifiers/rl/inference/client.py +3 -0
- verifiers-0.1.10.dev4/verifiers/rl/inference/server.py +11 -0
- verifiers-0.1.10.dev4/verifiers/rl/trainer/__init__.py +29 -0
- verifiers-0.1.10.dev4/verifiers/rl/trainer/config.py +3 -0
- verifiers-0.1.10.dev4/verifiers/rl/trainer/orchestrator.py +3 -0
- verifiers-0.1.10.dev4/verifiers/rl/trainer/trainer.py +4 -0
- verifiers-0.1.10.dev4/verifiers/rl/trainer/utils.py +5 -0
- verifiers-0.1.10.dev4/verifiers/rubrics/math_rubric.py +102 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/rubric.py +16 -5
- verifiers-0.1.10.dev4/verifiers/scripts/build.py +452 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/eval.py +147 -29
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/gepa.py +197 -9
- verifiers-0.1.10.dev4/verifiers/scripts/init.py +436 -0
- verifiers-0.1.10.dev4/verifiers/scripts/rl.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/setup.py +102 -99
- verifiers-0.1.10.dev4/verifiers/scripts/train.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/tui.py +39 -7
- verifiers-0.1.10.dev4/verifiers/scripts/vllm.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/types.py +71 -8
- verifiers-0.1.10.dev4/verifiers/utils/client_utils.py +98 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/data_utils.py +3 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/display_utils.py +16 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/eval_display.py +44 -10
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/eval_utils.py +264 -84
- verifiers-0.1.10.dev4/verifiers/utils/import_utils.py +18 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/install_utils.py +11 -7
- verifiers-0.1.10.dev4/verifiers/utils/interception_utils.py +416 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/message_utils.py +2 -3
- verifiers-0.1.10.dev4/verifiers/utils/path_utils.py +143 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/sandbox_exec_utils.py +6 -2
- verifiers-0.1.10.dev4/verifiers/utils/save_utils.py +575 -0
- verifiers-0.1.10.dev4/verifiers/utils/threaded_sandbox_client.py +63 -0
- verifiers-0.1.10.dev4/verifiers/utils/usage_utils.py +101 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/worker_utils.py +37 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/client/env_client.py +5 -2
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/client/zmq_env_client.py +37 -4
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/server/env_server.py +9 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/server/zmq_env_server.py +2 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/types.py +3 -3
- verifiers-0.1.10.dev2/tests/test_environment_extra.py +0 -224
- verifiers-0.1.10.dev2/verifiers/envs/experimental/cli_agent_env.py +0 -820
- verifiers-0.1.10.dev2/verifiers/rl/README.md +0 -108
- verifiers-0.1.10.dev2/verifiers/rl/inference/client.py +0 -172
- verifiers-0.1.10.dev2/verifiers/rl/inference/server.py +0 -193
- verifiers-0.1.10.dev2/verifiers/rl/trainer/__init__.py +0 -37
- verifiers-0.1.10.dev2/verifiers/rl/trainer/config.py +0 -342
- verifiers-0.1.10.dev2/verifiers/rl/trainer/orchestrator.py +0 -375
- verifiers-0.1.10.dev2/verifiers/rl/trainer/trainer.py +0 -497
- verifiers-0.1.10.dev2/verifiers/rl/trainer/utils.py +0 -289
- verifiers-0.1.10.dev2/verifiers/rubrics/math_rubric.py +0 -88
- verifiers-0.1.10.dev2/verifiers/scripts/__init__.py +0 -0
- verifiers-0.1.10.dev2/verifiers/scripts/init.py +0 -209
- verifiers-0.1.10.dev2/verifiers/scripts/rl.py +0 -207
- verifiers-0.1.10.dev2/verifiers/scripts/train.py +0 -40
- verifiers-0.1.10.dev2/verifiers/utils/__init__.py +0 -0
- verifiers-0.1.10.dev2/verifiers/utils/client_utils.py +0 -62
- verifiers-0.1.10.dev2/verifiers/utils/path_utils.py +0 -51
- verifiers-0.1.10.dev2/verifiers/utils/save_utils.py +0 -385
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/LICENSE +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/README.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/conftest.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/mock_client_guide.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/mock_openai_client.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_env_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_environment.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_environment_audio_modality.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_logging.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_message_utils_audio.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rlm_env_sandbox.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rubric.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/decorators.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/envs/experimental → verifiers-0.1.10.dev4/verifiers/envs/integrations}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/errors.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/envs/integrations → verifiers-0.1.10.dev4/verifiers/parsers}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/parsers → verifiers-0.1.10.dev4/verifiers/rubrics}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/rl/inference → verifiers-0.1.10.dev4/verifiers/scripts}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/rubrics → verifiers-0.1.10.dev4/verifiers/utils}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/token_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev4}/verifiers/workers/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.10.dev2
|
|
3
|
+
Version: 0.1.10.dev4
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -32,8 +32,8 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
|
32
32
|
Requires-Dist: numpy
|
|
33
33
|
Requires-Dist: openai-agents>=0.0.7
|
|
34
34
|
Requires-Dist: openai>=1.108.1
|
|
35
|
-
Requires-Dist: prime-sandboxes>=0.2.
|
|
36
|
-
Requires-Dist: prime-tunnel
|
|
35
|
+
Requires-Dist: prime-sandboxes>=0.2.14
|
|
36
|
+
Requires-Dist: prime-tunnel>=0.1.0
|
|
37
37
|
Requires-Dist: pydantic>=2.11.9
|
|
38
38
|
Requires-Dist: pyzmq>=27.1.0
|
|
39
39
|
Requires-Dist: requests
|
|
@@ -47,19 +47,10 @@ Provides-Extra: browser
|
|
|
47
47
|
Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
|
|
48
48
|
Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
|
|
49
49
|
Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
50
|
+
Provides-Extra: openenv
|
|
51
|
+
Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
|
|
50
52
|
Provides-Extra: rg
|
|
51
53
|
Requires-Dist: reasoning-gym; extra == 'rg'
|
|
52
|
-
Provides-Extra: rl
|
|
53
|
-
Requires-Dist: accelerate>=1.4.0; extra == 'rl'
|
|
54
|
-
Requires-Dist: deepspeed>=0.17.6; extra == 'rl'
|
|
55
|
-
Requires-Dist: flash-attn>=2.8.3; extra == 'rl'
|
|
56
|
-
Requires-Dist: liger-kernel>=0.5.10; extra == 'rl'
|
|
57
|
-
Requires-Dist: peft; extra == 'rl'
|
|
58
|
-
Requires-Dist: requests; extra == 'rl'
|
|
59
|
-
Requires-Dist: torch<2.9.0,>=2.8.0; extra == 'rl'
|
|
60
|
-
Requires-Dist: transformers>=4.56.2; extra == 'rl'
|
|
61
|
-
Requires-Dist: vllm<0.11.0,>=0.10.0; extra == 'rl'
|
|
62
|
-
Requires-Dist: wandb; extra == 'rl'
|
|
63
54
|
Provides-Extra: ta
|
|
64
55
|
Requires-Dist: nltk; extra == 'ta'
|
|
65
56
|
Requires-Dist: textarena; extra == 'ta'
|
|
@@ -140,8 +131,12 @@ prime lab setup
|
|
|
140
131
|
This sets up a Python project if needed (with `uv init`), installs `verifiers` (with `uv add verifiers`), creates the recommended workspace structure, and downloads useful starter files:
|
|
141
132
|
```
|
|
142
133
|
configs/
|
|
143
|
-
├── endpoints.
|
|
144
|
-
|
|
134
|
+
├── endpoints.toml # OpenAI-compatible API endpoint configuration
|
|
135
|
+
├── rl/ # Example configs for Hosted Training
|
|
136
|
+
├── eval/ # Example multi-environment eval configs
|
|
137
|
+
└── gepa/ # Example configs for prompt optimization
|
|
138
|
+
.prime/
|
|
139
|
+
└── skills/ # Bundled workflow skills for create/browse/review/eval/GEPA/train/brainstorm
|
|
145
140
|
environments/
|
|
146
141
|
└── AGENTS.md # Documentation for AI coding agents
|
|
147
142
|
AGENTS.md # Top-level documentation for AI coding agents
|
|
@@ -157,6 +152,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
157
152
|
```bash
|
|
158
153
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
159
154
|
```
|
|
155
|
+
For OpenEnv integration, use:
|
|
156
|
+
```bash
|
|
157
|
+
prime env init my-openenv --openenv
|
|
158
|
+
```
|
|
159
|
+
Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
|
|
160
|
+
```bash
|
|
161
|
+
uv run vf-build my-openenv
|
|
162
|
+
```
|
|
160
163
|
|
|
161
164
|
This will create a new module called `my_env` with a basic environment template.
|
|
162
165
|
```
|
|
@@ -195,7 +198,7 @@ To run a local evaluation with any OpenAI-compatible model, do:
|
|
|
195
198
|
```bash
|
|
196
199
|
prime eval run my-env -m gpt-5-nano # run and save eval results locally
|
|
197
200
|
```
|
|
198
|
-
Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.py`.
|
|
201
|
+
Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.toml`.
|
|
199
202
|
|
|
200
203
|
View local evaluation results in the terminal UI:
|
|
201
204
|
```bash
|
|
@@ -73,8 +73,12 @@ prime lab setup
|
|
|
73
73
|
This sets up a Python project if needed (with `uv init`), installs `verifiers` (with `uv add verifiers`), creates the recommended workspace structure, and downloads useful starter files:
|
|
74
74
|
```
|
|
75
75
|
configs/
|
|
76
|
-
├── endpoints.
|
|
77
|
-
|
|
76
|
+
├── endpoints.toml # OpenAI-compatible API endpoint configuration
|
|
77
|
+
├── rl/ # Example configs for Hosted Training
|
|
78
|
+
├── eval/ # Example multi-environment eval configs
|
|
79
|
+
└── gepa/ # Example configs for prompt optimization
|
|
80
|
+
.prime/
|
|
81
|
+
└── skills/ # Bundled workflow skills for create/browse/review/eval/GEPA/train/brainstorm
|
|
78
82
|
environments/
|
|
79
83
|
└── AGENTS.md # Documentation for AI coding agents
|
|
80
84
|
AGENTS.md # Top-level documentation for AI coding agents
|
|
@@ -90,6 +94,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
90
94
|
```bash
|
|
91
95
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
92
96
|
```
|
|
97
|
+
For OpenEnv integration, use:
|
|
98
|
+
```bash
|
|
99
|
+
prime env init my-openenv --openenv
|
|
100
|
+
```
|
|
101
|
+
Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
|
|
102
|
+
```bash
|
|
103
|
+
uv run vf-build my-openenv
|
|
104
|
+
```
|
|
93
105
|
|
|
94
106
|
This will create a new module called `my_env` with a basic environment template.
|
|
95
107
|
```
|
|
@@ -128,7 +140,7 @@ To run a local evaluation with any OpenAI-compatible model, do:
|
|
|
128
140
|
```bash
|
|
129
141
|
prime eval run my-env -m gpt-5-nano # run and save eval results locally
|
|
130
142
|
```
|
|
131
|
-
Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.py`.
|
|
143
|
+
Evaluations use [Prime Inference](https://docs.primeintellect.ai/inference/overview) by default; configure your own API endpoints in `./configs/endpoints.toml`.
|
|
132
144
|
|
|
133
145
|
View local evaluation results in the terminal UI:
|
|
134
146
|
```bash
|
|
@@ -36,8 +36,8 @@ dependencies = [
|
|
|
36
36
|
"nest-asyncio>=1.6.0", # for jupyter notebooks
|
|
37
37
|
"openai>=1.108.1",
|
|
38
38
|
"openai-agents>=0.0.7",
|
|
39
|
-
"prime-tunnel",
|
|
40
|
-
"prime-sandboxes>=0.2.
|
|
39
|
+
"prime-tunnel>=0.1.0",
|
|
40
|
+
"prime-sandboxes>=0.2.14",
|
|
41
41
|
"pydantic>=2.11.9",
|
|
42
42
|
"requests",
|
|
43
43
|
"rich",
|
|
@@ -64,6 +64,10 @@ dev = [
|
|
|
64
64
|
"ipywidgets",
|
|
65
65
|
"reasoning-gym",
|
|
66
66
|
"textarena",
|
|
67
|
+
"openenv-core[core]==0.2.1",
|
|
68
|
+
"stagehand>=3.0.0",
|
|
69
|
+
"aiohttp>=3.9.0",
|
|
70
|
+
"python-dotenv>=1.0.0",
|
|
67
71
|
"nltk",
|
|
68
72
|
]
|
|
69
73
|
|
|
@@ -75,40 +79,25 @@ ta = [
|
|
|
75
79
|
"textarena",
|
|
76
80
|
"nltk",
|
|
77
81
|
]
|
|
82
|
+
openenv = [
|
|
83
|
+
"openenv-core[core]==0.2.1",
|
|
84
|
+
]
|
|
78
85
|
browser = [
|
|
79
86
|
"stagehand>=3.0.0",
|
|
80
87
|
"aiohttp>=3.9.0",
|
|
81
88
|
"python-dotenv>=1.0.0",
|
|
82
89
|
]
|
|
83
|
-
rl = [
|
|
84
|
-
"torch>=2.8.0,<2.9.0",
|
|
85
|
-
"transformers>=4.56.2",
|
|
86
|
-
"accelerate>=1.4.0",
|
|
87
|
-
"requests",
|
|
88
|
-
"peft",
|
|
89
|
-
"wandb",
|
|
90
|
-
"vllm>=0.10.0,<0.11.0",
|
|
91
|
-
"liger-kernel>=0.5.10",
|
|
92
|
-
"deepspeed>=0.17.6",
|
|
93
|
-
"flash-attn>=2.8.3",
|
|
94
|
-
]
|
|
95
|
-
|
|
96
|
-
[tool.uv.extra-build-dependencies]
|
|
97
|
-
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
98
|
-
|
|
99
|
-
[tool.uv.extra-build-variables]
|
|
100
|
-
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
|
|
101
|
-
|
|
102
90
|
[project.scripts]
|
|
103
91
|
vf-eval = "verifiers.scripts.eval:main"
|
|
104
92
|
vf-gepa = "verifiers.scripts.gepa:main"
|
|
105
93
|
vf-init = "verifiers.scripts.init:main"
|
|
106
94
|
vf-install = "verifiers.scripts.install:main"
|
|
107
95
|
vf-setup = "verifiers.scripts.setup:main"
|
|
96
|
+
vf-build = "verifiers.scripts.build:main"
|
|
108
97
|
vf-rl = "verifiers.scripts.rl:main"
|
|
109
98
|
vf-train = "verifiers.scripts.train:main"
|
|
110
99
|
vf-tui = "verifiers.scripts.tui:main"
|
|
111
|
-
vf-vllm = "verifiers.
|
|
100
|
+
vf-vllm = "verifiers.scripts.vllm:main"
|
|
112
101
|
prime-rl = "verifiers.scripts.prime_rl:main"
|
|
113
102
|
|
|
114
103
|
# hatchling configuration
|
|
@@ -171,9 +160,12 @@ filterwarnings = [
|
|
|
171
160
|
asyncio_mode = "auto"
|
|
172
161
|
norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
|
|
173
162
|
|
|
163
|
+
[tool.ty.environment]
|
|
164
|
+
python-version = "3.13"
|
|
165
|
+
|
|
174
166
|
[tool.ty.rules]
|
|
175
|
-
unresolved-import = "warn"
|
|
176
167
|
unknown-argument = "warn"
|
|
168
|
+
redundant-cast = "ignore"
|
|
177
169
|
|
|
178
170
|
[tool.ty.src]
|
|
179
171
|
exclude = ["environments"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from pydantic import ValidationError
|
|
3
|
+
|
|
4
|
+
from verifiers.types import ClientConfig, EndpointClientConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_client_config_allows_leaf_endpoint_configs():
|
|
8
|
+
config = ClientConfig(
|
|
9
|
+
api_base_url="http://localhost:8000/v1",
|
|
10
|
+
endpoint_configs=[
|
|
11
|
+
EndpointClientConfig(api_base_url="http://localhost:8001/v1"),
|
|
12
|
+
{"api_base_url": "http://localhost:8002/v1"},
|
|
13
|
+
],
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
assert len(config.endpoint_configs) == 2
|
|
17
|
+
assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
|
|
18
|
+
assert config.endpoint_configs[1].api_base_url == "http://localhost:8002/v1"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_client_config_rejects_recursive_endpoint_configs():
|
|
22
|
+
with pytest.raises(ValidationError, match="cannot include endpoint_configs"):
|
|
23
|
+
ClientConfig.model_validate(
|
|
24
|
+
{
|
|
25
|
+
"api_base_url": "http://localhost:8000/v1",
|
|
26
|
+
"endpoint_configs": [
|
|
27
|
+
{
|
|
28
|
+
"api_base_url": "http://localhost:8001/v1",
|
|
29
|
+
"endpoint_configs": [
|
|
30
|
+
{"api_base_url": "http://localhost:8002/v1"}
|
|
31
|
+
],
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
}
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_client_config_accepts_empty_nested_endpoint_configs_key():
|
|
39
|
+
config = ClientConfig.model_validate(
|
|
40
|
+
{
|
|
41
|
+
"api_base_url": "http://localhost:8000/v1",
|
|
42
|
+
"endpoint_configs": [
|
|
43
|
+
{
|
|
44
|
+
"api_base_url": "http://localhost:8001/v1",
|
|
45
|
+
"endpoint_configs": [],
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
}
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
assert len(config.endpoint_configs) == 1
|
|
52
|
+
assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from verifiers.utils.eval_utils import load_endpoints
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_load_endpoints_python_registry_normalizes_to_lists(tmp_path: Path):
|
|
7
|
+
registry_path = tmp_path / "endpoints.py"
|
|
8
|
+
registry_path.write_text(
|
|
9
|
+
"ENDPOINTS = {\n"
|
|
10
|
+
' "gpt-4.1-mini": {"model": "gpt-4.1-mini", "url": "https://api.openai.com/v1", "key": "OPENAI_API_KEY"},\n'
|
|
11
|
+
"}\n",
|
|
12
|
+
encoding="utf-8",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
endpoints = load_endpoints(str(registry_path))
|
|
16
|
+
|
|
17
|
+
assert set(endpoints.keys()) == {"gpt-4.1-mini"}
|
|
18
|
+
assert len(endpoints["gpt-4.1-mini"]) == 1
|
|
19
|
+
endpoint = endpoints["gpt-4.1-mini"][0]
|
|
20
|
+
assert endpoint["model"] == "gpt-4.1-mini"
|
|
21
|
+
assert endpoint["url"] == "https://api.openai.com/v1"
|
|
22
|
+
assert endpoint["key"] == "OPENAI_API_KEY"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
|
|
26
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
27
|
+
registry_path.write_text(
|
|
28
|
+
"[[endpoint]]\n"
|
|
29
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
30
|
+
'model = "openai/gpt-5-mini"\n'
|
|
31
|
+
'url = "https://api.pinference.ai/api/v1"\n'
|
|
32
|
+
'key = "PRIME_API_KEY"\n'
|
|
33
|
+
"\n"
|
|
34
|
+
"[[endpoint]]\n"
|
|
35
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
36
|
+
'model = "openai/gpt-5-mini"\n'
|
|
37
|
+
'url = "https://api.openai.com/v1"\n'
|
|
38
|
+
'key = "OPENAI_API_KEY"\n',
|
|
39
|
+
encoding="utf-8",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
endpoints = load_endpoints(str(registry_path))
|
|
43
|
+
|
|
44
|
+
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
45
|
+
assert len(endpoints["gpt-5-mini"]) == 2
|
|
46
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
47
|
+
assert endpoints["gpt-5-mini"][1]["url"] == "https://api.openai.com/v1"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
51
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
52
|
+
registry_path.write_text(
|
|
53
|
+
"[[endpoint]]\n"
|
|
54
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
55
|
+
'model = "openai/gpt-5-mini"\n'
|
|
56
|
+
'api_base_url = "https://api.pinference.ai/api/v1"\n'
|
|
57
|
+
'api_key_var = "PRIME_API_KEY"\n',
|
|
58
|
+
encoding="utf-8",
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
endpoints = load_endpoints(str(registry_path))
|
|
62
|
+
|
|
63
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
64
|
+
assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
|
|
68
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
69
|
+
registry_path.write_text(
|
|
70
|
+
"[[endpoint]]\n"
|
|
71
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
72
|
+
'model = "openai/gpt-5-mini"\n'
|
|
73
|
+
'url = "https://api.pinference.ai/api/v1"\n'
|
|
74
|
+
'api_base_url = "https://api.pinference.ai/api/v1"\n'
|
|
75
|
+
'key = "PRIME_API_KEY"\n'
|
|
76
|
+
'api_key_var = "PRIME_API_KEY"\n',
|
|
77
|
+
encoding="utf-8",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
endpoints = load_endpoints(str(registry_path))
|
|
81
|
+
|
|
82
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
83
|
+
assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
|
|
87
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
88
|
+
registry_path.write_text(
|
|
89
|
+
"[[endpoint]]\n"
|
|
90
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
91
|
+
'model = "openai/gpt-5-mini"\n'
|
|
92
|
+
'url = "https://a.example/v1"\n'
|
|
93
|
+
'api_base_url = "https://b.example/v1"\n'
|
|
94
|
+
'key = "PRIME_API_KEY"\n',
|
|
95
|
+
encoding="utf-8",
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
endpoints = load_endpoints(str(registry_path))
|
|
99
|
+
|
|
100
|
+
assert endpoints == {}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_load_endpoints_toml_rejects_conflicting_key_fields(tmp_path: Path):
|
|
104
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
105
|
+
registry_path.write_text(
|
|
106
|
+
"[[endpoint]]\n"
|
|
107
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
108
|
+
'model = "openai/gpt-5-mini"\n'
|
|
109
|
+
'url = "https://a.example/v1"\n'
|
|
110
|
+
'key = "A_KEY"\n'
|
|
111
|
+
'api_key_var = "B_KEY"\n',
|
|
112
|
+
encoding="utf-8",
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
endpoints = load_endpoints(str(registry_path))
|
|
116
|
+
|
|
117
|
+
assert endpoints == {}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_load_endpoints_python_registry_supports_list_variants(tmp_path: Path):
|
|
121
|
+
registry_path = tmp_path / "endpoints.py"
|
|
122
|
+
registry_path.write_text(
|
|
123
|
+
"ENDPOINTS = {\n"
|
|
124
|
+
' "gpt-5-mini": [\n'
|
|
125
|
+
' {"model": "gpt-5-mini", "url": "https://a.example/v1", "key": "A_KEY"},\n'
|
|
126
|
+
' {"model": "gpt-5-mini", "url": "https://b.example/v1", "key": "A_KEY"},\n'
|
|
127
|
+
" ]\n"
|
|
128
|
+
"}\n",
|
|
129
|
+
encoding="utf-8",
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
endpoints = load_endpoints(str(registry_path))
|
|
133
|
+
|
|
134
|
+
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
135
|
+
assert len(endpoints["gpt-5-mini"]) == 2
|
|
136
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://a.example/v1"
|
|
137
|
+
assert endpoints["gpt-5-mini"][1]["url"] == "https://b.example/v1"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_load_endpoints_directory_prefers_toml_then_python(tmp_path: Path):
|
|
141
|
+
python_registry = tmp_path / "endpoints.py"
|
|
142
|
+
toml_registry = tmp_path / "endpoints.toml"
|
|
143
|
+
|
|
144
|
+
python_registry.write_text(
|
|
145
|
+
"ENDPOINTS = {\n"
|
|
146
|
+
' "from-py": {"model": "m", "url": "https://py.example/v1", "key": "PY_KEY"},\n'
|
|
147
|
+
"}\n",
|
|
148
|
+
encoding="utf-8",
|
|
149
|
+
)
|
|
150
|
+
toml_registry.write_text(
|
|
151
|
+
"[[endpoint]]\n"
|
|
152
|
+
'endpoint_id = "from-toml"\n'
|
|
153
|
+
'model = "m"\n'
|
|
154
|
+
'url = "https://toml.example/v1"\n'
|
|
155
|
+
'key = "TOML_KEY"\n',
|
|
156
|
+
encoding="utf-8",
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
endpoints = load_endpoints(str(tmp_path))
|
|
160
|
+
assert set(endpoints.keys()) == {"from-toml"}
|
|
161
|
+
|
|
162
|
+
toml_registry.unlink()
|
|
163
|
+
endpoints = load_endpoints(str(tmp_path))
|
|
164
|
+
assert set(endpoints.keys()) == {"from-py"}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_qwen3_vl_endpoint_ids_map_to_vl_models():
|
|
168
|
+
endpoints = load_endpoints("./configs/endpoints.toml")
|
|
169
|
+
|
|
170
|
+
assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
171
|
+
assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
172
|
+
assert (
|
|
173
|
+
endpoints["qwen3-vl-235b-i"][0]["model"] == "qwen/qwen3-vl-235b-a22b-instruct"
|
|
174
|
+
)
|
|
175
|
+
assert (
|
|
176
|
+
endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
177
|
+
)
|