verifiers 0.1.10.dev2__tar.gz → 0.1.10.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/.gitignore +1 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/PKG-INFO +13 -14
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/README.md +8 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/pyproject.toml +15 -23
- verifiers-0.1.10.dev3/tests/test_client_config.py +52 -0
- verifiers-0.1.10.dev3/tests/test_endpoint_registry.py +177 -0
- verifiers-0.1.10.dev3/tests/test_environment_extra.py +615 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_envs.py +12 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_eval_cli.py +374 -3
- verifiers-0.1.10.dev3/tests/test_eval_display.py +80 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_eval_utils.py +20 -0
- verifiers-0.1.10.dev3/tests/test_gepa_cli.py +89 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_imports.py +1 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_math_rubric.py +8 -15
- verifiers-0.1.10.dev3/tests/test_opencode_harbor.py +57 -0
- verifiers-0.1.10.dev3/tests/test_path_utils.py +89 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_sandbox_env.py +0 -6
- verifiers-0.1.10.dev3/tests/test_sandbox_mixin.py +351 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_save_utils.py +203 -0
- verifiers-0.1.10.dev3/tests/test_setup_script.py +69 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_tool_env.py +4 -1
- verifiers-0.1.10.dev3/tests/test_tui_info_formatting.py +41 -0
- verifiers-0.1.10.dev3/tests/test_worker_client_timeouts.py +149 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/__init__.py +44 -28
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/environment.py +357 -137
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/README.md +8 -0
- verifiers-0.1.10.dev3/verifiers/envs/experimental/__init__.py +3 -0
- verifiers-0.1.10.dev3/verifiers/envs/experimental/cli_agent_env.py +422 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/harbor_env.py +55 -51
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/mcp_env.py +7 -1
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/rlm_env.py +325 -100
- verifiers-0.1.10.dev3/verifiers/envs/experimental/sandbox_mixin.py +241 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/README.md +66 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/__init__.py +0 -6
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +4 -2
- verifiers-0.1.10.dev3/verifiers/envs/integrations/openenv_env.py +1169 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/textarena_env.py +9 -5
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/sandbox_env.py +11 -62
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/tool_env.py +6 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/adapter.py +12 -8
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/display.py +3 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/gepa_utils.py +11 -7
- verifiers-0.1.10.dev3/verifiers/rl/README.md +15 -0
- verifiers-0.1.10.dev3/verifiers/rl/__init__.py +11 -0
- verifiers-0.1.10.dev3/verifiers/rl/inference/__init__.py +11 -0
- verifiers-0.1.10.dev3/verifiers/rl/inference/client.py +3 -0
- verifiers-0.1.10.dev3/verifiers/rl/inference/server.py +11 -0
- verifiers-0.1.10.dev3/verifiers/rl/trainer/__init__.py +29 -0
- verifiers-0.1.10.dev3/verifiers/rl/trainer/config.py +3 -0
- verifiers-0.1.10.dev3/verifiers/rl/trainer/orchestrator.py +3 -0
- verifiers-0.1.10.dev3/verifiers/rl/trainer/trainer.py +4 -0
- verifiers-0.1.10.dev3/verifiers/rl/trainer/utils.py +5 -0
- verifiers-0.1.10.dev3/verifiers/rubrics/math_rubric.py +102 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/rubric.py +16 -5
- verifiers-0.1.10.dev3/verifiers/scripts/build.py +452 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/eval.py +147 -29
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/gepa.py +197 -9
- verifiers-0.1.10.dev3/verifiers/scripts/init.py +436 -0
- verifiers-0.1.10.dev3/verifiers/scripts/rl.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/setup.py +61 -95
- verifiers-0.1.10.dev3/verifiers/scripts/train.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/tui.py +39 -7
- verifiers-0.1.10.dev3/verifiers/scripts/vllm.py +11 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/types.py +71 -8
- verifiers-0.1.10.dev3/verifiers/utils/client_utils.py +98 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/data_utils.py +3 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/display_utils.py +16 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/eval_display.py +44 -10
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/eval_utils.py +264 -84
- verifiers-0.1.10.dev3/verifiers/utils/import_utils.py +18 -0
- verifiers-0.1.10.dev3/verifiers/utils/interception_utils.py +416 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/message_utils.py +2 -3
- verifiers-0.1.10.dev3/verifiers/utils/path_utils.py +143 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/sandbox_exec_utils.py +6 -2
- verifiers-0.1.10.dev3/verifiers/utils/save_utils.py +575 -0
- verifiers-0.1.10.dev3/verifiers/utils/threaded_sandbox_client.py +63 -0
- verifiers-0.1.10.dev3/verifiers/utils/usage_utils.py +101 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/client/env_client.py +18 -4
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/client/zmq_env_client.py +39 -4
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/server/env_server.py +9 -3
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/server/zmq_env_server.py +2 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/types.py +3 -3
- verifiers-0.1.10.dev2/tests/test_environment_extra.py +0 -224
- verifiers-0.1.10.dev2/verifiers/envs/experimental/cli_agent_env.py +0 -820
- verifiers-0.1.10.dev2/verifiers/rl/README.md +0 -108
- verifiers-0.1.10.dev2/verifiers/rl/inference/client.py +0 -172
- verifiers-0.1.10.dev2/verifiers/rl/inference/server.py +0 -193
- verifiers-0.1.10.dev2/verifiers/rl/trainer/__init__.py +0 -37
- verifiers-0.1.10.dev2/verifiers/rl/trainer/config.py +0 -342
- verifiers-0.1.10.dev2/verifiers/rl/trainer/orchestrator.py +0 -375
- verifiers-0.1.10.dev2/verifiers/rl/trainer/trainer.py +0 -497
- verifiers-0.1.10.dev2/verifiers/rl/trainer/utils.py +0 -289
- verifiers-0.1.10.dev2/verifiers/rubrics/math_rubric.py +0 -88
- verifiers-0.1.10.dev2/verifiers/scripts/__init__.py +0 -0
- verifiers-0.1.10.dev2/verifiers/scripts/init.py +0 -209
- verifiers-0.1.10.dev2/verifiers/scripts/rl.py +0 -207
- verifiers-0.1.10.dev2/verifiers/scripts/train.py +0 -40
- verifiers-0.1.10.dev2/verifiers/utils/__init__.py +0 -0
- verifiers-0.1.10.dev2/verifiers/utils/client_utils.py +0 -62
- verifiers-0.1.10.dev2/verifiers/utils/path_utils.py +0 -51
- verifiers-0.1.10.dev2/verifiers/utils/save_utils.py +0 -385
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/LICENSE +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/README.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/conftest.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/mock_client_guide.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/mock_openai_client.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_env_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_environment.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_environment_audio_modality.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_logging.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_message_utils_audio.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rlm_env_sandbox.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rubric.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/decorators.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/envs/experimental → verifiers-0.1.10.dev3/verifiers/envs/integrations}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/errors.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/envs/integrations → verifiers-0.1.10.dev3/verifiers/parsers}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/parsers → verifiers-0.1.10.dev3/verifiers/rubrics}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/rl/inference → verifiers-0.1.10.dev3/verifiers/scripts}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/scripts/prime_rl.py +0 -0
- {verifiers-0.1.10.dev2/verifiers/rubrics → verifiers-0.1.10.dev3/verifiers/utils}/__init__.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/token_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/tunnel_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/utils/worker_utils.py +0 -0
- {verifiers-0.1.10.dev2 → verifiers-0.1.10.dev3}/verifiers/workers/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.10.dev2
|
|
3
|
+
Version: 0.1.10.dev3
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -32,8 +32,8 @@ Requires-Dist: nest-asyncio>=1.6.0
|
|
|
32
32
|
Requires-Dist: numpy
|
|
33
33
|
Requires-Dist: openai-agents>=0.0.7
|
|
34
34
|
Requires-Dist: openai>=1.108.1
|
|
35
|
-
Requires-Dist: prime-sandboxes>=0.2.
|
|
36
|
-
Requires-Dist: prime-tunnel
|
|
35
|
+
Requires-Dist: prime-sandboxes>=0.2.14
|
|
36
|
+
Requires-Dist: prime-tunnel>=0.1.0
|
|
37
37
|
Requires-Dist: pydantic>=2.11.9
|
|
38
38
|
Requires-Dist: pyzmq>=27.1.0
|
|
39
39
|
Requires-Dist: requests
|
|
@@ -47,19 +47,10 @@ Provides-Extra: browser
|
|
|
47
47
|
Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
|
|
48
48
|
Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
|
|
49
49
|
Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
50
|
+
Provides-Extra: openenv
|
|
51
|
+
Requires-Dist: openenv-core[core]==0.2.1; extra == 'openenv'
|
|
50
52
|
Provides-Extra: rg
|
|
51
53
|
Requires-Dist: reasoning-gym; extra == 'rg'
|
|
52
|
-
Provides-Extra: rl
|
|
53
|
-
Requires-Dist: accelerate>=1.4.0; extra == 'rl'
|
|
54
|
-
Requires-Dist: deepspeed>=0.17.6; extra == 'rl'
|
|
55
|
-
Requires-Dist: flash-attn>=2.8.3; extra == 'rl'
|
|
56
|
-
Requires-Dist: liger-kernel>=0.5.10; extra == 'rl'
|
|
57
|
-
Requires-Dist: peft; extra == 'rl'
|
|
58
|
-
Requires-Dist: requests; extra == 'rl'
|
|
59
|
-
Requires-Dist: torch<2.9.0,>=2.8.0; extra == 'rl'
|
|
60
|
-
Requires-Dist: transformers>=4.56.2; extra == 'rl'
|
|
61
|
-
Requires-Dist: vllm<0.11.0,>=0.10.0; extra == 'rl'
|
|
62
|
-
Requires-Dist: wandb; extra == 'rl'
|
|
63
54
|
Provides-Extra: ta
|
|
64
55
|
Requires-Dist: nltk; extra == 'ta'
|
|
65
56
|
Requires-Dist: textarena; extra == 'ta'
|
|
@@ -157,6 +148,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
157
148
|
```bash
|
|
158
149
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
159
150
|
```
|
|
151
|
+
For OpenEnv integration, use:
|
|
152
|
+
```bash
|
|
153
|
+
prime env init my-openenv --openenv
|
|
154
|
+
```
|
|
155
|
+
Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
|
|
156
|
+
```bash
|
|
157
|
+
uv run vf-build my-openenv
|
|
158
|
+
```
|
|
160
159
|
|
|
161
160
|
This will create a new module called `my_env` with a basic environment template.
|
|
162
161
|
```
|
|
@@ -90,6 +90,14 @@ Environments built with Verifiers are self-contained Python modules. To initiali
|
|
|
90
90
|
```bash
|
|
91
91
|
prime env init my-env # creates a new template in ./environments/my_env
|
|
92
92
|
```
|
|
93
|
+
For OpenEnv integration, use:
|
|
94
|
+
```bash
|
|
95
|
+
prime env init my-openenv --openenv
|
|
96
|
+
```
|
|
97
|
+
Then copy your OpenEnv project into `environments/my_openenv/proj/` and build the image with:
|
|
98
|
+
```bash
|
|
99
|
+
uv run vf-build my-openenv
|
|
100
|
+
```
|
|
93
101
|
|
|
94
102
|
This will create a new module called `my_env` with a basic environment template.
|
|
95
103
|
```
|
|
@@ -36,8 +36,8 @@ dependencies = [
|
|
|
36
36
|
"nest-asyncio>=1.6.0", # for jupyter notebooks
|
|
37
37
|
"openai>=1.108.1",
|
|
38
38
|
"openai-agents>=0.0.7",
|
|
39
|
-
"prime-tunnel",
|
|
40
|
-
"prime-sandboxes>=0.2.
|
|
39
|
+
"prime-tunnel>=0.1.0",
|
|
40
|
+
"prime-sandboxes>=0.2.14",
|
|
41
41
|
"pydantic>=2.11.9",
|
|
42
42
|
"requests",
|
|
43
43
|
"rich",
|
|
@@ -64,6 +64,10 @@ dev = [
|
|
|
64
64
|
"ipywidgets",
|
|
65
65
|
"reasoning-gym",
|
|
66
66
|
"textarena",
|
|
67
|
+
"openenv-core[core]==0.2.1",
|
|
68
|
+
"stagehand>=3.0.0",
|
|
69
|
+
"aiohttp>=3.9.0",
|
|
70
|
+
"python-dotenv>=1.0.0",
|
|
67
71
|
"nltk",
|
|
68
72
|
]
|
|
69
73
|
|
|
@@ -75,40 +79,25 @@ ta = [
|
|
|
75
79
|
"textarena",
|
|
76
80
|
"nltk",
|
|
77
81
|
]
|
|
82
|
+
openenv = [
|
|
83
|
+
"openenv-core[core]==0.2.1",
|
|
84
|
+
]
|
|
78
85
|
browser = [
|
|
79
86
|
"stagehand>=3.0.0",
|
|
80
87
|
"aiohttp>=3.9.0",
|
|
81
88
|
"python-dotenv>=1.0.0",
|
|
82
89
|
]
|
|
83
|
-
rl = [
|
|
84
|
-
"torch>=2.8.0,<2.9.0",
|
|
85
|
-
"transformers>=4.56.2",
|
|
86
|
-
"accelerate>=1.4.0",
|
|
87
|
-
"requests",
|
|
88
|
-
"peft",
|
|
89
|
-
"wandb",
|
|
90
|
-
"vllm>=0.10.0,<0.11.0",
|
|
91
|
-
"liger-kernel>=0.5.10",
|
|
92
|
-
"deepspeed>=0.17.6",
|
|
93
|
-
"flash-attn>=2.8.3",
|
|
94
|
-
]
|
|
95
|
-
|
|
96
|
-
[tool.uv.extra-build-dependencies]
|
|
97
|
-
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
98
|
-
|
|
99
|
-
[tool.uv.extra-build-variables]
|
|
100
|
-
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
|
|
101
|
-
|
|
102
90
|
[project.scripts]
|
|
103
91
|
vf-eval = "verifiers.scripts.eval:main"
|
|
104
92
|
vf-gepa = "verifiers.scripts.gepa:main"
|
|
105
93
|
vf-init = "verifiers.scripts.init:main"
|
|
106
94
|
vf-install = "verifiers.scripts.install:main"
|
|
107
95
|
vf-setup = "verifiers.scripts.setup:main"
|
|
96
|
+
vf-build = "verifiers.scripts.build:main"
|
|
108
97
|
vf-rl = "verifiers.scripts.rl:main"
|
|
109
98
|
vf-train = "verifiers.scripts.train:main"
|
|
110
99
|
vf-tui = "verifiers.scripts.tui:main"
|
|
111
|
-
vf-vllm = "verifiers.
|
|
100
|
+
vf-vllm = "verifiers.scripts.vllm:main"
|
|
112
101
|
prime-rl = "verifiers.scripts.prime_rl:main"
|
|
113
102
|
|
|
114
103
|
# hatchling configuration
|
|
@@ -171,9 +160,12 @@ filterwarnings = [
|
|
|
171
160
|
asyncio_mode = "auto"
|
|
172
161
|
norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
|
|
173
162
|
|
|
163
|
+
[tool.ty.environment]
|
|
164
|
+
python-version = "3.13"
|
|
165
|
+
|
|
174
166
|
[tool.ty.rules]
|
|
175
|
-
unresolved-import = "warn"
|
|
176
167
|
unknown-argument = "warn"
|
|
168
|
+
redundant-cast = "ignore"
|
|
177
169
|
|
|
178
170
|
[tool.ty.src]
|
|
179
171
|
exclude = ["environments"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from pydantic import ValidationError
|
|
3
|
+
|
|
4
|
+
from verifiers.types import ClientConfig, EndpointClientConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_client_config_allows_leaf_endpoint_configs():
|
|
8
|
+
config = ClientConfig(
|
|
9
|
+
api_base_url="http://localhost:8000/v1",
|
|
10
|
+
endpoint_configs=[
|
|
11
|
+
EndpointClientConfig(api_base_url="http://localhost:8001/v1"),
|
|
12
|
+
{"api_base_url": "http://localhost:8002/v1"},
|
|
13
|
+
],
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
assert len(config.endpoint_configs) == 2
|
|
17
|
+
assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
|
|
18
|
+
assert config.endpoint_configs[1].api_base_url == "http://localhost:8002/v1"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_client_config_rejects_recursive_endpoint_configs():
|
|
22
|
+
with pytest.raises(ValidationError, match="cannot include endpoint_configs"):
|
|
23
|
+
ClientConfig.model_validate(
|
|
24
|
+
{
|
|
25
|
+
"api_base_url": "http://localhost:8000/v1",
|
|
26
|
+
"endpoint_configs": [
|
|
27
|
+
{
|
|
28
|
+
"api_base_url": "http://localhost:8001/v1",
|
|
29
|
+
"endpoint_configs": [
|
|
30
|
+
{"api_base_url": "http://localhost:8002/v1"}
|
|
31
|
+
],
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
}
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_client_config_accepts_empty_nested_endpoint_configs_key():
|
|
39
|
+
config = ClientConfig.model_validate(
|
|
40
|
+
{
|
|
41
|
+
"api_base_url": "http://localhost:8000/v1",
|
|
42
|
+
"endpoint_configs": [
|
|
43
|
+
{
|
|
44
|
+
"api_base_url": "http://localhost:8001/v1",
|
|
45
|
+
"endpoint_configs": [],
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
}
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
assert len(config.endpoint_configs) == 1
|
|
52
|
+
assert config.endpoint_configs[0].api_base_url == "http://localhost:8001/v1"
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from verifiers.utils.eval_utils import load_endpoints
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_load_endpoints_python_registry_normalizes_to_lists(tmp_path: Path):
|
|
7
|
+
registry_path = tmp_path / "endpoints.py"
|
|
8
|
+
registry_path.write_text(
|
|
9
|
+
"ENDPOINTS = {\n"
|
|
10
|
+
' "gpt-4.1-mini": {"model": "gpt-4.1-mini", "url": "https://api.openai.com/v1", "key": "OPENAI_API_KEY"},\n'
|
|
11
|
+
"}\n",
|
|
12
|
+
encoding="utf-8",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
endpoints = load_endpoints(str(registry_path))
|
|
16
|
+
|
|
17
|
+
assert set(endpoints.keys()) == {"gpt-4.1-mini"}
|
|
18
|
+
assert len(endpoints["gpt-4.1-mini"]) == 1
|
|
19
|
+
endpoint = endpoints["gpt-4.1-mini"][0]
|
|
20
|
+
assert endpoint["model"] == "gpt-4.1-mini"
|
|
21
|
+
assert endpoint["url"] == "https://api.openai.com/v1"
|
|
22
|
+
assert endpoint["key"] == "OPENAI_API_KEY"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
|
|
26
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
27
|
+
registry_path.write_text(
|
|
28
|
+
"[[endpoint]]\n"
|
|
29
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
30
|
+
'model = "openai/gpt-5-mini"\n'
|
|
31
|
+
'url = "https://api.pinference.ai/api/v1"\n'
|
|
32
|
+
'key = "PRIME_API_KEY"\n'
|
|
33
|
+
"\n"
|
|
34
|
+
"[[endpoint]]\n"
|
|
35
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
36
|
+
'model = "openai/gpt-5-mini"\n'
|
|
37
|
+
'url = "https://api.openai.com/v1"\n'
|
|
38
|
+
'key = "OPENAI_API_KEY"\n',
|
|
39
|
+
encoding="utf-8",
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
endpoints = load_endpoints(str(registry_path))
|
|
43
|
+
|
|
44
|
+
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
45
|
+
assert len(endpoints["gpt-5-mini"]) == 2
|
|
46
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
47
|
+
assert endpoints["gpt-5-mini"][1]["url"] == "https://api.openai.com/v1"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
51
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
52
|
+
registry_path.write_text(
|
|
53
|
+
"[[endpoint]]\n"
|
|
54
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
55
|
+
'model = "openai/gpt-5-mini"\n'
|
|
56
|
+
'api_base_url = "https://api.pinference.ai/api/v1"\n'
|
|
57
|
+
'api_key_var = "PRIME_API_KEY"\n',
|
|
58
|
+
encoding="utf-8",
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
endpoints = load_endpoints(str(registry_path))
|
|
62
|
+
|
|
63
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
64
|
+
assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
|
|
68
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
69
|
+
registry_path.write_text(
|
|
70
|
+
"[[endpoint]]\n"
|
|
71
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
72
|
+
'model = "openai/gpt-5-mini"\n'
|
|
73
|
+
'url = "https://api.pinference.ai/api/v1"\n'
|
|
74
|
+
'api_base_url = "https://api.pinference.ai/api/v1"\n'
|
|
75
|
+
'key = "PRIME_API_KEY"\n'
|
|
76
|
+
'api_key_var = "PRIME_API_KEY"\n',
|
|
77
|
+
encoding="utf-8",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
endpoints = load_endpoints(str(registry_path))
|
|
81
|
+
|
|
82
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://api.pinference.ai/api/v1"
|
|
83
|
+
assert endpoints["gpt-5-mini"][0]["key"] == "PRIME_API_KEY"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
|
|
87
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
88
|
+
registry_path.write_text(
|
|
89
|
+
"[[endpoint]]\n"
|
|
90
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
91
|
+
'model = "openai/gpt-5-mini"\n'
|
|
92
|
+
'url = "https://a.example/v1"\n'
|
|
93
|
+
'api_base_url = "https://b.example/v1"\n'
|
|
94
|
+
'key = "PRIME_API_KEY"\n',
|
|
95
|
+
encoding="utf-8",
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
endpoints = load_endpoints(str(registry_path))
|
|
99
|
+
|
|
100
|
+
assert endpoints == {}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def test_load_endpoints_toml_rejects_conflicting_key_fields(tmp_path: Path):
|
|
104
|
+
registry_path = tmp_path / "endpoints.toml"
|
|
105
|
+
registry_path.write_text(
|
|
106
|
+
"[[endpoint]]\n"
|
|
107
|
+
'endpoint_id = "gpt-5-mini"\n'
|
|
108
|
+
'model = "openai/gpt-5-mini"\n'
|
|
109
|
+
'url = "https://a.example/v1"\n'
|
|
110
|
+
'key = "A_KEY"\n'
|
|
111
|
+
'api_key_var = "B_KEY"\n',
|
|
112
|
+
encoding="utf-8",
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
endpoints = load_endpoints(str(registry_path))
|
|
116
|
+
|
|
117
|
+
assert endpoints == {}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_load_endpoints_python_registry_supports_list_variants(tmp_path: Path):
|
|
121
|
+
registry_path = tmp_path / "endpoints.py"
|
|
122
|
+
registry_path.write_text(
|
|
123
|
+
"ENDPOINTS = {\n"
|
|
124
|
+
' "gpt-5-mini": [\n'
|
|
125
|
+
' {"model": "gpt-5-mini", "url": "https://a.example/v1", "key": "A_KEY"},\n'
|
|
126
|
+
' {"model": "gpt-5-mini", "url": "https://b.example/v1", "key": "A_KEY"},\n'
|
|
127
|
+
" ]\n"
|
|
128
|
+
"}\n",
|
|
129
|
+
encoding="utf-8",
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
endpoints = load_endpoints(str(registry_path))
|
|
133
|
+
|
|
134
|
+
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
135
|
+
assert len(endpoints["gpt-5-mini"]) == 2
|
|
136
|
+
assert endpoints["gpt-5-mini"][0]["url"] == "https://a.example/v1"
|
|
137
|
+
assert endpoints["gpt-5-mini"][1]["url"] == "https://b.example/v1"
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_load_endpoints_directory_prefers_toml_then_python(tmp_path: Path):
|
|
141
|
+
python_registry = tmp_path / "endpoints.py"
|
|
142
|
+
toml_registry = tmp_path / "endpoints.toml"
|
|
143
|
+
|
|
144
|
+
python_registry.write_text(
|
|
145
|
+
"ENDPOINTS = {\n"
|
|
146
|
+
' "from-py": {"model": "m", "url": "https://py.example/v1", "key": "PY_KEY"},\n'
|
|
147
|
+
"}\n",
|
|
148
|
+
encoding="utf-8",
|
|
149
|
+
)
|
|
150
|
+
toml_registry.write_text(
|
|
151
|
+
"[[endpoint]]\n"
|
|
152
|
+
'endpoint_id = "from-toml"\n'
|
|
153
|
+
'model = "m"\n'
|
|
154
|
+
'url = "https://toml.example/v1"\n'
|
|
155
|
+
'key = "TOML_KEY"\n',
|
|
156
|
+
encoding="utf-8",
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
endpoints = load_endpoints(str(tmp_path))
|
|
160
|
+
assert set(endpoints.keys()) == {"from-toml"}
|
|
161
|
+
|
|
162
|
+
toml_registry.unlink()
|
|
163
|
+
endpoints = load_endpoints(str(tmp_path))
|
|
164
|
+
assert set(endpoints.keys()) == {"from-py"}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_qwen3_vl_endpoint_ids_map_to_vl_models():
|
|
168
|
+
endpoints = load_endpoints("./configs/endpoints.toml")
|
|
169
|
+
|
|
170
|
+
assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
171
|
+
assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
172
|
+
assert (
|
|
173
|
+
endpoints["qwen3-vl-235b-i"][0]["model"] == "qwen/qwen3-vl-235b-a22b-instruct"
|
|
174
|
+
)
|
|
175
|
+
assert (
|
|
176
|
+
endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
177
|
+
)
|