verifiers 0.1.15.dev11__tar.gz → 0.1.15.dev13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/.gitignore +0 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/PKG-INFO +22 -9
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/README.md +8 -7
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/pyproject.toml +64 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_endpoint_registry.py +14 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_envs.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_eval_cli.py +185 -76
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_gepa_cli.py +25 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_imports.py +28 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_init_script.py +42 -6
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_langchain_deep_agents_wikispeedia.py +27 -9
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_mcp_search_env.py +2 -2
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_opencode_harbor.py +36 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_renderer_client.py +282 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_trajectory_processing.py +136 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_bfcl.py +39 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_config_extension.py +705 -510
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_endpoint_protocols.py +7 -5
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_harbor_cli.py +140 -94
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_mini_swe_agent.py +9 -8
- verifiers-0.1.15.dev13/tests/test_v1_nemo_gym_harness.py +427 -0
- verifiers-0.1.15.dev13/tests/test_v1_openenv_taskset.py +236 -0
- verifiers-0.1.15.dev13/tests/test_v1_openreward_taskset.py +199 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_rlm_swe.py +179 -97
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_runtime_lifecycle.py +669 -114
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_scoring_functions.py +29 -19
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_taskset_bindings.py +44 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_textarena_taskset.py +44 -37
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_wordle_v1_env.py +71 -27
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/__init__.py +55 -17
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/client.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/renderer_client.py +111 -27
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +8 -62
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +12 -5
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/eval.py +153 -14
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/gepa.py +21 -15
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/init.py +26 -34
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/types.py +216 -30
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/env_utils.py +43 -28
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/eval_utils.py +14 -10
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/interception_utils.py +4 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/response_utils.py +42 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +22 -18
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/README.md +224 -182
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/RE_MIGRATION.md +56 -45
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/__init__.py +41 -21
- verifiers-0.1.15.dev13/verifiers/v1/artifact.py +86 -0
- verifiers-0.1.15.dev13/verifiers/v1/config.py +127 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/env.py +55 -39
- verifiers-0.1.15.dev13/verifiers/v1/harness.py +652 -0
- verifiers-0.1.15.dev13/verifiers/v1/model.py +51 -0
- verifiers-0.1.15.dev13/verifiers/v1/program.py +303 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/runtime.py +1076 -616
- verifiers-0.1.15.dev13/verifiers/v1/runtime_handles.py +51 -0
- verifiers-0.1.15.dev13/verifiers/v1/sandbox.py +41 -0
- verifiers-0.1.15.dev13/verifiers/v1/task.py +150 -0
- verifiers-0.1.15.dev13/verifiers/v1/taskset.py +158 -0
- verifiers-0.1.15.dev13/verifiers/v1/toolset.py +292 -0
- verifiers-0.1.15.dev13/verifiers/v1/types.py +54 -0
- verifiers-0.1.15.dev13/verifiers/v1/user.py +132 -0
- verifiers-0.1.15.dev13/verifiers/v1/utils/binding_utils.py +323 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/config_callable_utils.py +7 -7
- verifiers-0.1.15.dev13/verifiers/v1/utils/config_utils.py +296 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/endpoint_utils.py +103 -59
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/judge_utils.py +19 -16
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/lifecycle_utils.py +22 -15
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/mcp_proxy_utils.py +18 -23
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/mcp_utils.py +4 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/object_utils.py +13 -11
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/program_utils.py +106 -81
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/prompt_utils.py +58 -80
- verifiers-0.1.15.dev13/verifiers/v1/utils/runtime_owner_utils.py +131 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/runtime_registry.py +3 -7
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/sandbox_program_utils.py +28 -29
- verifiers-0.1.15.dev13/verifiers/v1/utils/sandbox_python_utils.py +99 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/sandbox_utils.py +270 -215
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/scoring_utils.py +45 -45
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/serialization_utils.py +1 -4
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/task_freeze_utils.py +2 -2
- verifiers-0.1.15.dev13/verifiers/v1/utils/taskset_utils.py +144 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/tool_utils.py +24 -20
- verifiers-0.1.15.dev13/verifiers/v1/utils/toolset_utils.py +217 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/trajectory_utils.py +16 -18
- verifiers-0.1.15.dev11/verifiers/v1/config.py +0 -385
- verifiers-0.1.15.dev11/verifiers/v1/harness.py +0 -536
- verifiers-0.1.15.dev11/verifiers/v1/packages/__init__.py +0 -1
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/__init__.py +0 -25
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/command.py +0 -160
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/configs.py +0 -168
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/mini_swe_agent.py +0 -219
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/opencode.py +0 -234
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/pi.py +0 -195
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/rlm.py +0 -601
- verifiers-0.1.15.dev11/verifiers/v1/packages/harnesses/terminus_2.py +0 -252
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/__init__.py +0 -17
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/harbor.py +0 -412
- verifiers-0.1.15.dev11/verifiers/v1/packages/tasksets/textarena.py +0 -177
- verifiers-0.1.15.dev11/verifiers/v1/task.py +0 -94
- verifiers-0.1.15.dev11/verifiers/v1/taskset.py +0 -207
- verifiers-0.1.15.dev11/verifiers/v1/toolset.py +0 -414
- verifiers-0.1.15.dev11/verifiers/v1/types.py +0 -55
- verifiers-0.1.15.dev11/verifiers/v1/user.py +0 -96
- verifiers-0.1.15.dev11/verifiers/v1/utils/artifact_utils.py +0 -29
- verifiers-0.1.15.dev11/verifiers/v1/utils/binding_utils.py +0 -218
- verifiers-0.1.15.dev11/verifiers/v1/utils/config_utils.py +0 -168
- verifiers-0.1.15.dev11/verifiers/v1/utils/runtime_owner_utils.py +0 -105
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_registry_utils.py +0 -115
- verifiers-0.1.15.dev11/verifiers/v1/utils/taskset_utils.py +0 -78
- verifiers-0.1.15.dev11/verifiers/v1/utils/timing_utils.py +0 -119
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/LICENSE +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_save_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_wiki_search_v1.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/metric_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/save_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev11 → verifiers-0.1.15.dev13}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev11
|
|
3
|
+
Version: 0.1.15.dev13
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -53,8 +53,18 @@ Provides-Extra: browser
|
|
|
53
53
|
Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
|
|
54
54
|
Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
|
|
55
55
|
Requires-Dist: stagehand>=3.0.0; extra == 'browser'
|
|
56
|
+
Provides-Extra: harnesses
|
|
57
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'harnesses'
|
|
58
|
+
Provides-Extra: nemogym
|
|
59
|
+
Requires-Dist: harnesses[nemogym]>=0.1.1; extra == 'nemogym'
|
|
60
|
+
Requires-Dist: tasksets[nemogym]>=0.1.1; extra == 'nemogym'
|
|
56
61
|
Provides-Extra: openenv
|
|
57
|
-
Requires-Dist: openenv
|
|
62
|
+
Requires-Dist: tasksets[openenv]>=0.1.1; extra == 'openenv'
|
|
63
|
+
Provides-Extra: openreward
|
|
64
|
+
Requires-Dist: tasksets[openreward]>=0.1.1; extra == 'openreward'
|
|
65
|
+
Provides-Extra: packages
|
|
66
|
+
Requires-Dist: harnesses>=0.1.1; extra == 'packages'
|
|
67
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'packages'
|
|
58
68
|
Provides-Extra: renderers
|
|
59
69
|
Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
|
|
60
70
|
Provides-Extra: rg
|
|
@@ -73,6 +83,8 @@ Requires-Dist: wandb; extra == 'rl'
|
|
|
73
83
|
Provides-Extra: ta
|
|
74
84
|
Requires-Dist: nltk; extra == 'ta'
|
|
75
85
|
Requires-Dist: textarena; extra == 'ta'
|
|
86
|
+
Provides-Extra: tasksets
|
|
87
|
+
Requires-Dist: tasksets[openenv,openreward,ta]>=0.1.1; extra == 'tasksets'
|
|
76
88
|
Description-Content-Type: text/markdown
|
|
77
89
|
|
|
78
90
|
<p align="center">
|
|
@@ -226,7 +238,7 @@ class MyTasksetConfig(vf.TasksetConfig):
|
|
|
226
238
|
|
|
227
239
|
|
|
228
240
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
229
|
-
def load_tasks(self) -> vf.Tasks:
|
|
241
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
230
242
|
rows = [
|
|
231
243
|
{
|
|
232
244
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -251,16 +263,17 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
251
263
|
```
|
|
252
264
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
253
265
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
254
|
-
Reusable taskset and harness packages live
|
|
255
|
-
|
|
256
|
-
|
|
266
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
267
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
268
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
269
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
257
270
|
|
|
258
271
|
```python
|
|
259
|
-
from
|
|
260
|
-
from
|
|
272
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
273
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
261
274
|
|
|
262
275
|
env = vf.Env(
|
|
263
|
-
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
276
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
264
277
|
harness=OpenCode(config=OpenCodeConfig()),
|
|
265
278
|
)
|
|
266
279
|
```
|
|
@@ -149,7 +149,7 @@ class MyTasksetConfig(vf.TasksetConfig):
|
|
|
149
149
|
|
|
150
150
|
|
|
151
151
|
class MyTaskset(vf.Taskset[MyTasksetConfig]):
|
|
152
|
-
def load_tasks(self) -> vf.Tasks:
|
|
152
|
+
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
153
153
|
rows = [
|
|
154
154
|
{
|
|
155
155
|
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
@@ -174,16 +174,17 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
174
174
|
```
|
|
175
175
|
If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
|
|
176
176
|
**[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
|
|
177
|
-
Reusable taskset and harness packages live
|
|
178
|
-
|
|
179
|
-
|
|
177
|
+
Reusable taskset and harness packages live in `tasksets` and `harnesses`.
|
|
178
|
+
Install them with `uv add "verifiers[packages]"`, or with the narrower
|
|
179
|
+
`verifiers[tasksets]` and `verifiers[harnesses]` extras. For example, Harbor
|
|
180
|
+
task directories can run through the bundled OpenCode CLI harness with:
|
|
180
181
|
|
|
181
182
|
```python
|
|
182
|
-
from
|
|
183
|
-
from
|
|
183
|
+
from harnesses import OpenCode, OpenCodeConfig
|
|
184
|
+
from tasksets import HarborTaskset, HarborTasksetConfig
|
|
184
185
|
|
|
185
186
|
env = vf.Env(
|
|
186
|
-
taskset=HarborTaskset(config=HarborTasksetConfig()),
|
|
187
|
+
taskset=HarborTaskset(config=HarborTasksetConfig(bundle_package=__name__)),
|
|
187
188
|
harness=OpenCode(config=OpenCodeConfig()),
|
|
188
189
|
)
|
|
189
190
|
```
|
|
@@ -69,12 +69,12 @@ dev = [
|
|
|
69
69
|
"ipykernel",
|
|
70
70
|
"ipywidgets",
|
|
71
71
|
"reasoning-gym",
|
|
72
|
-
"textarena",
|
|
73
72
|
"stagehand>=3.0.0",
|
|
74
73
|
"aiohttp>=3.9.0",
|
|
75
74
|
"python-dotenv>=1.0.0",
|
|
76
|
-
"
|
|
75
|
+
"harnesses",
|
|
77
76
|
"renderers>=0.1.8.dev28",
|
|
77
|
+
"tasksets[openenv,openreward,ta]",
|
|
78
78
|
]
|
|
79
79
|
policy = [
|
|
80
80
|
"semgrep>=1.150.0",
|
|
@@ -85,8 +85,18 @@ rg = [
|
|
|
85
85
|
"reasoning-gym",
|
|
86
86
|
]
|
|
87
87
|
ta = [
|
|
88
|
-
"textarena",
|
|
89
88
|
"nltk",
|
|
89
|
+
"textarena",
|
|
90
|
+
]
|
|
91
|
+
tasksets = [
|
|
92
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
93
|
+
]
|
|
94
|
+
harnesses = [
|
|
95
|
+
"harnesses>=0.1.1",
|
|
96
|
+
]
|
|
97
|
+
packages = [
|
|
98
|
+
"harnesses>=0.1.1",
|
|
99
|
+
"tasksets[openenv,openreward,ta]>=0.1.1",
|
|
90
100
|
]
|
|
91
101
|
browser = [
|
|
92
102
|
"stagehand>=3.0.0",
|
|
@@ -94,7 +104,14 @@ browser = [
|
|
|
94
104
|
"python-dotenv>=1.0.0",
|
|
95
105
|
]
|
|
96
106
|
openenv = [
|
|
97
|
-
"openenv
|
|
107
|
+
"tasksets[openenv]>=0.1.1",
|
|
108
|
+
]
|
|
109
|
+
openreward = [
|
|
110
|
+
"tasksets[openreward]>=0.1.1",
|
|
111
|
+
]
|
|
112
|
+
nemogym = [
|
|
113
|
+
"harnesses[nemogym]>=0.1.1",
|
|
114
|
+
"tasksets[nemogym]>=0.1.1",
|
|
98
115
|
]
|
|
99
116
|
renderers = [
|
|
100
117
|
"renderers>=0.1.8.dev28",
|
|
@@ -121,12 +138,53 @@ conflicts = [
|
|
|
121
138
|
{ extra = "openenv" },
|
|
122
139
|
{ group = "policy" },
|
|
123
140
|
],
|
|
141
|
+
[
|
|
142
|
+
{ extra = "nemogym" },
|
|
143
|
+
{ extra = "openenv" },
|
|
144
|
+
],
|
|
145
|
+
[
|
|
146
|
+
{ extra = "nemogym" },
|
|
147
|
+
{ group = "dev" },
|
|
148
|
+
],
|
|
149
|
+
[
|
|
150
|
+
{ extra = "nemogym" },
|
|
151
|
+
{ extra = "tasksets" },
|
|
152
|
+
],
|
|
153
|
+
[
|
|
154
|
+
{ extra = "nemogym" },
|
|
155
|
+
{ extra = "packages" },
|
|
156
|
+
],
|
|
157
|
+
[
|
|
158
|
+
{ extra = "ta" },
|
|
159
|
+
{ group = "policy" },
|
|
160
|
+
],
|
|
161
|
+
[
|
|
162
|
+
{ extra = "tasksets" },
|
|
163
|
+
{ group = "policy" },
|
|
164
|
+
],
|
|
165
|
+
[
|
|
166
|
+
{ extra = "packages" },
|
|
167
|
+
{ group = "policy" },
|
|
168
|
+
],
|
|
169
|
+
[
|
|
170
|
+
{ extra = "openreward" },
|
|
171
|
+
{ group = "policy" },
|
|
172
|
+
],
|
|
173
|
+
[
|
|
174
|
+
{ group = "dev" },
|
|
175
|
+
{ group = "policy" },
|
|
176
|
+
],
|
|
124
177
|
]
|
|
178
|
+
|
|
125
179
|
[[tool.uv.index]]
|
|
126
180
|
name = "pypi"
|
|
127
181
|
url = "https://pypi.org/simple"
|
|
128
182
|
default = true
|
|
129
183
|
|
|
184
|
+
[tool.uv.sources]
|
|
185
|
+
harnesses = { path = "packages/harnesses", editable = true }
|
|
186
|
+
tasksets = { path = "packages/tasksets", editable = true }
|
|
187
|
+
|
|
130
188
|
[tool.uv.exclude-newer-package]
|
|
131
189
|
# PrimeIntellect-published on PyPI (trusted publisher)
|
|
132
190
|
prime-tunnel = false
|
|
@@ -134,6 +192,8 @@ prime-sandboxes = false
|
|
|
134
192
|
prime-pydantic-config = false
|
|
135
193
|
renderers = false
|
|
136
194
|
openenv-core = false
|
|
195
|
+
harnesses = false
|
|
196
|
+
tasksets = false
|
|
137
197
|
|
|
138
198
|
[tool.uv.extra-build-dependencies]
|
|
139
199
|
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
@@ -58,8 +58,8 @@ def test_load_endpoints_toml_groups_variants_by_endpoint_id(tmp_path: Path):
|
|
|
58
58
|
|
|
59
59
|
assert set(endpoints.keys()) == {"gpt-5-mini"}
|
|
60
60
|
assert len(endpoints["gpt-5-mini"]) == 2
|
|
61
|
-
assert endpoints["gpt-5-mini"][0]
|
|
62
|
-
assert endpoints["gpt-5-mini"][1]
|
|
61
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
62
|
+
assert endpoints["gpt-5-mini"][1].base_url == "https://api.openai.com/v1"
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
@@ -75,8 +75,8 @@ def test_load_endpoints_toml_accepts_long_field_names(tmp_path: Path):
|
|
|
75
75
|
|
|
76
76
|
endpoints = load_endpoints(str(registry_path))
|
|
77
77
|
|
|
78
|
-
assert endpoints["gpt-5-mini"][0]
|
|
79
|
-
assert endpoints["gpt-5-mini"][0]
|
|
78
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
79
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Path):
|
|
@@ -94,8 +94,8 @@ def test_load_endpoints_toml_accepts_matching_short_and_long_fields(tmp_path: Pa
|
|
|
94
94
|
|
|
95
95
|
endpoints = load_endpoints(str(registry_path))
|
|
96
96
|
|
|
97
|
-
assert endpoints["gpt-5-mini"][0]
|
|
98
|
-
assert endpoints["gpt-5-mini"][0]
|
|
97
|
+
assert endpoints["gpt-5-mini"][0].base_url == "https://api.pinference.ai/api/v1"
|
|
98
|
+
assert endpoints["gpt-5-mini"][0].api_key_var == "PRIME_API_KEY"
|
|
99
99
|
|
|
100
100
|
|
|
101
101
|
def test_load_endpoints_toml_rejects_conflicting_url_fields(tmp_path: Path):
|
|
@@ -165,14 +165,10 @@ def test_load_endpoints_directory_uses_toml_and_warns_on_ignored_python(
|
|
|
165
165
|
def test_qwen3_vl_endpoint_ids_map_to_vl_models():
|
|
166
166
|
endpoints = load_endpoints("./configs/endpoints.toml")
|
|
167
167
|
|
|
168
|
-
assert endpoints["qwen3-vl-30b-i"][0]["model"] == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
169
|
-
assert endpoints["qwen3-vl-30b-t"][0]["model"] == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
170
|
-
assert
|
|
171
|
-
|
|
172
|
-
)
|
|
173
|
-
assert (
|
|
174
|
-
endpoints["qwen3-vl-235b-t"][0]["model"] == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
175
|
-
)
|
|
168
|
+
assert endpoints["qwen3-vl-30b-i"][0].model == "qwen/qwen3-vl-30b-a3b-instruct"
|
|
169
|
+
assert endpoints["qwen3-vl-30b-t"][0].model == "qwen/qwen3-vl-30b-a3b-thinking"
|
|
170
|
+
assert endpoints["qwen3-vl-235b-i"][0].model == "qwen/qwen3-vl-235b-a22b-instruct"
|
|
171
|
+
assert endpoints["qwen3-vl-235b-t"][0].model == "qwen/qwen3-vl-235b-a22b-thinking"
|
|
176
172
|
|
|
177
173
|
|
|
178
174
|
def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
@@ -189,7 +185,7 @@ def test_load_endpoints_toml_accepts_type_shorthand(tmp_path: Path):
|
|
|
189
185
|
|
|
190
186
|
endpoints = load_endpoints(str(registry_path))
|
|
191
187
|
|
|
192
|
-
assert endpoints["haiku"][0]
|
|
188
|
+
assert endpoints["haiku"][0].api_client_type == "anthropic_messages"
|
|
193
189
|
|
|
194
190
|
|
|
195
191
|
def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
@@ -206,7 +202,7 @@ def test_load_endpoints_toml_accepts_openai_responses_type(tmp_path: Path):
|
|
|
206
202
|
|
|
207
203
|
endpoints = load_endpoints(str(registry_path))
|
|
208
204
|
|
|
209
|
-
assert endpoints["gpt-responses"][0]
|
|
205
|
+
assert endpoints["gpt-responses"][0].api_client_type == "openai_responses"
|
|
210
206
|
|
|
211
207
|
|
|
212
208
|
def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
@@ -223,7 +219,7 @@ def test_load_endpoints_toml_accepts_headers_table(tmp_path: Path):
|
|
|
223
219
|
|
|
224
220
|
endpoints = load_endpoints(str(registry_path))
|
|
225
221
|
|
|
226
|
-
assert endpoints["proxy"][0]
|
|
222
|
+
assert endpoints["proxy"][0].extra_headers == {"X-Custom": "v1"}
|
|
227
223
|
|
|
228
224
|
|
|
229
225
|
def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
@@ -240,7 +236,7 @@ def test_load_endpoints_toml_accepts_extra_headers_alias(tmp_path: Path):
|
|
|
240
236
|
|
|
241
237
|
endpoints = load_endpoints(str(registry_path))
|
|
242
238
|
|
|
243
|
-
assert endpoints["proxy"][0]
|
|
239
|
+
assert endpoints["proxy"][0].extra_headers == {"X-A": "a"}
|
|
244
240
|
|
|
245
241
|
|
|
246
242
|
def test_load_endpoints_toml_rejects_headers_and_extra_headers_together(
|
|
@@ -158,6 +158,10 @@ def test_env(env_dir: Path, tmp_path_factory: pytest.TempPathFactory):
|
|
|
158
158
|
f"{repo_root.as_posix()} && "
|
|
159
159
|
"uv pip install "
|
|
160
160
|
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
|
+
f"{(repo_root / 'packages' / 'tasksets').as_posix()} "
|
|
162
|
+
f"{(repo_root / 'packages' / 'harnesses').as_posix()} && "
|
|
163
|
+
"uv pip install "
|
|
164
|
+
"--exclude-newer-package prime-pydantic-config=2026-05-20T00:00:00Z "
|
|
161
165
|
f"{env_dir.absolute().as_posix()}"
|
|
162
166
|
)
|
|
163
167
|
try:
|