verifiers 0.1.15.dev16__tar.gz → 0.1.15.dev17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/PKG-INFO +1 -1
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gepa_cli.py +8 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_save_utils.py +23 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_config_extension.py +21 -4
- verifiers-0.1.15.dev17/tests/test_v1_taskset_utils.py +46 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/__init__.py +1 -1
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/metric_utils.py +3 -1
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/save_utils.py +13 -2
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/__init__.py +1 -2
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/harness.py +3 -6
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/taskset.py +3 -6
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/types.py +0 -1
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/prompt_utils.py +13 -8
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/taskset_utils.py +8 -9
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/.gitignore +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/LICENSE +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_init_script.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_harbor_cli.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_nemo_gym_harness.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_openenv_taskset.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_openreward_taskset.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_taskset_bindings.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_v1_textarena_taskset.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wiki_search_v1.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_wordle_v1_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/init.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/types.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/README.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/RE_MIGRATION.md +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/artifact.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/config.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/model.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/program.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/runtime_handles.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/sandbox.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/toolset_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev16
|
|
3
|
+
Version: 0.1.15.dev17
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -189,6 +189,14 @@ def test_load_gepa_toml_config_requires_env_table(tmp_path: Path):
|
|
|
189
189
|
load_gepa_toml_config(config_path)
|
|
190
190
|
|
|
191
191
|
|
|
192
|
+
def test_repo_gepa_example_configs_are_valid():
|
|
193
|
+
config_paths = sorted(Path("configs/gepa").glob("*.toml"))
|
|
194
|
+
assert config_paths
|
|
195
|
+
for config_path in config_paths:
|
|
196
|
+
loaded = load_gepa_toml_config(config_path)
|
|
197
|
+
assert loaded["envs"], f"{config_path} should contain at least one [[env]]"
|
|
198
|
+
|
|
199
|
+
|
|
192
200
|
def test_resolve_gepa_config_args_supports_plain_env_id():
|
|
193
201
|
args = argparse.Namespace(env_id_or_config="primeintellect/wordle")
|
|
194
202
|
|
|
@@ -258,6 +258,13 @@ class TestSavingResults:
|
|
|
258
258
|
assert result[0].get("foo") == "bar" # custom field from make_state fixture
|
|
259
259
|
assert result[0]["reward"] == 1.0
|
|
260
260
|
|
|
261
|
+
def test_states_to_outputs_requires_example_id(self, make_state):
|
|
262
|
+
state = make_state()
|
|
263
|
+
del state["example_id"]
|
|
264
|
+
|
|
265
|
+
with pytest.raises(KeyError):
|
|
266
|
+
states_to_outputs([state], state_columns=[])
|
|
267
|
+
|
|
261
268
|
def test_states_to_outputs_completion_keeps_messages(self, make_state):
|
|
262
269
|
states = [
|
|
263
270
|
make_state(
|
|
@@ -647,6 +654,22 @@ class TestBuilderPassAtK:
|
|
|
647
654
|
# 1 of 4 correct at threshold=0.7: pass^1 = C(1,1)/C(4,1) = 0.25
|
|
648
655
|
assert metadata["pass_all_k"]["1"] == pytest.approx(0.25)
|
|
649
656
|
|
|
657
|
+
def test_builder_requires_example_id(self):
|
|
658
|
+
builder = GenerateOutputsBuilder(
|
|
659
|
+
env_id="test-env",
|
|
660
|
+
env_args={},
|
|
661
|
+
model="test-model",
|
|
662
|
+
client=ClientConfig(api_base_url="http://localhost:8000/v1"),
|
|
663
|
+
num_examples=1,
|
|
664
|
+
rollouts_per_example=1,
|
|
665
|
+
state_columns=[],
|
|
666
|
+
sampling_args={},
|
|
667
|
+
results_path=Path("/tmp/test-results"),
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
with pytest.raises(KeyError):
|
|
671
|
+
builder.add_outputs([{"reward": 1.0, "metrics": {}}])
|
|
672
|
+
|
|
650
673
|
|
|
651
674
|
class TestMetricProtocol:
|
|
652
675
|
def test_all_metrics_satisfy_protocol(self):
|
|
@@ -2310,16 +2310,14 @@ def test_taskset_subclasses_inherit_registered_config_type() -> None:
|
|
|
2310
2310
|
|
|
2311
2311
|
def test_taskset_class_loader_owns_split_loading() -> None:
|
|
2312
2312
|
class LoaderTasksetConfig(TasksetConfig):
|
|
2313
|
-
system_prompt: vf.SystemPrompt
|
|
2313
|
+
system_prompt: vf.SystemPrompt = "class prompt"
|
|
2314
2314
|
|
|
2315
2315
|
class LoaderTaskset(Taskset[LoaderTasksetConfig]):
|
|
2316
2316
|
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
2317
2317
|
answer = "class eval" if split == "eval" else "class tasks"
|
|
2318
2318
|
return [{"prompt": [], "answer": answer}]
|
|
2319
2319
|
|
|
2320
|
-
def load_system_prompt(
|
|
2321
|
-
self, config: LoaderTasksetConfig
|
|
2322
|
-
) -> vf.SystemPrompt | None:
|
|
2320
|
+
def load_system_prompt(self, config: LoaderTasksetConfig) -> vf.SystemPrompt:
|
|
2323
2321
|
return config.system_prompt
|
|
2324
2322
|
|
|
2325
2323
|
defaulted = LoaderTaskset(config=LoaderTasksetConfig())
|
|
@@ -2341,6 +2339,25 @@ def test_taskset_class_loader_owns_split_loading() -> None:
|
|
|
2341
2339
|
assert disabled_prompt.system_prompt == []
|
|
2342
2340
|
|
|
2343
2341
|
|
|
2342
|
+
def test_system_prompt_alias_accepts_config_data(tmp_path) -> None:
|
|
2343
|
+
prompt_path = tmp_path / "system_prompt.txt"
|
|
2344
|
+
prompt_path.write_text("alias path system prompt", encoding="utf-8")
|
|
2345
|
+
|
|
2346
|
+
class PromptTasksetConfig(TasksetConfig):
|
|
2347
|
+
system_prompt: vf.SystemPrompt = None
|
|
2348
|
+
|
|
2349
|
+
config = PromptTasksetConfig.model_validate(
|
|
2350
|
+
{"system_prompt": {"path": str(prompt_path)}}
|
|
2351
|
+
)
|
|
2352
|
+
assert isinstance(config.system_prompt, vf.SystemPromptConfig)
|
|
2353
|
+
|
|
2354
|
+
taskset = Taskset(config=config)
|
|
2355
|
+
|
|
2356
|
+
assert taskset.system_prompt == [
|
|
2357
|
+
{"role": "system", "content": "alias path system prompt"}
|
|
2358
|
+
]
|
|
2359
|
+
|
|
2360
|
+
|
|
2344
2361
|
def test_taskset_load_tasks_can_return_empty_dataset() -> None:
|
|
2345
2362
|
class LocalTasksetConfig(TasksetConfig):
|
|
2346
2363
|
enabled: bool = True
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from datasets import Dataset
|
|
4
|
+
|
|
5
|
+
from verifiers.v1.utils.taskset_utils import dataset_from_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def task_payload(row: dict) -> dict:
|
|
9
|
+
return json.loads(row["info"]["task"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_dataset_from_result_assigns_example_id_to_iterable_records():
|
|
13
|
+
dataset = dataset_from_result(
|
|
14
|
+
[
|
|
15
|
+
{"question": "Reverse abc.", "answer": "cba"},
|
|
16
|
+
{"question": "Reverse xyz.", "answer": "zyx"},
|
|
17
|
+
],
|
|
18
|
+
"ReverseTextTaskset",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
rows = list(dataset)
|
|
22
|
+
payloads = [task_payload(row) for row in rows]
|
|
23
|
+
|
|
24
|
+
assert [row["example_id"] for row in rows] == [0, 1]
|
|
25
|
+
assert [payload["example_id"] for payload in payloads] == [0, 1]
|
|
26
|
+
assert all(len(payload["task_id"]) == 32 for payload in payloads)
|
|
27
|
+
assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1"})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_dataset_from_result_overwrites_existing_example_id_column():
|
|
31
|
+
raw_dataset = Dataset.from_list(
|
|
32
|
+
[
|
|
33
|
+
{"question": "Reverse abc.", "answer": "cba", "example_id": None},
|
|
34
|
+
{"question": "Reverse xyz.", "answer": "zyx", "example_id": 99},
|
|
35
|
+
]
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
dataset = dataset_from_result(raw_dataset, "ReverseTextTaskset")
|
|
39
|
+
|
|
40
|
+
rows = list(dataset)
|
|
41
|
+
payloads = [task_payload(row) for row in rows]
|
|
42
|
+
|
|
43
|
+
assert [row["example_id"] for row in rows] == [0, 1]
|
|
44
|
+
assert [payload["example_id"] for payload in payloads] == [0, 1]
|
|
45
|
+
assert all(len(payload["task_id"]) == 32 for payload in payloads)
|
|
46
|
+
assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1", "99"})
|
|
@@ -162,10 +162,12 @@ class PassAtKMetric:
|
|
|
162
162
|
self.reset()
|
|
163
163
|
|
|
164
164
|
def add_output(self, output: RolloutOutput) -> None:
|
|
165
|
+
example_id = output["example_id"]
|
|
166
|
+
if example_id is None:
|
|
167
|
+
raise ValueError("output['example_id'] is required.")
|
|
165
168
|
if not self._k_values:
|
|
166
169
|
return
|
|
167
170
|
|
|
168
|
-
example_id = output.get("example_id", 0)
|
|
169
171
|
self._example_counts[example_id] += 1
|
|
170
172
|
if output.get("reward", 0.0) >= self.threshold:
|
|
171
173
|
self._example_correct[example_id] += 1
|
|
@@ -218,8 +218,12 @@ def state_to_output(
|
|
|
218
218
|
else:
|
|
219
219
|
raise TypeError("state['timing'] must be a RolloutTiming or mapping.")
|
|
220
220
|
|
|
221
|
+
example_id = state["example_id"]
|
|
222
|
+
if example_id is None:
|
|
223
|
+
raise ValueError("state['example_id'] is required.")
|
|
224
|
+
|
|
221
225
|
output = RolloutOutput(
|
|
222
|
-
example_id=
|
|
226
|
+
example_id=example_id,
|
|
223
227
|
prompt=state.get("prompt"),
|
|
224
228
|
completion=state.get("completion"),
|
|
225
229
|
answer=state.get("answer", ""),
|
|
@@ -671,9 +675,16 @@ class GenerateOutputsBuilder:
|
|
|
671
675
|
def build_outputs(self, sort_by_example_id: bool = False) -> list[RolloutOutput]:
|
|
672
676
|
"""Return (sorted) accumulated outputs"""
|
|
673
677
|
if sort_by_example_id:
|
|
674
|
-
return sorted(self.outputs, key=
|
|
678
|
+
return sorted(self.outputs, key=self.output_example_id)
|
|
675
679
|
return self.outputs
|
|
676
680
|
|
|
681
|
+
@staticmethod
|
|
682
|
+
def output_example_id(output: RolloutOutput) -> int:
|
|
683
|
+
example_id = output["example_id"]
|
|
684
|
+
if example_id is None:
|
|
685
|
+
raise ValueError("output['example_id'] is required.")
|
|
686
|
+
return example_id
|
|
687
|
+
|
|
677
688
|
def build(self, sort_by_example_id: bool = False) -> GenerateOutputs:
|
|
678
689
|
"""Build GenerateOutputs from accumulated outputs."""
|
|
679
690
|
return GenerateOutputs(
|
|
@@ -59,14 +59,13 @@ from .toolset import (
|
|
|
59
59
|
)
|
|
60
60
|
from .utils.endpoint_utils import Endpoint
|
|
61
61
|
from .utils.binding_utils import BindingsConfig, ObjectsConfig
|
|
62
|
-
from .utils.prompt_utils import SystemPromptConfig, SystemPromptStrategy
|
|
62
|
+
from .utils.prompt_utils import SystemPrompt, SystemPromptConfig, SystemPromptStrategy
|
|
63
63
|
from .types import (
|
|
64
64
|
ConfigData,
|
|
65
65
|
Handler,
|
|
66
66
|
JsonData,
|
|
67
67
|
Objects,
|
|
68
68
|
PromptInput,
|
|
69
|
-
SystemPrompt,
|
|
70
69
|
TaskSplit,
|
|
71
70
|
Tasks,
|
|
72
71
|
)
|
|
@@ -72,8 +72,8 @@ from .utils.sandbox_program_utils import (
|
|
|
72
72
|
run_sandbox_python_program,
|
|
73
73
|
)
|
|
74
74
|
from .utils.prompt_utils import (
|
|
75
|
+
SystemPrompt,
|
|
75
76
|
SystemPromptStrategy,
|
|
76
|
-
SystemPromptConfig,
|
|
77
77
|
normalize_prompt,
|
|
78
78
|
normalize_system_prompt,
|
|
79
79
|
resolve_system_prompt,
|
|
@@ -88,7 +88,6 @@ from .types import (
|
|
|
88
88
|
ConfigData,
|
|
89
89
|
JsonData,
|
|
90
90
|
Objects,
|
|
91
|
-
PromptInput,
|
|
92
91
|
)
|
|
93
92
|
|
|
94
93
|
if TYPE_CHECKING:
|
|
@@ -106,7 +105,7 @@ class HarnessConfig(LifecycleConfig):
|
|
|
106
105
|
)
|
|
107
106
|
program: ProgramConfig = ProgramConfig()
|
|
108
107
|
model: ModelConfig = ModelConfig()
|
|
109
|
-
system_prompt:
|
|
108
|
+
system_prompt: SystemPrompt = None
|
|
110
109
|
system_prompt_strategy: SystemPromptStrategy = "HT"
|
|
111
110
|
sandbox: SandboxConfig | None = None
|
|
112
111
|
user: UserConfig | None = None
|
|
@@ -217,9 +216,7 @@ class Harness(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
|
|
|
217
216
|
self.endpoint = self.load_endpoint()
|
|
218
217
|
self.program = self.compile_program(self.program_config)
|
|
219
218
|
|
|
220
|
-
def load_system_prompt(
|
|
221
|
-
self, config: ConfigT
|
|
222
|
-
) -> PromptInput | SystemPromptConfig | None:
|
|
219
|
+
def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
|
|
223
220
|
return config.system_prompt
|
|
224
221
|
|
|
225
222
|
def load_sandbox(self, config: SandboxConfig | None) -> SandboxConfig | None:
|
|
@@ -18,7 +18,7 @@ from .utils.binding_utils import (
|
|
|
18
18
|
BindingsConfig,
|
|
19
19
|
ObjectsConfig,
|
|
20
20
|
)
|
|
21
|
-
from .utils.prompt_utils import
|
|
21
|
+
from .utils.prompt_utils import SystemPrompt, normalize_system_prompt
|
|
22
22
|
from .utils.config_utils import (
|
|
23
23
|
coerce_config,
|
|
24
24
|
config_ref_context,
|
|
@@ -36,7 +36,6 @@ from .utils.taskset_utils import (
|
|
|
36
36
|
from .types import (
|
|
37
37
|
JsonData,
|
|
38
38
|
Objects,
|
|
39
|
-
PromptInput,
|
|
40
39
|
TaskSplit,
|
|
41
40
|
Tasks,
|
|
42
41
|
)
|
|
@@ -48,7 +47,7 @@ class TasksetConfig(LifecycleConfig):
|
|
|
48
47
|
default=None,
|
|
49
48
|
validation_alias=AliasChoices("taskset_id", "id"),
|
|
50
49
|
)
|
|
51
|
-
system_prompt:
|
|
50
|
+
system_prompt: SystemPrompt = None
|
|
52
51
|
user: UserConfig | None = None
|
|
53
52
|
bindings: BindingsConfig = BindingsConfig()
|
|
54
53
|
objects: ObjectsConfig = ObjectsConfig()
|
|
@@ -152,7 +151,5 @@ class Taskset(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
|
|
|
152
151
|
def __len__(self) -> int:
|
|
153
152
|
return len(self.get_dataset())
|
|
154
153
|
|
|
155
|
-
def load_system_prompt(
|
|
156
|
-
self, config: ConfigT
|
|
157
|
-
) -> PromptInput | SystemPromptConfig | None:
|
|
154
|
+
def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
|
|
158
155
|
return config.system_prompt
|
|
@@ -41,7 +41,6 @@ Tasks: TypeAlias = Dataset | Iterable[JsonData] | Iterable["Task"]
|
|
|
41
41
|
|
|
42
42
|
PromptMessage: TypeAlias = Message | JsonData
|
|
43
43
|
PromptInput: TypeAlias = str | Sequence[PromptMessage]
|
|
44
|
-
SystemPrompt: TypeAlias = PromptInput
|
|
45
44
|
|
|
46
45
|
ModelClient: TypeAlias = Client | ClientConfig
|
|
47
46
|
RuntimeObject: TypeAlias = object
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import importlib.util
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Literal, cast
|
|
4
|
+
from typing import TYPE_CHECKING, Literal, TypeAlias, cast
|
|
5
5
|
|
|
6
6
|
from pydantic import model_validator
|
|
7
7
|
from typing_extensions import Self
|
|
@@ -9,7 +9,7 @@ from verifiers.types import Messages, SystemMessage
|
|
|
9
9
|
from verifiers.utils.message_utils import normalize_messages
|
|
10
10
|
|
|
11
11
|
from ..config import Config
|
|
12
|
-
from ..types import JsonData, PromptInput
|
|
12
|
+
from ..types import JsonData, PromptInput
|
|
13
13
|
from .config_utils import current_config_ref_module
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
@@ -64,13 +64,15 @@ class SystemPromptConfig(Config):
|
|
|
64
64
|
messages: list[JsonData] = []
|
|
65
65
|
|
|
66
66
|
@model_validator(mode="after")
|
|
67
|
-
def
|
|
68
|
-
|
|
67
|
+
def validate_one_input(self) -> Self:
|
|
68
|
+
inputs = [
|
|
69
69
|
self.path is not None,
|
|
70
70
|
bool(self.messages),
|
|
71
71
|
]
|
|
72
|
-
if sum(
|
|
73
|
-
raise ValueError(
|
|
72
|
+
if sum(inputs) != 1:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
"SystemPromptConfig requires exactly one of path or messages."
|
|
75
|
+
)
|
|
74
76
|
return self
|
|
75
77
|
|
|
76
78
|
def load(self, field_name: str) -> PromptInput | None:
|
|
@@ -81,6 +83,9 @@ class SystemPromptConfig(Config):
|
|
|
81
83
|
return self.messages
|
|
82
84
|
|
|
83
85
|
|
|
86
|
+
SystemPrompt: TypeAlias = PromptInput | SystemPromptConfig | None
|
|
87
|
+
|
|
88
|
+
|
|
84
89
|
def normalize_prompt(
|
|
85
90
|
value: PromptInput | None, field_name: str = "prompt"
|
|
86
91
|
) -> list[JsonData]:
|
|
@@ -95,7 +100,7 @@ def normalize_prompt(
|
|
|
95
100
|
|
|
96
101
|
|
|
97
102
|
def normalize_system_prompt(
|
|
98
|
-
value: SystemPrompt
|
|
103
|
+
value: SystemPrompt,
|
|
99
104
|
field_name: str = "system_prompt",
|
|
100
105
|
) -> list[JsonData]:
|
|
101
106
|
value = resolve_system_prompt_input(value, field_name=field_name)
|
|
@@ -111,7 +116,7 @@ def normalize_system_prompt(
|
|
|
111
116
|
|
|
112
117
|
|
|
113
118
|
def resolve_system_prompt_input(
|
|
114
|
-
value:
|
|
119
|
+
value: SystemPrompt,
|
|
115
120
|
*,
|
|
116
121
|
field_name: str,
|
|
117
122
|
) -> PromptInput | None:
|
|
@@ -38,10 +38,8 @@ def prepare_task(task: Task, taskset_id: str) -> Task:
|
|
|
38
38
|
raise TypeError("v1 task loaders must return Task objects.")
|
|
39
39
|
prepared = Task(cast(JsonData, dict(task)))
|
|
40
40
|
prepared["taskset_id"] = taskset_id
|
|
41
|
-
if "task_id"
|
|
41
|
+
if prepared.get("task_id") is not None:
|
|
42
42
|
prepared["task_id"] = str(prepared["task_id"])
|
|
43
|
-
elif "example_id" in prepared:
|
|
44
|
-
prepared["task_id"] = str(prepared["example_id"])
|
|
45
43
|
else:
|
|
46
44
|
prepared["task_id"] = uuid.uuid4().hex
|
|
47
45
|
return prepared.freeze()
|
|
@@ -51,13 +49,13 @@ def dataset_record_from_task(
|
|
|
51
49
|
task: Task,
|
|
52
50
|
taskset_id: str,
|
|
53
51
|
index: int,
|
|
54
|
-
|
|
52
|
+
record: JsonData | None = None,
|
|
55
53
|
) -> JsonData:
|
|
56
54
|
data = Task(cast(JsonData, dict(task)))
|
|
57
|
-
data
|
|
55
|
+
data["example_id"] = index
|
|
58
56
|
normalized = prepare_task(data, taskset_id)
|
|
59
57
|
task_payload = dict(normalized)
|
|
60
|
-
dataset_record = deepcopy(dict(
|
|
58
|
+
dataset_record = deepcopy(dict(record or {}))
|
|
61
59
|
dataset_record["prompt"] = task_payload["prompt"]
|
|
62
60
|
dataset_record["example_id"] = task_payload["example_id"]
|
|
63
61
|
info = dataset_record.get("info")
|
|
@@ -82,9 +80,10 @@ def dataset_from_result(result: Tasks, taskset_id: str) -> Dataset:
|
|
|
82
80
|
if isinstance(result, Dataset):
|
|
83
81
|
records: list[JsonData] = []
|
|
84
82
|
for index, record in enumerate(result):
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
83
|
+
row = cast(JsonData, dict(record))
|
|
84
|
+
row["example_id"] = index
|
|
85
|
+
task = task_from_dataset_record(row, taskset_id)
|
|
86
|
+
records.append(dataset_record_from_task(task, taskset_id, index, row))
|
|
88
87
|
return Dataset.from_list(records)
|
|
89
88
|
tasks = tasks_from_result(result, taskset_id)
|
|
90
89
|
return Dataset.from_list(dataset_records_from_tasks(tasks, taskset_id))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_langchain_deep_agents_wikispeedia.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{verifiers-0.1.15.dev16 → verifiers-0.1.15.dev17}/tests/test_openai_chat_completions_token_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|