verifiers 0.1.15.dev15__tar.gz → 0.1.15.dev17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/PKG-INFO +1 -1
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_gepa_cli.py +8 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_init_script.py +10 -7
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_save_utils.py +23 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_config_extension.py +21 -4
- verifiers-0.1.15.dev17/tests/test_v1_taskset_utils.py +46 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/__init__.py +1 -1
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/init.py +42 -32
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/metric_utils.py +3 -1
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/save_utils.py +13 -2
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/__init__.py +1 -2
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/harness.py +3 -6
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/taskset.py +3 -6
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/types.py +0 -1
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/prompt_utils.py +13 -8
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/taskset_utils.py +8 -9
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/.gitignore +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/LICENSE +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/pyproject.toml +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/AGENTS.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/conftest.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_browser_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_build_script.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_client_auth_errors.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_client_config.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_client_multimodal_types.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_composable_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_context_token_metrics.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_decorator_ranks.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_endpoint_registry.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_env_group.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_env_server.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_environment.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_environment_extra.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_envs.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_error_chain.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_eval_cli.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_eval_display.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_eval_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_gepa_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_gym_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_harbor_env_mcp.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_imports.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_install_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_interception_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_lean_task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_logging.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_math_rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_mcp_search_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_message_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_message_utils_multimodal.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_multiturn_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_nemorl_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_opencode_harbor.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_openenv_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_path_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_per_turn_timing.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_pricing_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_prime_plugin.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_renderer_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_renderer_e2e.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_rlm_composable_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_rlm_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_rubric_group.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_sandbox_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_setup_script.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_singleturn_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_think_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_tool_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_tool_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_trajectory_processing.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_tui_info_formatting.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_types.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_bfcl.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_empty_completions.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_endpoint_protocols.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_example_counts.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_group_reward_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_harbor_cli.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_nemo_gym_harness.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_openenv_taskset.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_openreward_taskset.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_rlm_swe.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_runtime_lifecycle.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_scoring_functions.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_taskset_bindings.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_v1_textarena_taskset.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_wiki_search_v1.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_wordle_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_wordle_v1_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/tests/test_xml_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/AGENTS.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/build.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/eval.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/gepa.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/init.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/install.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/commands/setup.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/plugins/prime.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/cli/tui.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/anthropic_messages_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/openai_chat_completions_token_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/openai_completions_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/openai_responses_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/clients/renderer_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/decorators.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/AGENTS.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/env_group.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/environment.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/cli_agent_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/_filter.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/composable_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harness.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/harnesses/rlm.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/swe_debug_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/gym_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/harbor_env/mcp.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/mcp_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/opencode_rlm_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/rlm_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/file_locks.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/experimental/utils/git_checkout_cache.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/openenv_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/integrations/textarena_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/multiturn_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/python_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/sandbox_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/singleturn_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/stateful_tool_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/envs/tool_env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/errors.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/gepa/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/gepa/adapter.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/gepa/config.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/gepa/display.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/gepa/gepa_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/parsers/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/parsers/maybe_think_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/parsers/parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/parsers/think_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/parsers/xml_parser.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/inference/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/inference/client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/inference/server.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/config.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/orchestrator.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/trainer.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rl/trainer/utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/judge_rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/math_rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/rubrics/rubric_group.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/build.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/eval.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/gepa.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/install.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/rl.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/setup.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/train.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/tui.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/scripts/vllm.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/client/env_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/client/zmq_env_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/server/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_router.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_server.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/server/env_worker.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/server/zmq_env_server.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/serve/types.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/types.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/async_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/client_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/data_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/display_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/env_config_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/env_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/error_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/eval_display.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/eval_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/heartbeat.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/import_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/install_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/interception_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/logging_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/message_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/path_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/pricing_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/process_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/response_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/serve_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/thread_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/threaded_sandbox_client.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/usage_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/utils/version_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/README.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/RE_MIGRATION.md +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/artifact.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/config.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/env.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/model.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/program.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/runtime.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/runtime_handles.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/sandbox.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/state.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/task.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/toolset.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/user.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/__init__.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/binding_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_callable_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/config_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/endpoint_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/json_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/judge_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/lifecycle_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/mcp_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/object_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/program_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_owner_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/runtime_registry.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_python_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/sandbox_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/scoring_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/serialization_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/task_freeze_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/tool_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/toolset_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/trajectory_utils.py +0 -0
- {verifiers-0.1.15.dev15 → verifiers-0.1.15.dev17}/verifiers/v1/utils/usage_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: verifiers
|
|
3
|
-
Version: 0.1.15.dev15
|
|
3
|
+
Version: 0.1.15.dev17
|
|
4
4
|
Summary: Verifiers: Environments for LLM Reinforcement Learning
|
|
5
5
|
Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
|
|
6
6
|
Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
|
|
@@ -189,6 +189,14 @@ def test_load_gepa_toml_config_requires_env_table(tmp_path: Path):
|
|
|
189
189
|
load_gepa_toml_config(config_path)
|
|
190
190
|
|
|
191
191
|
|
|
192
|
+
def test_repo_gepa_example_configs_are_valid():
|
|
193
|
+
config_paths = sorted(Path("configs/gepa").glob("*.toml"))
|
|
194
|
+
assert config_paths
|
|
195
|
+
for config_path in config_paths:
|
|
196
|
+
loaded = load_gepa_toml_config(config_path)
|
|
197
|
+
assert loaded["envs"], f"{config_path} should contain at least one [[env]]"
|
|
198
|
+
|
|
199
|
+
|
|
192
200
|
def test_resolve_gepa_config_args_supports_plain_env_id():
|
|
193
201
|
args = argparse.Namespace(env_id_or_config="primeintellect/wordle")
|
|
194
202
|
|
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
2
|
import verifiers as vf
|
|
5
3
|
from verifiers.scripts.init import init_environment
|
|
6
4
|
|
|
@@ -21,20 +19,23 @@ def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
|
|
|
21
19
|
assert "EnvTaskset" not in content
|
|
22
20
|
|
|
23
21
|
|
|
24
|
-
def
|
|
22
|
+
def test_init_v1_writes_taskset_template(tmp_path: Path) -> None:
|
|
25
23
|
init_environment("bar", path=str(tmp_path), v1=True)
|
|
26
24
|
content = read_env_file(tmp_path, "bar")
|
|
27
25
|
|
|
28
26
|
assert "class BarTasksetConfig(vf.TasksetConfig):" in content
|
|
29
27
|
assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
|
|
28
|
+
assert 'system_prompt: vf.SystemPrompt = "Answer exactly."' in content
|
|
29
|
+
assert '"""Taskset implementation for bar.' in content
|
|
30
|
+
assert 'def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:' in content
|
|
30
31
|
assert (
|
|
31
|
-
'
|
|
32
|
+
'"""Return serializable task records as a list, generator, or Dataset."""'
|
|
32
33
|
in content
|
|
33
34
|
)
|
|
34
|
-
assert 'def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:' in content
|
|
35
35
|
assert "def load_system_prompt" not in content
|
|
36
36
|
assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
|
|
37
37
|
assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
|
|
38
|
+
assert '"""Typed taskset loader used by vf.load_taskset."""' in content
|
|
38
39
|
assert "return BarTaskset(config=config)" in content
|
|
39
40
|
assert "taskset=vf.load_taskset(config=config.taskset)" in content
|
|
40
41
|
assert '"""Loader pattern for all Taskset/Harness environments."""' in content
|
|
@@ -53,8 +54,10 @@ def test_init_v1_template_loads_with_vf_load_environment(
|
|
|
53
54
|
|
|
54
55
|
env = vf.load_environment("loadable-v1")
|
|
55
56
|
|
|
56
|
-
|
|
57
|
-
|
|
57
|
+
dataset = env.get_dataset()
|
|
58
|
+
|
|
59
|
+
assert len(dataset) == 1
|
|
60
|
+
assert dataset[0]["answer"] == "cba"
|
|
58
61
|
|
|
59
62
|
|
|
60
63
|
def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
|
|
@@ -258,6 +258,13 @@ class TestSavingResults:
|
|
|
258
258
|
assert result[0].get("foo") == "bar" # custom field from make_state fixture
|
|
259
259
|
assert result[0]["reward"] == 1.0
|
|
260
260
|
|
|
261
|
+
def test_states_to_outputs_requires_example_id(self, make_state):
|
|
262
|
+
state = make_state()
|
|
263
|
+
del state["example_id"]
|
|
264
|
+
|
|
265
|
+
with pytest.raises(KeyError):
|
|
266
|
+
states_to_outputs([state], state_columns=[])
|
|
267
|
+
|
|
261
268
|
def test_states_to_outputs_completion_keeps_messages(self, make_state):
|
|
262
269
|
states = [
|
|
263
270
|
make_state(
|
|
@@ -647,6 +654,22 @@ class TestBuilderPassAtK:
|
|
|
647
654
|
# 1 of 4 correct at threshold=0.7: pass^1 = C(1,1)/C(4,1) = 0.25
|
|
648
655
|
assert metadata["pass_all_k"]["1"] == pytest.approx(0.25)
|
|
649
656
|
|
|
657
|
+
def test_builder_requires_example_id(self):
|
|
658
|
+
builder = GenerateOutputsBuilder(
|
|
659
|
+
env_id="test-env",
|
|
660
|
+
env_args={},
|
|
661
|
+
model="test-model",
|
|
662
|
+
client=ClientConfig(api_base_url="http://localhost:8000/v1"),
|
|
663
|
+
num_examples=1,
|
|
664
|
+
rollouts_per_example=1,
|
|
665
|
+
state_columns=[],
|
|
666
|
+
sampling_args={},
|
|
667
|
+
results_path=Path("/tmp/test-results"),
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
with pytest.raises(KeyError):
|
|
671
|
+
builder.add_outputs([{"reward": 1.0, "metrics": {}}])
|
|
672
|
+
|
|
650
673
|
|
|
651
674
|
class TestMetricProtocol:
|
|
652
675
|
def test_all_metrics_satisfy_protocol(self):
|
|
@@ -2310,16 +2310,14 @@ def test_taskset_subclasses_inherit_registered_config_type() -> None:
|
|
|
2310
2310
|
|
|
2311
2311
|
def test_taskset_class_loader_owns_split_loading() -> None:
|
|
2312
2312
|
class LoaderTasksetConfig(TasksetConfig):
|
|
2313
|
-
system_prompt: vf.SystemPrompt
|
|
2313
|
+
system_prompt: vf.SystemPrompt = "class prompt"
|
|
2314
2314
|
|
|
2315
2315
|
class LoaderTaskset(Taskset[LoaderTasksetConfig]):
|
|
2316
2316
|
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
2317
2317
|
answer = "class eval" if split == "eval" else "class tasks"
|
|
2318
2318
|
return [{"prompt": [], "answer": answer}]
|
|
2319
2319
|
|
|
2320
|
-
def load_system_prompt(
|
|
2321
|
-
self, config: LoaderTasksetConfig
|
|
2322
|
-
) -> vf.SystemPrompt | None:
|
|
2320
|
+
def load_system_prompt(self, config: LoaderTasksetConfig) -> vf.SystemPrompt:
|
|
2323
2321
|
return config.system_prompt
|
|
2324
2322
|
|
|
2325
2323
|
defaulted = LoaderTaskset(config=LoaderTasksetConfig())
|
|
@@ -2341,6 +2339,25 @@ def test_taskset_class_loader_owns_split_loading() -> None:
|
|
|
2341
2339
|
assert disabled_prompt.system_prompt == []
|
|
2342
2340
|
|
|
2343
2341
|
|
|
2342
|
+
def test_system_prompt_alias_accepts_config_data(tmp_path) -> None:
|
|
2343
|
+
prompt_path = tmp_path / "system_prompt.txt"
|
|
2344
|
+
prompt_path.write_text("alias path system prompt", encoding="utf-8")
|
|
2345
|
+
|
|
2346
|
+
class PromptTasksetConfig(TasksetConfig):
|
|
2347
|
+
system_prompt: vf.SystemPrompt = None
|
|
2348
|
+
|
|
2349
|
+
config = PromptTasksetConfig.model_validate(
|
|
2350
|
+
{"system_prompt": {"path": str(prompt_path)}}
|
|
2351
|
+
)
|
|
2352
|
+
assert isinstance(config.system_prompt, vf.SystemPromptConfig)
|
|
2353
|
+
|
|
2354
|
+
taskset = Taskset(config=config)
|
|
2355
|
+
|
|
2356
|
+
assert taskset.system_prompt == [
|
|
2357
|
+
{"role": "system", "content": "alias path system prompt"}
|
|
2358
|
+
]
|
|
2359
|
+
|
|
2360
|
+
|
|
2344
2361
|
def test_taskset_load_tasks_can_return_empty_dataset() -> None:
|
|
2345
2362
|
class LocalTasksetConfig(TasksetConfig):
|
|
2346
2363
|
enabled: bool = True
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from datasets import Dataset
|
|
4
|
+
|
|
5
|
+
from verifiers.v1.utils.taskset_utils import dataset_from_result
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def task_payload(row: dict) -> dict:
|
|
9
|
+
return json.loads(row["info"]["task"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_dataset_from_result_assigns_example_id_to_iterable_records():
|
|
13
|
+
dataset = dataset_from_result(
|
|
14
|
+
[
|
|
15
|
+
{"question": "Reverse abc.", "answer": "cba"},
|
|
16
|
+
{"question": "Reverse xyz.", "answer": "zyx"},
|
|
17
|
+
],
|
|
18
|
+
"ReverseTextTaskset",
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
rows = list(dataset)
|
|
22
|
+
payloads = [task_payload(row) for row in rows]
|
|
23
|
+
|
|
24
|
+
assert [row["example_id"] for row in rows] == [0, 1]
|
|
25
|
+
assert [payload["example_id"] for payload in payloads] == [0, 1]
|
|
26
|
+
assert all(len(payload["task_id"]) == 32 for payload in payloads)
|
|
27
|
+
assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1"})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_dataset_from_result_overwrites_existing_example_id_column():
|
|
31
|
+
raw_dataset = Dataset.from_list(
|
|
32
|
+
[
|
|
33
|
+
{"question": "Reverse abc.", "answer": "cba", "example_id": None},
|
|
34
|
+
{"question": "Reverse xyz.", "answer": "zyx", "example_id": 99},
|
|
35
|
+
]
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
dataset = dataset_from_result(raw_dataset, "ReverseTextTaskset")
|
|
39
|
+
|
|
40
|
+
rows = list(dataset)
|
|
41
|
+
payloads = [task_payload(row) for row in rows]
|
|
42
|
+
|
|
43
|
+
assert [row["example_id"] for row in rows] == [0, 1]
|
|
44
|
+
assert [payload["example_id"] for payload in payloads] == [0, 1]
|
|
45
|
+
assert all(len(payload["task_id"]) == 32 for payload in payloads)
|
|
46
|
+
assert {payload["task_id"] for payload in payloads}.isdisjoint({"0", "1", "99"})
|
|
@@ -155,67 +155,72 @@ def load_environment(**kwargs) -> vf.Environment:
|
|
|
155
155
|
raise NotImplementedError("Implement load_environment here.")
|
|
156
156
|
"""
|
|
157
157
|
|
|
158
|
-
|
|
158
|
+
V1_TASKSET_TEMPLATE = """\
|
|
159
159
|
import verifiers as vf
|
|
160
160
|
|
|
161
161
|
|
|
162
162
|
class {taskset_config_name}(vf.TasksetConfig):
|
|
163
|
-
|
|
163
|
+
\"\"\"User-facing task settings for {env_id_dash}.\"\"\"
|
|
164
|
+
|
|
165
|
+
system_prompt: vf.SystemPrompt = "Answer exactly."
|
|
164
166
|
|
|
165
167
|
|
|
166
168
|
class {taskset_name}(vf.Taskset[{taskset_config_name}]):
|
|
169
|
+
\"\"\"Taskset implementation for {env_id_dash}.
|
|
170
|
+
|
|
171
|
+
Add task loading, task-owned toolsets, user behavior, lifecycle hooks,
|
|
172
|
+
metrics, rewards, and advantages on this class.
|
|
173
|
+
\"\"\"
|
|
174
|
+
|
|
167
175
|
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
168
|
-
|
|
176
|
+
\"\"\"Return serializable task records as a list, generator, or Dataset.\"\"\"
|
|
177
|
+
if split == "eval":
|
|
178
|
+
return []
|
|
179
|
+
return [
|
|
180
|
+
{
|
|
181
|
+
"prompt": [{"role": "user", "content": "Reverse abc."}],
|
|
182
|
+
"answer": "cba",
|
|
183
|
+
"max_turns": 1,
|
|
184
|
+
}
|
|
185
|
+
]
|
|
169
186
|
|
|
170
187
|
@vf.reward(weight=1.0)
|
|
171
188
|
async def correct_answer(self, task: vf.Task, state: vf.State) -> float:
|
|
172
|
-
|
|
189
|
+
\"\"\"Score the final assistant response for one rollout.\"\"\"
|
|
190
|
+
messages = vf.get_messages(state.get("completion") or [], role="assistant")
|
|
191
|
+
if not messages:
|
|
192
|
+
return 0.0
|
|
193
|
+
response = str(messages[-1].content or "").strip()
|
|
194
|
+
return float(response == task["answer"])
|
|
173
195
|
|
|
174
196
|
|
|
175
197
|
def load_taskset(config: {taskset_config_name}) -> {taskset_name}:
|
|
198
|
+
\"\"\"Typed taskset loader used by vf.load_taskset.\"\"\"
|
|
176
199
|
return {taskset_name}(config=config)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
180
|
-
\"\"\"Loader pattern for all Taskset/Harness environments.\"\"\"
|
|
181
|
-
return vf.Env(
|
|
182
|
-
taskset=vf.load_taskset(config=config.taskset),
|
|
183
|
-
harness=vf.load_harness(config=config.harness),
|
|
184
|
-
)
|
|
185
200
|
"""
|
|
186
201
|
|
|
187
|
-
V1_HARNESS_ENVIRONMENT_TEMPLATE = """\
|
|
188
|
-
import verifiers as vf
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
class {taskset_config_name}(vf.TasksetConfig):
|
|
192
|
-
system_prompt: vf.SystemPrompt = "Replace this with the system prompt for {env_id_dash}."
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
class {taskset_name}(vf.Taskset[{taskset_config_name}]):
|
|
196
|
-
def load_tasks(self, split: vf.TaskSplit = "train") -> vf.Tasks:
|
|
197
|
-
raise NotImplementedError("Load tasks for {env_id_dash}.")
|
|
198
|
-
|
|
199
|
-
@vf.reward(weight=1.0)
|
|
200
|
-
async def correct_answer(self, task: vf.Task, state: vf.State) -> float:
|
|
201
|
-
raise NotImplementedError("Score a completed rollout for {env_id_dash}.")
|
|
202
202
|
|
|
203
|
+
V1_HARNESS_TEMPLATE = """\
|
|
203
204
|
|
|
204
205
|
class {harness_config_name}(vf.HarnessConfig):
|
|
205
|
-
|
|
206
|
+
\"\"\"Execution settings for {env_id_dash}.\"\"\"
|
|
206
207
|
|
|
207
208
|
|
|
208
209
|
class {harness_name}(vf.Harness[{harness_config_name}]):
|
|
209
|
-
|
|
210
|
+
\"\"\"Reusable execution behavior for {env_id_dash}.
|
|
210
211
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
212
|
+
Add harness-owned program, sandbox, endpoint, model, toolset, or lifecycle
|
|
213
|
+
behavior here when this environment owns a custom execution mechanism.
|
|
214
|
+
\"\"\"
|
|
214
215
|
|
|
215
216
|
|
|
216
217
|
def load_harness(config: {harness_config_name}) -> {harness_name}:
|
|
218
|
+
\"\"\"Typed harness loader used by vf.load_harness.\"\"\"
|
|
217
219
|
return {harness_name}(config=config)
|
|
220
|
+
"""
|
|
221
|
+
|
|
218
222
|
|
|
223
|
+
V1_ENV_LOADER_TEMPLATE = """\
|
|
219
224
|
|
|
220
225
|
def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
221
226
|
\"\"\"Loader pattern for all Taskset/Harness environments.\"\"\"
|
|
@@ -225,6 +230,11 @@ def load_environment(config: vf.EnvConfig) -> vf.Env:
|
|
|
225
230
|
)
|
|
226
231
|
"""
|
|
227
232
|
|
|
233
|
+
V1_ENVIRONMENT_TEMPLATE = V1_TASKSET_TEMPLATE + V1_ENV_LOADER_TEMPLATE
|
|
234
|
+
V1_HARNESS_ENVIRONMENT_TEMPLATE = (
|
|
235
|
+
V1_TASKSET_TEMPLATE + V1_HARNESS_TEMPLATE + V1_ENV_LOADER_TEMPLATE
|
|
236
|
+
)
|
|
237
|
+
|
|
228
238
|
OPENENV_ENVIRONMENT_TEMPLATE = """\
|
|
229
239
|
import verifiers as vf
|
|
230
240
|
from tasksets import OpenEnvTaskset, OpenEnvTasksetConfig
|
|
@@ -162,10 +162,12 @@ class PassAtKMetric:
|
|
|
162
162
|
self.reset()
|
|
163
163
|
|
|
164
164
|
def add_output(self, output: RolloutOutput) -> None:
|
|
165
|
+
example_id = output["example_id"]
|
|
166
|
+
if example_id is None:
|
|
167
|
+
raise ValueError("output['example_id'] is required.")
|
|
165
168
|
if not self._k_values:
|
|
166
169
|
return
|
|
167
170
|
|
|
168
|
-
example_id = output.get("example_id", 0)
|
|
169
171
|
self._example_counts[example_id] += 1
|
|
170
172
|
if output.get("reward", 0.0) >= self.threshold:
|
|
171
173
|
self._example_correct[example_id] += 1
|
|
@@ -218,8 +218,12 @@ def state_to_output(
|
|
|
218
218
|
else:
|
|
219
219
|
raise TypeError("state['timing'] must be a RolloutTiming or mapping.")
|
|
220
220
|
|
|
221
|
+
example_id = state["example_id"]
|
|
222
|
+
if example_id is None:
|
|
223
|
+
raise ValueError("state['example_id'] is required.")
|
|
224
|
+
|
|
221
225
|
output = RolloutOutput(
|
|
222
|
-
example_id=
|
|
226
|
+
example_id=example_id,
|
|
223
227
|
prompt=state.get("prompt"),
|
|
224
228
|
completion=state.get("completion"),
|
|
225
229
|
answer=state.get("answer", ""),
|
|
@@ -671,9 +675,16 @@ class GenerateOutputsBuilder:
|
|
|
671
675
|
def build_outputs(self, sort_by_example_id: bool = False) -> list[RolloutOutput]:
|
|
672
676
|
"""Return (sorted) accumulated outputs"""
|
|
673
677
|
if sort_by_example_id:
|
|
674
|
-
return sorted(self.outputs, key=
|
|
678
|
+
return sorted(self.outputs, key=self.output_example_id)
|
|
675
679
|
return self.outputs
|
|
676
680
|
|
|
681
|
+
@staticmethod
|
|
682
|
+
def output_example_id(output: RolloutOutput) -> int:
|
|
683
|
+
example_id = output["example_id"]
|
|
684
|
+
if example_id is None:
|
|
685
|
+
raise ValueError("output['example_id'] is required.")
|
|
686
|
+
return example_id
|
|
687
|
+
|
|
677
688
|
def build(self, sort_by_example_id: bool = False) -> GenerateOutputs:
|
|
678
689
|
"""Build GenerateOutputs from accumulated outputs."""
|
|
679
690
|
return GenerateOutputs(
|
|
@@ -59,14 +59,13 @@ from .toolset import (
|
|
|
59
59
|
)
|
|
60
60
|
from .utils.endpoint_utils import Endpoint
|
|
61
61
|
from .utils.binding_utils import BindingsConfig, ObjectsConfig
|
|
62
|
-
from .utils.prompt_utils import SystemPromptConfig, SystemPromptStrategy
|
|
62
|
+
from .utils.prompt_utils import SystemPrompt, SystemPromptConfig, SystemPromptStrategy
|
|
63
63
|
from .types import (
|
|
64
64
|
ConfigData,
|
|
65
65
|
Handler,
|
|
66
66
|
JsonData,
|
|
67
67
|
Objects,
|
|
68
68
|
PromptInput,
|
|
69
|
-
SystemPrompt,
|
|
70
69
|
TaskSplit,
|
|
71
70
|
Tasks,
|
|
72
71
|
)
|
|
@@ -72,8 +72,8 @@ from .utils.sandbox_program_utils import (
|
|
|
72
72
|
run_sandbox_python_program,
|
|
73
73
|
)
|
|
74
74
|
from .utils.prompt_utils import (
|
|
75
|
+
SystemPrompt,
|
|
75
76
|
SystemPromptStrategy,
|
|
76
|
-
SystemPromptConfig,
|
|
77
77
|
normalize_prompt,
|
|
78
78
|
normalize_system_prompt,
|
|
79
79
|
resolve_system_prompt,
|
|
@@ -88,7 +88,6 @@ from .types import (
|
|
|
88
88
|
ConfigData,
|
|
89
89
|
JsonData,
|
|
90
90
|
Objects,
|
|
91
|
-
PromptInput,
|
|
92
91
|
)
|
|
93
92
|
|
|
94
93
|
if TYPE_CHECKING:
|
|
@@ -106,7 +105,7 @@ class HarnessConfig(LifecycleConfig):
|
|
|
106
105
|
)
|
|
107
106
|
program: ProgramConfig = ProgramConfig()
|
|
108
107
|
model: ModelConfig = ModelConfig()
|
|
109
|
-
system_prompt:
|
|
108
|
+
system_prompt: SystemPrompt = None
|
|
110
109
|
system_prompt_strategy: SystemPromptStrategy = "HT"
|
|
111
110
|
sandbox: SandboxConfig | None = None
|
|
112
111
|
user: UserConfig | None = None
|
|
@@ -217,9 +216,7 @@ class Harness(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
|
|
|
217
216
|
self.endpoint = self.load_endpoint()
|
|
218
217
|
self.program = self.compile_program(self.program_config)
|
|
219
218
|
|
|
220
|
-
def load_system_prompt(
|
|
221
|
-
self, config: ConfigT
|
|
222
|
-
) -> PromptInput | SystemPromptConfig | None:
|
|
219
|
+
def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
|
|
223
220
|
return config.system_prompt
|
|
224
221
|
|
|
225
222
|
def load_sandbox(self, config: SandboxConfig | None) -> SandboxConfig | None:
|
|
@@ -18,7 +18,7 @@ from .utils.binding_utils import (
|
|
|
18
18
|
BindingsConfig,
|
|
19
19
|
ObjectsConfig,
|
|
20
20
|
)
|
|
21
|
-
from .utils.prompt_utils import
|
|
21
|
+
from .utils.prompt_utils import SystemPrompt, normalize_system_prompt
|
|
22
22
|
from .utils.config_utils import (
|
|
23
23
|
coerce_config,
|
|
24
24
|
config_ref_context,
|
|
@@ -36,7 +36,6 @@ from .utils.taskset_utils import (
|
|
|
36
36
|
from .types import (
|
|
37
37
|
JsonData,
|
|
38
38
|
Objects,
|
|
39
|
-
PromptInput,
|
|
40
39
|
TaskSplit,
|
|
41
40
|
Tasks,
|
|
42
41
|
)
|
|
@@ -48,7 +47,7 @@ class TasksetConfig(LifecycleConfig):
|
|
|
48
47
|
default=None,
|
|
49
48
|
validation_alias=AliasChoices("taskset_id", "id"),
|
|
50
49
|
)
|
|
51
|
-
system_prompt:
|
|
50
|
+
system_prompt: SystemPrompt = None
|
|
52
51
|
user: UserConfig | None = None
|
|
53
52
|
bindings: BindingsConfig = BindingsConfig()
|
|
54
53
|
objects: ObjectsConfig = ObjectsConfig()
|
|
@@ -152,7 +151,5 @@ class Taskset(RuntimeOwnerMixin[ConfigT], Generic[ConfigT]):
|
|
|
152
151
|
def __len__(self) -> int:
|
|
153
152
|
return len(self.get_dataset())
|
|
154
153
|
|
|
155
|
-
def load_system_prompt(
|
|
156
|
-
self, config: ConfigT
|
|
157
|
-
) -> PromptInput | SystemPromptConfig | None:
|
|
154
|
+
def load_system_prompt(self, config: ConfigT) -> SystemPrompt:
|
|
158
155
|
return config.system_prompt
|
|
@@ -41,7 +41,6 @@ Tasks: TypeAlias = Dataset | Iterable[JsonData] | Iterable["Task"]
|
|
|
41
41
|
|
|
42
42
|
PromptMessage: TypeAlias = Message | JsonData
|
|
43
43
|
PromptInput: TypeAlias = str | Sequence[PromptMessage]
|
|
44
|
-
SystemPrompt: TypeAlias = PromptInput
|
|
45
44
|
|
|
46
45
|
ModelClient: TypeAlias = Client | ClientConfig
|
|
47
46
|
RuntimeObject: TypeAlias = object
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import importlib.util
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Literal, cast
|
|
4
|
+
from typing import TYPE_CHECKING, Literal, TypeAlias, cast
|
|
5
5
|
|
|
6
6
|
from pydantic import model_validator
|
|
7
7
|
from typing_extensions import Self
|
|
@@ -9,7 +9,7 @@ from verifiers.types import Messages, SystemMessage
|
|
|
9
9
|
from verifiers.utils.message_utils import normalize_messages
|
|
10
10
|
|
|
11
11
|
from ..config import Config
|
|
12
|
-
from ..types import JsonData, PromptInput
|
|
12
|
+
from ..types import JsonData, PromptInput
|
|
13
13
|
from .config_utils import current_config_ref_module
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
@@ -64,13 +64,15 @@ class SystemPromptConfig(Config):
|
|
|
64
64
|
messages: list[JsonData] = []
|
|
65
65
|
|
|
66
66
|
@model_validator(mode="after")
|
|
67
|
-
def
|
|
68
|
-
|
|
67
|
+
def validate_one_input(self) -> Self:
|
|
68
|
+
inputs = [
|
|
69
69
|
self.path is not None,
|
|
70
70
|
bool(self.messages),
|
|
71
71
|
]
|
|
72
|
-
if sum(
|
|
73
|
-
raise ValueError(
|
|
72
|
+
if sum(inputs) != 1:
|
|
73
|
+
raise ValueError(
|
|
74
|
+
"SystemPromptConfig requires exactly one of path or messages."
|
|
75
|
+
)
|
|
74
76
|
return self
|
|
75
77
|
|
|
76
78
|
def load(self, field_name: str) -> PromptInput | None:
|
|
@@ -81,6 +83,9 @@ class SystemPromptConfig(Config):
|
|
|
81
83
|
return self.messages
|
|
82
84
|
|
|
83
85
|
|
|
86
|
+
SystemPrompt: TypeAlias = PromptInput | SystemPromptConfig | None
|
|
87
|
+
|
|
88
|
+
|
|
84
89
|
def normalize_prompt(
|
|
85
90
|
value: PromptInput | None, field_name: str = "prompt"
|
|
86
91
|
) -> list[JsonData]:
|
|
@@ -95,7 +100,7 @@ def normalize_prompt(
|
|
|
95
100
|
|
|
96
101
|
|
|
97
102
|
def normalize_system_prompt(
|
|
98
|
-
value: SystemPrompt
|
|
103
|
+
value: SystemPrompt,
|
|
99
104
|
field_name: str = "system_prompt",
|
|
100
105
|
) -> list[JsonData]:
|
|
101
106
|
value = resolve_system_prompt_input(value, field_name=field_name)
|
|
@@ -111,7 +116,7 @@ def normalize_system_prompt(
|
|
|
111
116
|
|
|
112
117
|
|
|
113
118
|
def resolve_system_prompt_input(
|
|
114
|
-
value:
|
|
119
|
+
value: SystemPrompt,
|
|
115
120
|
*,
|
|
116
121
|
field_name: str,
|
|
117
122
|
) -> PromptInput | None:
|
|
@@ -38,10 +38,8 @@ def prepare_task(task: Task, taskset_id: str) -> Task:
|
|
|
38
38
|
raise TypeError("v1 task loaders must return Task objects.")
|
|
39
39
|
prepared = Task(cast(JsonData, dict(task)))
|
|
40
40
|
prepared["taskset_id"] = taskset_id
|
|
41
|
-
if "task_id"
|
|
41
|
+
if prepared.get("task_id") is not None:
|
|
42
42
|
prepared["task_id"] = str(prepared["task_id"])
|
|
43
|
-
elif "example_id" in prepared:
|
|
44
|
-
prepared["task_id"] = str(prepared["example_id"])
|
|
45
43
|
else:
|
|
46
44
|
prepared["task_id"] = uuid.uuid4().hex
|
|
47
45
|
return prepared.freeze()
|
|
@@ -51,13 +49,13 @@ def dataset_record_from_task(
|
|
|
51
49
|
task: Task,
|
|
52
50
|
taskset_id: str,
|
|
53
51
|
index: int,
|
|
54
|
-
|
|
52
|
+
record: JsonData | None = None,
|
|
55
53
|
) -> JsonData:
|
|
56
54
|
data = Task(cast(JsonData, dict(task)))
|
|
57
|
-
data
|
|
55
|
+
data["example_id"] = index
|
|
58
56
|
normalized = prepare_task(data, taskset_id)
|
|
59
57
|
task_payload = dict(normalized)
|
|
60
|
-
dataset_record = deepcopy(dict(
|
|
58
|
+
dataset_record = deepcopy(dict(record or {}))
|
|
61
59
|
dataset_record["prompt"] = task_payload["prompt"]
|
|
62
60
|
dataset_record["example_id"] = task_payload["example_id"]
|
|
63
61
|
info = dataset_record.get("info")
|
|
@@ -82,9 +80,10 @@ def dataset_from_result(result: Tasks, taskset_id: str) -> Dataset:
|
|
|
82
80
|
if isinstance(result, Dataset):
|
|
83
81
|
records: list[JsonData] = []
|
|
84
82
|
for index, record in enumerate(result):
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
83
|
+
row = cast(JsonData, dict(record))
|
|
84
|
+
row["example_id"] = index
|
|
85
|
+
task = task_from_dataset_record(row, taskset_id)
|
|
86
|
+
records.append(dataset_record_from_task(task, taskset_id, index, row))
|
|
88
87
|
return Dataset.from_list(records)
|
|
89
88
|
tasks = tasks_from_result(result, taskset_id)
|
|
90
89
|
return Dataset.from_list(dataset_records_from_tasks(tasks, taskset_id))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|