verifiers 0.1.15.dev8__tar.gz → 0.1.15.dev10__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (326)
  1. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/PKG-INFO +11 -6
  2. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/README.md +9 -5
  3. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/pyproject.toml +1 -0
  4. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_client_multimodal_types.py +25 -0
  5. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_envs.py +7 -3
  6. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_eval_cli.py +19 -0
  7. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_harbor_env_mcp.py +43 -89
  8. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_imports.py +20 -0
  9. verifiers-0.1.15.dev10/tests/test_init_script.py +80 -0
  10. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_lean_task.py +10 -8
  11. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_opencode_rlm_env.py +35 -44
  12. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_openenv_client.py +89 -31
  13. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_prime_plugin.py +5 -5
  14. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_renderer_client.py +32 -0
  15. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_rlm_env.py +1 -64
  16. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_tool_utils.py +11 -6
  17. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_config_extension.py +273 -695
  18. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_harbor_cli.py +5 -0
  19. verifiers-0.1.15.dev10/tests/test_v1_rlm_swe.py +780 -0
  20. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_runtime_lifecycle.py +130 -73
  21. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_taskset_bindings.py +84 -2
  22. verifiers-0.1.15.dev10/tests/test_v1_textarena_taskset.py +219 -0
  23. verifiers-0.1.15.dev10/tests/test_wordle_v1_env.py +118 -0
  24. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/__init__.py +15 -7
  25. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/plugins/prime.py +1 -5
  26. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/anthropic_messages_client.py +27 -44
  27. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/client.py +12 -14
  28. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/openai_chat_completions_client.py +1 -6
  29. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/openai_chat_completions_token_client.py +14 -17
  30. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/openai_responses_client.py +13 -18
  31. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/renderer_client.py +30 -62
  32. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/env_group.py +0 -16
  33. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/environment.py +14 -27
  34. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/composable_env.py +13 -21
  35. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/rlm.py +7 -8
  36. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/swe_debug_env.py +12 -19
  37. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/task.py +9 -18
  38. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +5 -18
  39. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/log_parser.py +1 -10
  40. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +3 -7
  41. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +2 -2
  42. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +24 -34
  43. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +34 -44
  44. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/gym_env.py +22 -19
  45. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/mcp.py +17 -28
  46. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/mcp_env.py +6 -13
  47. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_rlm_env.py +9 -16
  48. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/rlm_env.py +40 -62
  49. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/git_checkout_cache.py +13 -31
  50. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/openenv_env.py +75 -126
  51. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/multiturn_env.py +1 -5
  52. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/sandbox_env.py +1 -5
  53. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/gepa/display.py +2 -2
  54. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/gepa/gepa_utils.py +6 -14
  55. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/rubric.py +7 -33
  56. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/build.py +17 -29
  57. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/eval.py +3 -3
  58. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/init.py +91 -68
  59. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_server.py +17 -0
  60. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_worker.py +19 -4
  61. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/async_utils.py +0 -8
  62. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/client_utils.py +19 -38
  63. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/data_utils.py +10 -69
  64. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/display_utils.py +3 -29
  65. verifiers-0.1.15.dev10/verifiers/utils/env_utils.py +317 -0
  66. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/error_utils.py +0 -10
  67. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/eval_utils.py +21 -38
  68. verifiers-0.1.15.dev10/verifiers/utils/import_utils.py +11 -0
  69. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/install_utils.py +10 -11
  70. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/interception_utils.py +9 -11
  71. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/logging_utils.py +11 -17
  72. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/message_utils.py +9 -20
  73. verifiers-0.1.15.dev10/verifiers/utils/response_utils.py +102 -0
  74. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/save_utils.py +13 -21
  75. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/thread_utils.py +2 -27
  76. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/threaded_sandbox_client.py +2 -2
  77. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/tool_utils.py +1 -0
  78. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md +82 -88
  79. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/README.md +63 -68
  80. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/RE_MIGRATION.md +23 -16
  81. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/__init__.py +15 -1
  82. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/config.py +6 -129
  83. verifiers-0.1.15.dev10/verifiers/v1/env.py +180 -0
  84. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/harness.py +11 -13
  85. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/command.py +18 -22
  86. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/configs.py +1 -1
  87. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/mini_swe_agent.py +3 -3
  88. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/opencode.py +4 -4
  89. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/pi.py +9 -13
  90. verifiers-0.1.15.dev10/verifiers/v1/packages/harnesses/rlm.py +601 -0
  91. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/terminus_2.py +11 -16
  92. verifiers-0.1.15.dev10/verifiers/v1/packages/tasksets/__init__.py +17 -0
  93. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/tasksets/harbor.py +4 -2
  94. verifiers-0.1.15.dev10/verifiers/v1/packages/tasksets/textarena.py +153 -0
  95. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/runtime.py +61 -42
  96. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/taskset.py +12 -13
  97. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/types.py +1 -1
  98. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/config_utils.py +2 -68
  99. verifiers-0.1.15.dev10/verifiers/v1/utils/object_utils.py +59 -0
  100. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/program_utils.py +2 -1
  101. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/runtime_owner_utils.py +1 -6
  102. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/sandbox_utils.py +2 -0
  103. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/scoring_utils.py +0 -13
  104. verifiers-0.1.15.dev8/tests/test_v1_rlm_swe.py +0 -390
  105. verifiers-0.1.15.dev8/verifiers/utils/env_utils.py +0 -195
  106. verifiers-0.1.15.dev8/verifiers/utils/import_utils.py +0 -16
  107. verifiers-0.1.15.dev8/verifiers/utils/response_utils.py +0 -94
  108. verifiers-0.1.15.dev8/verifiers/utils/tunnel_utils.py +0 -266
  109. verifiers-0.1.15.dev8/verifiers/v1/env.py +0 -351
  110. verifiers-0.1.15.dev8/verifiers/v1/packages/harnesses/rlm.py +0 -290
  111. verifiers-0.1.15.dev8/verifiers/v1/packages/tasksets/__init__.py +0 -3
  112. verifiers-0.1.15.dev8/verifiers/v1/utils/component_utils.py +0 -136
  113. verifiers-0.1.15.dev8/verifiers/v1/utils/object_utils.py +0 -52
  114. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/.gitignore +0 -0
  115. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/LICENSE +0 -0
  116. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/AGENTS.md +0 -0
  117. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/README.md +0 -0
  118. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/__init__.py +0 -0
  119. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/conftest.py +0 -0
  120. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_browser_env.py +0 -0
  121. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_build_script.py +0 -0
  122. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_cli_agent_env.py +0 -0
  123. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_client_auth_errors.py +0 -0
  124. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_client_config.py +0 -0
  125. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_composable_env.py +0 -0
  126. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_context_token_metrics.py +0 -0
  127. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_decorator_ranks.py +0 -0
  128. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_endpoint_registry.py +0 -0
  129. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_env_group.py +0 -0
  130. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_env_server.py +0 -0
  131. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_environment.py +0 -0
  132. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_environment_extra.py +0 -0
  133. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_error_chain.py +0 -0
  134. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_eval_display.py +0 -0
  135. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_eval_utils.py +0 -0
  136. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_gepa_cli.py +0 -0
  137. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_gepa_utils.py +0 -0
  138. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_gym_env.py +0 -0
  139. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_install_utils.py +0 -0
  140. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_interception_utils.py +0 -0
  141. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_langchain_deep_agents_wikispeedia.py +0 -0
  142. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_logging.py +0 -0
  143. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_math_rubric.py +0 -0
  144. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_maybe_think_parser.py +0 -0
  145. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_mcp_search_env.py +0 -0
  146. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_message_utils.py +0 -0
  147. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_message_utils_multimodal.py +0 -0
  148. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_multiturn_env.py +0 -0
  149. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_nemorl_client.py +0 -0
  150. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_openai_chat_completions_token_client.py +0 -0
  151. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_openai_responses_client.py +0 -0
  152. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_opencode_harbor.py +0 -0
  153. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_parser.py +0 -0
  154. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_path_utils.py +0 -0
  155. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_per_turn_timing.py +0 -0
  156. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_pricing_utils.py +0 -0
  157. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_renderer_e2e.py +0 -0
  158. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_rlm_composable_env.py +0 -0
  159. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_rubric.py +0 -0
  160. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_rubric_group.py +0 -0
  161. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_sandbox_env.py +0 -0
  162. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_sandbox_mixin.py +0 -0
  163. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_save_utils.py +0 -0
  164. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_setup_script.py +0 -0
  165. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_singleturn_env.py +0 -0
  166. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_stateful_tool_env.py +0 -0
  167. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_think_parser.py +0 -0
  168. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_tool_env.py +0 -0
  169. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_trajectory_processing.py +0 -0
  170. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_tui_info_formatting.py +0 -0
  171. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_types.py +0 -0
  172. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_bfcl.py +0 -0
  173. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_empty_completions.py +0 -0
  174. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_endpoint_protocols.py +0 -0
  175. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_example_counts.py +0 -0
  176. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_group_reward_env.py +0 -0
  177. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_mini_swe_agent.py +0 -0
  178. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_v1_scoring_functions.py +0 -0
  179. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_wiki_search_v1.py +0 -0
  180. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_wordle_env.py +0 -0
  181. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/tests/test_xml_parser.py +0 -0
  182. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/AGENTS.md +0 -0
  183. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/__init__.py +0 -0
  184. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/__init__.py +0 -0
  185. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/build.py +0 -0
  186. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/eval.py +0 -0
  187. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/gepa.py +0 -0
  188. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/init.py +0 -0
  189. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/install.py +0 -0
  190. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/commands/setup.py +0 -0
  191. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/plugins/__init__.py +0 -0
  192. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/cli/tui.py +0 -0
  193. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/__init__.py +0 -0
  194. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/nemorl_chat_completions_client.py +0 -0
  195. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/clients/openai_completions_client.py +0 -0
  196. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/decorators.py +0 -0
  197. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/AGENTS.md +0 -0
  198. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/__init__.py +0 -0
  199. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/README.md +0 -0
  200. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/__init__.py +0 -0
  201. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/cli_agent_env.py +0 -0
  202. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/README.md +0 -0
  203. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/__init__.py +0 -0
  204. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/_filter.py +0 -0
  205. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harness.py +0 -0
  206. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/__init__.py +0 -0
  207. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +0 -0
  208. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/opencode.py +0 -0
  209. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/harnesses/prompt.txt +0 -0
  210. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/__init__.py +0 -0
  211. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/__init__.py +0 -0
  212. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +0 -0
  213. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/cp/test_utils.py +0 -0
  214. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/harbor/__init__.py +0 -0
  215. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +0 -0
  216. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/lean/__init__.py +0 -0
  217. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/math/__init__.py +0 -0
  218. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/math/math_task.py +0 -0
  219. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/__init__.py +0 -0
  220. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +0 -0
  221. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/create_fix_patch.sh +0 -0
  222. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +0 -0
  223. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +0 -0
  224. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +0 -0
  225. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +0 -0
  226. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +0 -0
  227. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/__init__.py +0 -0
  228. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/harbor_env/env.py +0 -0
  229. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_env.py +0 -0
  230. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/opencode_qa_env.py +0 -0
  231. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/sandbox_mixin.py +0 -0
  232. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/__init__.py +0 -0
  233. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/experimental/utils/file_locks.py +0 -0
  234. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/README.md +0 -0
  235. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/__init__.py +0 -0
  236. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/README.md +0 -0
  237. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/__init__.py +0 -0
  238. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/browser_env.py +0 -0
  239. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/__init__.py +0 -0
  240. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/base.py +0 -0
  241. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/cua_mode.py +0 -0
  242. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/browser_env/modes/dom_mode.py +0 -0
  243. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/reasoninggym_env.py +0 -0
  244. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/integrations/textarena_env.py +0 -0
  245. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/python_env.py +0 -0
  246. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/singleturn_env.py +0 -0
  247. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/stateful_tool_env.py +0 -0
  248. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/envs/tool_env.py +0 -0
  249. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/errors.py +0 -0
  250. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/gepa/__init__.py +0 -0
  251. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/gepa/adapter.py +0 -0
  252. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/gepa/config.py +0 -0
  253. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/parsers/__init__.py +0 -0
  254. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/parsers/maybe_think_parser.py +0 -0
  255. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/parsers/parser.py +0 -0
  256. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/parsers/think_parser.py +0 -0
  257. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/parsers/xml_parser.py +0 -0
  258. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/README.md +0 -0
  259. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/__init__.py +0 -0
  260. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/inference/__init__.py +0 -0
  261. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/inference/client.py +0 -0
  262. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/inference/server.py +0 -0
  263. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/__init__.py +0 -0
  264. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/config.py +0 -0
  265. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/orchestrator.py +0 -0
  266. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/trainer.py +0 -0
  267. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rl/trainer/utils.py +0 -0
  268. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/__init__.py +0 -0
  269. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/experimental/hybrid_math_rubric.py +0 -0
  270. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/judge_rubric.py +0 -0
  271. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/math_rubric.py +0 -0
  272. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/rubrics/rubric_group.py +0 -0
  273. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/__init__.py +0 -0
  274. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/gepa.py +0 -0
  275. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/install.py +0 -0
  276. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/rl.py +0 -0
  277. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/setup.py +0 -0
  278. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/train.py +0 -0
  279. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/tui.py +0 -0
  280. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/scripts/vllm.py +0 -0
  281. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/__init__.py +0 -0
  282. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/client/env_client.py +0 -0
  283. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/client/zmq_env_client.py +0 -0
  284. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/server/__init__.py +0 -0
  285. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/server/env_router.py +0 -0
  286. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/server/zmq_env_server.py +0 -0
  287. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/serve/types.py +0 -0
  288. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/types.py +0 -0
  289. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/__init__.py +0 -0
  290. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/config_utils.py +0 -0
  291. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/env_config_utils.py +0 -0
  292. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/eval_display.py +0 -0
  293. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/heartbeat.py +0 -0
  294. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/metric_utils.py +0 -0
  295. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/path_utils.py +0 -0
  296. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/pricing_utils.py +0 -0
  297. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/process_utils.py +0 -0
  298. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/serve_utils.py +0 -0
  299. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/usage_utils.py +0 -0
  300. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/utils/version_utils.py +0 -0
  301. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/__init__.py +0 -0
  302. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/packages/harnesses/__init__.py +0 -0
  303. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/state.py +0 -0
  304. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/task.py +0 -0
  305. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/toolset.py +0 -0
  306. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/user.py +0 -0
  307. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/__init__.py +0 -0
  308. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/artifact_utils.py +0 -0
  309. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/binding_utils.py +0 -0
  310. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/config_callable_utils.py +0 -0
  311. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/endpoint_utils.py +0 -0
  312. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/json_utils.py +0 -0
  313. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/judge_utils.py +0 -0
  314. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/lifecycle_utils.py +0 -0
  315. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/mcp_proxy_utils.py +0 -0
  316. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/mcp_utils.py +0 -0
  317. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/prompt_utils.py +0 -0
  318. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/runtime_registry.py +0 -0
  319. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/sandbox_program_utils.py +0 -0
  320. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/serialization_utils.py +0 -0
  321. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/task_freeze_utils.py +0 -0
  322. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/taskset_utils.py +0 -0
  323. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/timing_utils.py +0 -0
  324. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/tool_utils.py +0 -0
  325. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/trajectory_utils.py +0 -0
  326. {verifiers-0.1.15.dev8 → verifiers-0.1.15.dev10}/verifiers/v1/utils/usage_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: verifiers
3
- Version: 0.1.15.dev8
3
+ Version: 0.1.15.dev10
4
4
  Summary: Verifiers: Environments for LLM Reinforcement Learning
5
5
  Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
6
6
  Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -48,6 +48,7 @@ Requires-Dist: tenacity>=8.5.0
48
48
  Requires-Dist: textual
49
49
  Requires-Dist: tomli; python_version < '3.11'
50
50
  Requires-Dist: typing-extensions; python_version < '3.12'
51
+ Requires-Dist: uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
51
52
  Provides-Extra: browser
52
53
  Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
53
54
  Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
@@ -227,7 +228,8 @@ class MyTasksetConfig(vf.TasksetConfig):
227
228
  split: str = "train"
228
229
 
229
230
 
230
- class MyTaskset(vf.Taskset[MyTasksetConfig]):
231
+ class MyTaskset(vf.Taskset):
232
+ config: MyTasksetConfig
231
233
  _default_rewards = (contains_answer,)
232
234
 
233
235
  def rows(self) -> list[dict[str, object]]:
@@ -242,12 +244,15 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
242
244
  return [row for row in rows if row["split"] == self.config.split]
243
245
 
244
246
 
245
- class MyEnvConfig(vf.EnvConfig):
246
- taskset: MyTasksetConfig = MyTasksetConfig()
247
+ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
248
+ assert isinstance(config, MyTasksetConfig)
249
+ return MyTaskset(config=config)
247
250
 
248
251
 
249
- def load_environment(config: MyEnvConfig) -> vf.Env:
250
- return vf.Env(taskset=MyTaskset(config=config.taskset))
252
+ def load_environment(config: vf.EnvConfig) -> vf.Env:
253
+ taskset_config = config.taskset
254
+ assert isinstance(taskset_config, MyTasksetConfig)
255
+ return vf.Env(taskset=load_taskset(taskset_config))
251
256
  ```
252
257
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
253
258
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
@@ -151,7 +151,8 @@ class MyTasksetConfig(vf.TasksetConfig):
151
151
  split: str = "train"
152
152
 
153
153
 
154
- class MyTaskset(vf.Taskset[MyTasksetConfig]):
154
+ class MyTaskset(vf.Taskset):
155
+ config: MyTasksetConfig
155
156
  _default_rewards = (contains_answer,)
156
157
 
157
158
  def rows(self) -> list[dict[str, object]]:
@@ -166,12 +167,15 @@ class MyTaskset(vf.Taskset[MyTasksetConfig]):
166
167
  return [row for row in rows if row["split"] == self.config.split]
167
168
 
168
169
 
169
- class MyEnvConfig(vf.EnvConfig):
170
- taskset: MyTasksetConfig = MyTasksetConfig()
170
+ def load_taskset(config: MyTasksetConfig) -> MyTaskset:
171
+ assert isinstance(config, MyTasksetConfig)
172
+ return MyTaskset(config=config)
171
173
 
172
174
 
173
- def load_environment(config: MyEnvConfig) -> vf.Env:
174
- return vf.Env(taskset=MyTaskset(config=config.taskset))
175
+ def load_environment(config: vf.EnvConfig) -> vf.Env:
176
+ taskset_config = config.taskset
177
+ assert isinstance(taskset_config, MyTasksetConfig)
178
+ return vf.Env(taskset=load_taskset(taskset_config))
175
179
  ```
176
180
  If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
177
181
  **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
@@ -54,6 +54,7 @@ dependencies = [
54
54
  "regex<2026.4.4",
55
55
  "httpx>=0.27.0",
56
56
  "prime-pydantic-config[toml]",
57
+ "uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'",
57
58
  ]
58
59
 
59
60
  [dependency-groups]
@@ -98,6 +98,31 @@ async def test_anthropic_to_native_prompt_with_typed_multimodal_content_parts():
98
98
  ]
99
99
 
100
100
 
101
+ @pytest.mark.asyncio
102
+ async def test_anthropic_to_native_prompt_marks_unsupported_images_in_mixed_content():
103
+ pytest.importorskip("anthropic")
104
+ from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient
105
+
106
+ client = AnthropicMessagesClient(object())
107
+ messages = [
108
+ UserMessage(
109
+ content=[
110
+ TextContentPart(text="describe this"),
111
+ ImageUrlContentPart(
112
+ image_url=ImageUrlSource(url="https://example.com/image.png")
113
+ ),
114
+ ]
115
+ )
116
+ ]
117
+
118
+ prompt, kwargs = await client.to_native_prompt(messages)
119
+ assert kwargs["system"] == ""
120
+ assert prompt[0]["content"] == [
121
+ {"type": "text", "text": "describe this"},
122
+ {"type": "text", "text": "[image]"},
123
+ ]
124
+
125
+
101
126
  @pytest.mark.asyncio
102
127
  async def test_anthropic_assistant_tool_calls_use_text_chunks_not_model_repr():
103
128
  pytest.importorskip("anthropic")
@@ -216,10 +216,14 @@ def help_test_can_load_env(tmp_venv_dir: Path, env_dir: Path):
216
216
 
217
217
  def help_test_can_eval_env(tmp_venv_dir: Path, env_dir: Path):
218
218
  """Test that the environment can be run via vf-eval."""
219
- if os.getenv("OPENAI_API_KEY"):
220
- model_flags = "-m gpt-4.1-mini -b https://api.openai.com/v1 -k OPENAI_API_KEY"
221
- elif os.getenv("PRIME_API_KEY"):
219
+ if env_dir.name == "tau2_bench_v1" and not os.getenv("PRIME_API_KEY"):
220
+ pytest.skip(
221
+ "Skipping tau2 default eval because PRIME_API_KEY is not configured"
222
+ )
223
+ if os.getenv("PRIME_API_KEY"):
222
224
  model_flags = "-m openai/gpt-4.1-mini -b https://api.pinference.ai/api/v1 -k PRIME_API_KEY"
225
+ elif os.getenv("OPENAI_API_KEY"):
226
+ model_flags = "-m gpt-4.1-mini -b https://api.openai.com/v1 -k OPENAI_API_KEY"
223
227
  else:
224
228
  pytest.skip("Skipping vf-eval smoke test because no API key is configured")
225
229
 
@@ -288,6 +288,25 @@ def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
288
288
  }
289
289
 
290
290
 
291
+ def test_cli_defaults_session_header_to_trajectory_id(monkeypatch, run_cli):
292
+ captured = run_cli(monkeypatch, {})
293
+
294
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
295
+ "X-Session-ID": "trajectory_id"
296
+ }
297
+
298
+
299
+ def test_cli_header_from_state_overrides_default_session_header(monkeypatch, run_cli):
300
+ captured = run_cli(
301
+ monkeypatch,
302
+ {"header_from_state": ["X-Session-ID: example_id"]},
303
+ )
304
+
305
+ assert captured["configs"][0].client_config.extra_headers_from_state == {
306
+ "X-Session-ID": "example_id"
307
+ }
308
+
309
+
291
310
  def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
292
311
  cfg = tmp_path / "eval.toml"
293
312
  cfg.write_text(
@@ -239,83 +239,20 @@ class TestLaunchCommandResolution:
239
239
  )
240
240
 
241
241
 
242
- class TestStartStopCommands:
243
- def test_start_cmd_tracks_process_group_leader_pid(self):
244
- """Start command must capture `$!` (the backgrounded pgroup leader),
245
- not `$$` (the outer shell), and must end with `wait` so the recorded
246
- exit code reflects the launched daemon's.
247
- """
248
- cmd = _DummyEnv()._mcp_start_cmd("svc", "python -u /opt/x/server.py")
249
- assert "echo $!" in cmd
250
- assert "echo $$" not in cmd
251
- assert cmd.rstrip().endswith("wait")
252
- assert "/tmp/harbor-mcp-svc.pid" in cmd
253
- assert "python -u /opt/x/server.py" in cmd
254
-
255
- def test_start_cmd_wraps_in_setsid_for_process_group_semantics(self):
256
- """Wrapping the user's command in `setsid sh -c ...` is what makes
257
- `$!` a process-group leader, so `kill -9 -$PID` can reap the whole
258
- daemon tree on stop. Compound commands (e.g. `cd /x && python y.py`)
259
- must be preserved verbatim inside the sh -c payload so their own
260
- semantics are unchanged."""
261
- cmd = _DummyEnv()._mcp_start_cmd("svc", "cd /opt && python server.py")
262
- assert "setsid sh -c " in cmd
263
- assert "'cd /opt && python server.py'" in cmd
264
-
265
- def test_stop_cmd_is_one_line_sigkill_plus_rm(self):
266
- """Default: one SIGKILL to the process group, then unlink the
267
- pidfile — no poll/sleep loop."""
268
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
269
- assert "kill -9" in cmd
270
- assert "rm -f" in cmd
271
- assert "/tmp/harbor-mcp-svc.pid" in cmd
272
- assert "kill -0" not in cmd
273
- assert "sleep" not in cmd
274
- assert "\n" not in cmd
275
- assert len(cmd) < 120
276
-
277
- def test_stop_cmd_targets_process_group_not_single_pid(self):
278
- """The `-` prefix on the `$(cat …)` expansion is what turns kill(1)
279
- into a process-group kill — without it, SIGKILL only lands on the
280
- wrapping shell and e.g. a `python` child spawned via `cd && python`
281
- leaks as an orphan."""
282
- cmd = _DummyEnv()._mcp_stop_cmd("svc")
283
- assert 'kill -9 -"$(cat' in cmd
284
-
285
- def test_server_name_with_shell_metachars_is_quoted(self):
286
- """Server name is task-author-controlled; every pidfile reference
287
- must appear only inside single-quoted spans."""
288
- env = _DummyEnv()
289
- unquoted = "/tmp/harbor-mcp-evil$(whoami).pid"
290
- quoted = f"'{unquoted}'"
291
- for cmd in (
292
- env._mcp_start_cmd("evil$(whoami)", "x"),
293
- env._mcp_stop_cmd("evil$(whoami)"),
294
- ):
295
- assert quoted in cmd
296
- # Every raw occurrence must be inside an already-quoted span.
297
- assert cmd.count(unquoted) == cmd.count(quoted)
298
-
299
- def test_launch_command_with_shell_metachars_is_quoted(self):
300
- """Same for the user's launch command: it's task-author-controlled,
301
- must land inside a single-quoted span once wrapped in `sh -c`."""
302
- env = _DummyEnv()
303
- evil_cmd = "python -c 'print(1)' && touch /pwned"
304
- quoted = f"'{evil_cmd}'".replace("'", "'\"'\"'")
305
- # shlex-quoted output contains the evil string only inside quotes.
306
- cmd = env._mcp_start_cmd("svc", evil_cmd)
307
- assert "setsid sh -c " in cmd
308
- # No unquoted `&& touch /pwned` outside a single-quoted span.
309
- assert cmd.count(evil_cmd) == 0 or quoted in cmd
310
-
311
-
312
242
  class TestLifecycle:
313
243
  @pytest.mark.asyncio
314
244
  async def test_starts_server_with_registered_launch_command(self):
315
- env = _DummyEnv(mcp_launch_commands={"svc": "python server.py"})
245
+ env = _DummyEnv(mcp_launch_commands={"svc": "cd /opt && python server.py"})
316
246
  state: dict[str, Any] = {}
317
247
  await env.start_mcp_servers("sbx", _config_with_server(), state)
318
248
  assert set(state["harbor_mcp_jobs"].keys()) == {"svc"}
249
+ _, start_cmd = env.started_jobs[0]
250
+ assert "echo $!" in start_cmd
251
+ assert "echo $$" not in start_cmd
252
+ assert start_cmd.rstrip().endswith("wait")
253
+ assert "/tmp/harbor-mcp-svc.pid" in start_cmd
254
+ assert "setsid sh -c " in start_cmd
255
+ assert "'cd /opt && python server.py'" in start_cmd
319
256
 
320
257
  @pytest.mark.asyncio
321
258
  async def test_externally_managed_server_is_skipped(self):
@@ -342,9 +279,38 @@ class TestLifecycle:
342
279
  if "kill -9" in c.args[1]
343
280
  ]
344
281
  assert len(stop_calls) == 1
345
- assert "harbor-mcp-svc.pid" in stop_calls[0]
282
+ stop_cmd = stop_calls[0]
283
+ assert "harbor-mcp-svc.pid" in stop_cmd
284
+ assert 'kill -9 -"$(cat' in stop_cmd
285
+ assert "rm -f" in stop_cmd
286
+ assert "kill -0" not in stop_cmd
287
+ assert "sleep" not in stop_cmd
288
+ assert "\n" not in stop_cmd
289
+ assert len(stop_cmd) < 120
346
290
  assert state["harbor_mcp_jobs"] == {}
347
291
 
292
+ @pytest.mark.asyncio
293
+ async def test_launch_and_stop_commands_quote_task_authored_shell_text(self):
294
+ env = _DummyEnv(
295
+ mcp_launch_commands={
296
+ "evil$(whoami)": "python -c 'print(1)' && touch /pwned"
297
+ }
298
+ )
299
+ state: dict[str, Any] = {"sandbox_id": "sbx"}
300
+ await env.start_mcp_servers(
301
+ "sbx", _config_with_server(name="evil$(whoami)"), state
302
+ )
303
+ _, start_cmd = env.started_jobs[0]
304
+ quoted_pidfile = "'/tmp/harbor-mcp-evil$(whoami).pid'"
305
+ assert quoted_pidfile in start_cmd
306
+ assert "setsid sh -c " in start_cmd
307
+ assert "'\"'\"'print(1)'\"'\"'" in start_cmd
308
+
309
+ env.sandbox_client.execute_command.reset_mock()
310
+ await env.stop_mcp_servers(state)
311
+ stop_cmd = env.sandbox_client.execute_command.call_args.args[1]
312
+ assert quoted_pidfile in stop_cmd
313
+
348
314
  @pytest.mark.asyncio
349
315
  async def test_stop_without_sandbox_id_is_a_noop(self):
350
316
  env = _DummyEnv()
@@ -530,22 +496,6 @@ class TestBackgroundJob:
530
496
  class TestHealthCheck:
531
497
  """Readiness probing — default `/proc/net/tcp` + user override."""
532
498
 
533
- def test_default_probe_shape(self):
534
- """Portable awk on /proc/net/tcp{,6}, matching LISTEN state only,
535
- with no bash-ism dependency like /dev/tcp."""
536
- cmd = HarborMCPMixin._default_mcp_health_cmd(8000)
537
- assert "bash" not in cmd and "/dev/tcp" not in cmd
538
- assert "/proc/net/tcp" in cmd and "/proc/net/tcp6" in cmd
539
- assert '$4 == "0A"' in cmd # LISTEN state
540
-
541
- @pytest.mark.parametrize(
542
- "port,hex_expected",
543
- [(80, "0050"), (8000, "1F40"), (65535, "FFFF"), (1, "0001")],
544
- )
545
- def test_default_probe_encodes_port_as_uppercase_hex(self, port, hex_expected):
546
- cmd = HarborMCPMixin._default_mcp_health_cmd(port)
547
- assert f":{hex_expected}$" in cmd
548
-
549
499
  @pytest.mark.asyncio
550
500
  async def test_custom_healthcheck_command_templated_with_port(self):
551
501
  env = _DummyEnv(mcp_launch_commands={"svc": "python x"})
@@ -580,7 +530,11 @@ class TestHealthCheck:
580
530
  if "/proc/net/tcp" in c.args[1]
581
531
  ]
582
532
  assert len(health_calls) == 1
583
- assert ":1F40$" in health_calls[0]
533
+ health_cmd = health_calls[0]
534
+ assert "bash" not in health_cmd and "/dev/tcp" not in health_cmd
535
+ assert "/proc/net/tcp6" in health_cmd
536
+ assert '$4 == "0A"' in health_cmd
537
+ assert ":1F40$" in health_cmd
584
538
 
585
539
  @pytest.mark.asyncio
586
540
  async def test_probe_timeout_is_respected(self):
@@ -1,6 +1,26 @@
1
+ import importlib
2
+ import sys
3
+
1
4
  import verifiers
2
5
 
3
6
 
7
+ def test_v1_taskset_imports_do_not_import_textarena():
8
+ textarena_module = "verifiers.v1.packages.tasksets.textarena"
9
+ sys.modules.pop(textarena_module, None)
10
+
11
+ tasksets = importlib.import_module("verifiers.v1.packages.tasksets")
12
+ tasksets.__dict__.pop("TextArenaTaskset", None)
13
+ tasksets.__dict__.pop("TextArenaTasksetConfig", None)
14
+ importlib.reload(tasksets)
15
+ assert textarena_module not in sys.modules
16
+
17
+ v1 = importlib.import_module("verifiers.v1")
18
+ v1.__dict__.pop("TextArenaTaskset", None)
19
+ v1.__dict__.pop("TextArenaTasksetConfig", None)
20
+ importlib.reload(v1)
21
+ assert textarena_module not in sys.modules
22
+
23
+
4
24
  class TestImports:
5
25
  """Test that all public API imports work correctly.
6
26
  This was inspired by issue #349.
@@ -0,0 +1,80 @@
1
+ from pathlib import Path
2
+
3
+ import verifiers as vf
4
+ from verifiers.scripts.init import init_environment
5
+
6
+
7
+ def read_env_file(root: Path, env_id: str) -> str:
8
+ module_name = env_id.replace("-", "_")
9
+ return (root / module_name / f"{module_name}.py").read_text()
10
+
11
+
12
+ def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
13
+ root = init_environment("foo", path=str(tmp_path))
14
+ content = read_env_file(tmp_path, "foo")
15
+
16
+ assert root == tmp_path / "foo"
17
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
18
+ assert "NotImplementedError" in content
19
+ assert "load_taskset" not in content
20
+ assert "EnvTaskset" not in content
21
+
22
+
23
+ def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
24
+ init_environment("bar", path=str(tmp_path), v1=True)
25
+ content = read_env_file(tmp_path, "bar")
26
+
27
+ assert 'ENV_ID = "bar"' in content
28
+ assert "def load_tasks():" in content
29
+ assert "class EnvTasksetConfig(vf.TasksetConfig):" in content
30
+ assert 'source: str = "bar:load_tasks"' in content
31
+ assert 'rewards: list[str] = ["bar:exact_answer"]' in content
32
+ assert "def load_taskset(config: EnvTasksetConfig) -> vf.Taskset:" in content
33
+ assert "vf.load_taskset(ENV_ID, config=config.taskset)" in content
34
+ assert "class EnvTaskset(" not in content
35
+ assert "_default_" not in content
36
+ assert "assert isinstance" not in content
37
+
38
+
39
+ def test_init_v1_template_loads_with_vf_load_environment(
40
+ tmp_path: Path, monkeypatch
41
+ ) -> None:
42
+ init_environment("loadable-v1", path=str(tmp_path), v1=True)
43
+ monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
44
+
45
+ env = vf.load_environment("loadable-v1")
46
+
47
+ assert isinstance(env, vf.Env)
48
+ assert env.taskset.rows()[0]["answer"] == "cba"
49
+ assert env.taskset.rewards[0].__name__ == "exact_answer"
50
+
51
+
52
+ def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
53
+ init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
54
+ content = read_env_file(tmp_path, "baz")
55
+
56
+ assert "class EnvHarnessConfig(vf.HarnessConfig):" in content
57
+ assert "class EnvHarness(vf.Harness):" in content
58
+ assert "def load_harness(config: EnvHarnessConfig) -> EnvHarness:" in content
59
+ assert "vf.load_harness(ENV_ID, config=config.harness)" in content
60
+
61
+
62
+ def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
63
+ init_environment("plain", path=str(tmp_path), with_harness=True)
64
+ content = read_env_file(tmp_path, "plain")
65
+ captured = capsys.readouterr()
66
+
67
+ assert "--with-harness only applies with --v1; ignoring." in captured.out
68
+ assert "def load_environment(**kwargs) -> vf.Environment:" in content
69
+ assert "load_harness" not in content
70
+
71
+
72
+ def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
73
+ init_environment("pkg-env", path=str(tmp_path), v1=True, multi_file=True)
74
+ package_dir = tmp_path / "pkg_env" / "pkg_env"
75
+ init_content = (package_dir / "__init__.py").read_text()
76
+ env_content = (package_dir / "pkg_env.py").read_text()
77
+
78
+ assert "from .pkg_env import load_environment, load_taskset" in init_content
79
+ assert "__all__ = ['load_environment', 'load_taskset']" in init_content
80
+ assert 'source: str = "pkg_env.pkg_env:load_tasks"' in env_content
@@ -9,10 +9,8 @@ from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
9
9
  LEAN_GUARD_END_MARKER,
10
10
  LeanRubric,
11
11
  _build_starter_file,
12
- _expected_protected_region,
13
12
  _extract_protected_region,
14
13
  _normalize_signature,
15
- _wrap_with_lean_guard,
16
14
  )
17
15
 
18
16
 
@@ -80,11 +78,13 @@ class TestNormalizeSignature:
80
78
  )
81
79
 
82
80
 
83
- class TestWrapWithLeanGuard:
81
+ class TestBuildStarterFileLeanGuardLayout:
84
82
  def test_marker_layout(self) -> None:
85
83
  signature = "theorem foo (x : ℝ) : x = x := by"
86
- wrapped = _wrap_with_lean_guard(signature)
87
- assert wrapped == (
84
+ starter = _build_starter_file(
85
+ {"formal_statement": signature, "header": "", "imports": ""}
86
+ )
87
+ assert starter == (
88
88
  "-- lean-guard: begin protected\n"
89
89
  "theorem foo (x : ℝ) : x = x := by\n"
90
90
  "-- lean-guard: end protected\n"
@@ -93,8 +93,10 @@ class TestWrapWithLeanGuard:
93
93
 
94
94
  def test_round_trip_via_extract(self) -> None:
95
95
  signature = "theorem foo : True := by"
96
- wrapped = _wrap_with_lean_guard(signature)
97
- region = _extract_protected_region(wrapped)
96
+ starter = _build_starter_file(
97
+ {"formal_statement": signature, "header": "", "imports": ""}
98
+ )
99
+ region = _extract_protected_region(starter)
98
100
  assert region is not None
99
101
  assert LEAN_GUARD_BEGIN_MARKER in region
100
102
  assert LEAN_GUARD_END_MARKER in region
@@ -212,7 +214,7 @@ class TestBuildStarterFile:
212
214
  "header": "import Mathlib",
213
215
  }
214
216
  starter = _build_starter_file(info)
215
- expected = _expected_protected_region(info)
217
+ expected = _extract_protected_region(_build_starter_file(info)) or ""
216
218
  actual = _extract_protected_region(starter)
217
219
  assert expected == actual
218
220
  assert expected != ""
@@ -1,5 +1,6 @@
1
1
  """Tests for the OpenCodeRLMEnv class."""
2
2
 
3
+ import asyncio
3
4
  import json
4
5
  import subprocess
5
6
  from unittest.mock import AsyncMock, MagicMock, patch
@@ -7,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
7
8
  import pytest
8
9
  from datasets import Dataset
9
10
 
11
+ import verifiers as vf
10
12
  from verifiers.envs.experimental.opencode_rlm_env import (
11
13
  OpenCodeRLMEnv,
12
14
  OpenCodeRLMMonitorRubric,
@@ -239,45 +241,6 @@ class TestBuildEnvVars:
239
241
  assert "RLM_SUB_MODEL_ID" not in env_vars
240
242
 
241
243
 
242
- # =============================================================================
243
- # Sub-LLM detection (header-based)
244
- # =============================================================================
245
-
246
-
247
- class TestIsSubLLMRequest:
248
- def test_detects_sub_header(self):
249
- assert (
250
- OpenCodeRLMEnv._is_sub_llm_request({"headers": {"x-rlm-role": "sub"}})
251
- is True
252
- )
253
-
254
- def test_rejects_no_headers(self):
255
- assert OpenCodeRLMEnv._is_sub_llm_request({}) is False
256
-
257
- def test_rejects_empty_headers(self):
258
- assert OpenCodeRLMEnv._is_sub_llm_request({"headers": {}}) is False
259
-
260
- def test_rejects_wrong_value(self):
261
- assert (
262
- OpenCodeRLMEnv._is_sub_llm_request({"headers": {"x-rlm-role": "main"}})
263
- is False
264
- )
265
-
266
- def test_ignores_model_field(self):
267
- """Model name should NOT be used for detection."""
268
- assert (
269
- OpenCodeRLMEnv._is_sub_llm_request({"model": "sub", "headers": {}}) is False
270
- )
271
-
272
- def test_header_takes_precedence(self):
273
- assert (
274
- OpenCodeRLMEnv._is_sub_llm_request(
275
- {"model": "openai/gpt-5-mini", "headers": {"x-rlm-role": "sub"}}
276
- )
277
- is True
278
- )
279
-
280
-
281
244
  # =============================================================================
282
245
  # State setup
283
246
  # =============================================================================
@@ -330,17 +293,45 @@ class TestMetrics:
330
293
  response = MagicMock(spec=[]) # no usage attr
331
294
  assert OpenCodeRLMEnv._extract_token_counts(response) == (0, 0)
332
295
 
333
- def test_update_sub_metrics(self):
296
+ @pytest.mark.asyncio
297
+ async def test_handle_sub_llm_request_updates_sub_metrics(self):
334
298
  env = build_env()
335
299
  state = {
300
+ "trajectory": [],
301
+ "model": "main-model",
336
302
  "sub_llm_turns": 0,
337
303
  "sub_llm_prompt_tokens": 0,
338
304
  "sub_llm_completion_tokens": 0,
339
305
  }
340
- response = MagicMock()
341
- response.usage.prompt_tokens = 50
342
- response.usage.completion_tokens = 20
343
- env._update_sub_metrics(state, response)
306
+ response = vf.Response(
307
+ id="resp",
308
+ created=0,
309
+ model="sub-model",
310
+ message=vf.ResponseMessage(
311
+ content="ok", finish_reason="stop", is_truncated=False
312
+ ),
313
+ usage=vf.Usage(
314
+ prompt_tokens=50,
315
+ completion_tokens=20,
316
+ reasoning_tokens=0,
317
+ total_tokens=70,
318
+ ),
319
+ )
320
+ future = asyncio.get_running_loop().create_future()
321
+ intercept = {
322
+ "messages": [{"role": "user", "content": "hello"}],
323
+ "headers": {"x-rlm-role": "sub"},
324
+ "response_future": future,
325
+ }
326
+ env._require_interception_server().intercepts["req"] = intercept
327
+ with patch.object(
328
+ vf.Environment,
329
+ "get_model_response",
330
+ new=AsyncMock(return_value=response),
331
+ ):
332
+ await env._handle_sub_llm_request(state, "req", intercept)
333
+
334
+ assert future.result() is response
344
335
  assert state["sub_llm_turns"] == 1
345
336
  assert state["sub_llm_prompt_tokens"] == 50
346
337
  assert state["sub_llm_completion_tokens"] == 20