PyPI - verifiers - Versions diffs - 0.1.15.dev9__tar.gz → 0.1.15.dev11__tar.gz - Mend

verifiers 0.1.15.dev9.tar.gz → 0.1.15.dev11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (323) hide show

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: verifiers
-Version: 0.1.15.dev9
+Version: 0.1.15.dev11
 Summary: Verifiers: Environments for LLM Reinforcement Learning
 Project-URL: Homepage, https://github.com/primeintellect-ai/verifiers
 Project-URL: Documentation, https://github.com/primeintellect-ai/verifiers
@@ -48,6 +48,7 @@ Requires-Dist: tenacity>=8.5.0
 Requires-Dist: textual
 Requires-Dist: tomli; python_version < '3.11'
 Requires-Dist: typing-extensions; python_version < '3.12'
+Requires-Dist: uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'
 Provides-Extra: browser
 Requires-Dist: aiohttp>=3.9.0; extra == 'browser'
 Requires-Dist: python-dotenv>=1.0.0; extra == 'browser'
@@ -55,7 +56,7 @@ Requires-Dist: stagehand>=3.0.0; extra == 'browser'
 Provides-Extra: openenv
 Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
 Provides-Extra: renderers
-Requires-Dist: renderers>=0.1.8.dev4; extra == 'renderers'
+Requires-Dist: renderers>=0.1.8.dev28; extra == 'renderers'
 Provides-Extra: rg
 Requires-Dist: reasoning-gym; extra == 'rg'
 Provides-Extra: rl
@@ -219,19 +220,13 @@ custom harnesses, use the v1 Taskset/Harness path:
 # my_env.py
 import verifiers as vf
-@vf.reward(weight=1.0)
-async def contains_answer(task, state) -> float:
-    return float(task["answer"] in str(state.get("completion") or ""))
 class MyTasksetConfig(vf.TasksetConfig):
     split: str = "train"
-class MyTaskset(vf.Taskset):
-    config: MyTasksetConfig
-    _default_rewards = (contains_answer,)
-    def rows(self) -> list[dict[str, object]]:
+class MyTaskset(vf.Taskset[MyTasksetConfig]):
+    def load_tasks(self) -> vf.Tasks:
         rows = [
             {
                 "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -242,28 +237,31 @@ class MyTaskset(vf.Taskset):
         ]
         return [row for row in rows if row["split"] == self.config.split]
+    @vf.reward(weight=1.0)
+    async def contains_answer(self, task, state) -> float:
+        return float(task["answer"] in str(state.get("completion") or ""))
 def load_taskset(config: MyTasksetConfig) -> MyTaskset:
-    assert isinstance(config, MyTasksetConfig)
     return MyTaskset(config=config)
 def load_environment(config: vf.EnvConfig) -> vf.Env:
-    taskset_config = config.taskset
-    assert isinstance(taskset_config, MyTasksetConfig)
-    return vf.Env(taskset=load_taskset(taskset_config))
+    return vf.Env(taskset=vf.load_taskset(config=config.taskset))
 ```
 If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
 **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
-Reusable taskset and harness packages live under `verifiers.v1.packages` while
-the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
-For example, Harbor task directories can run through the bundled OpenCode CLI
+Reusable taskset and harness packages live under `verifiers.v1.packages`. For
+example, Harbor task directories can run through the bundled OpenCode CLI
 harness with:
 ```python
+from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
+from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
 env = vf.Env(
-    taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
-    harness=vf.OpenCode(config=vf.OpenCodeConfig()),
+    taskset=HarborTaskset(config=HarborTasksetConfig()),
+    harness=OpenCode(config=OpenCodeConfig()),
 )
 ```

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/README.md RENAMED Viewed

@@ -143,19 +143,13 @@ custom harnesses, use the v1 Taskset/Harness path:
 # my_env.py
 import verifiers as vf
-@vf.reward(weight=1.0)
-async def contains_answer(task, state) -> float:
-    return float(task["answer"] in str(state.get("completion") or ""))
 class MyTasksetConfig(vf.TasksetConfig):
     split: str = "train"
-class MyTaskset(vf.Taskset):
-    config: MyTasksetConfig
-    _default_rewards = (contains_answer,)
-    def rows(self) -> list[dict[str, object]]:
+class MyTaskset(vf.Taskset[MyTasksetConfig]):
+    def load_tasks(self) -> vf.Tasks:
         rows = [
             {
                 "prompt": [{"role": "user", "content": "Reverse abc."}],
@@ -166,28 +160,31 @@ class MyTaskset(vf.Taskset):
         ]
         return [row for row in rows if row["split"] == self.config.split]
+    @vf.reward(weight=1.0)
+    async def contains_answer(self, task, state) -> float:
+        return float(task["answer"] in str(state.get("completion") or ""))
 def load_taskset(config: MyTasksetConfig) -> MyTaskset:
-    assert isinstance(config, MyTasksetConfig)
     return MyTaskset(config=config)
 def load_environment(config: vf.EnvConfig) -> vf.Env:
-    taskset_config = config.taskset
-    assert isinstance(taskset_config, MyTasksetConfig)
-    return vf.Env(taskset=load_taskset(taskset_config))
+    return vf.Env(taskset=vf.load_taskset(config=config.taskset))
 ```
 If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See
 **[BYO Harness](docs/byo-harness.md)** for the advanced v1 taskset/harness API.
-Reusable taskset and harness packages live under `verifiers.v1.packages` while
-the v1 API stabilizes, and are re-exported from `verifiers.v1` for normal use.
-For example, Harbor task directories can run through the bundled OpenCode CLI
+Reusable taskset and harness packages live under `verifiers.v1.packages`. For
+example, Harbor task directories can run through the bundled OpenCode CLI
 harness with:
 ```python
+from verifiers.v1.packages.harnesses import OpenCode, OpenCodeConfig
+from verifiers.v1.packages.tasksets import HarborTaskset, HarborTasksetConfig
 env = vf.Env(
-    taskset=vf.HarborTaskset(config=vf.HarborTasksetConfig()),
-    harness=vf.OpenCode(config=vf.OpenCodeConfig()),
+    taskset=HarborTaskset(config=HarborTasksetConfig()),
+    harness=OpenCode(config=OpenCodeConfig()),
 )
 ```

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/pyproject.toml RENAMED Viewed

@@ -54,6 +54,7 @@ dependencies = [
     "regex<2026.4.4",
     "httpx>=0.27.0",
     "prime-pydantic-config[toml]",
+    "uvloop>=0.21.0; sys_platform != 'win32' and sys_platform != 'cygwin' and platform_python_implementation != 'PyPy'",
 ]
 [dependency-groups]
@@ -73,7 +74,7 @@ dev = [
     "aiohttp>=3.9.0",
     "python-dotenv>=1.0.0",
     "nltk",
-    "renderers>=0.1.8.dev4",
+    "renderers>=0.1.8.dev28",
 ]
 policy = [
     "semgrep>=1.150.0",
@@ -96,7 +97,7 @@ openenv = [
     "openenv-core>=0.3.0",
 ]
 renderers = [
-    "renderers>=0.1.8.dev4",
+    "renderers>=0.1.8.dev28",
 ]
 rl = [
     "torch>=2.8.0,<2.9.0",

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_client_multimodal_types.py RENAMED Viewed

@@ -98,6 +98,31 @@ async def test_anthropic_to_native_prompt_with_typed_multimodal_content_parts():
     ]
+@pytest.mark.asyncio
+async def test_anthropic_to_native_prompt_marks_unsupported_images_in_mixed_content():
+    pytest.importorskip("anthropic")
+    from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient
+    client = AnthropicMessagesClient(object())
+    messages = [
+        UserMessage(
+            content=[
+                TextContentPart(text="describe this"),
+                ImageUrlContentPart(
+                    image_url=ImageUrlSource(url="https://example.com/image.png")
+                ),
+            ]
+        )
+    ]
+    prompt, kwargs = await client.to_native_prompt(messages)
+    assert kwargs["system"] == ""
+    assert prompt[0]["content"] == [
+        {"type": "text", "text": "describe this"},
+        {"type": "text", "text": "[image]"},
+    ]
 @pytest.mark.asyncio
 async def test_anthropic_assistant_tool_calls_use_text_chunks_not_model_repr():
     pytest.importorskip("anthropic")

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_eval_cli.py RENAMED Viewed

@@ -288,6 +288,25 @@ def test_cli_headers_table_and_list_merge(monkeypatch, run_cli):
     }
+def test_cli_defaults_session_header_to_trajectory_id(monkeypatch, run_cli):
+    captured = run_cli(monkeypatch, {})
+    assert captured["configs"][0].client_config.extra_headers_from_state == {
+        "X-Session-ID": "trajectory_id"
+    }
+def test_cli_header_from_state_overrides_default_session_header(monkeypatch, run_cli):
+    captured = run_cli(
+        monkeypatch,
+        {"header_from_state": ["X-Session-ID: example_id"]},
+    )
+    assert captured["configs"][0].client_config.extra_headers_from_state == {
+        "X-Session-ID": "example_id"
+    }
 def test_cli_registry_headers_merged_with_eval_toml(tmp_path, monkeypatch, run_cli):
     cfg = tmp_path / "eval.toml"
     cfg.write_text(

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_harbor_env_mcp.py RENAMED Viewed

@@ -239,83 +239,20 @@ class TestLaunchCommandResolution:
         )
-class TestStartStopCommands:
-    def test_start_cmd_tracks_process_group_leader_pid(self):
-        """Start command must capture `$!` (the backgrounded pgroup leader),
-        not `$$` (the outer shell), and must end with `wait` so the recorded
-        exit code reflects the launched daemon's.
-        """
-        cmd = _DummyEnv()._mcp_start_cmd("svc", "python -u /opt/x/server.py")
-        assert "echo $!" in cmd
-        assert "echo $$" not in cmd
-        assert cmd.rstrip().endswith("wait")
-        assert "/tmp/harbor-mcp-svc.pid" in cmd
-        assert "python -u /opt/x/server.py" in cmd
-    def test_start_cmd_wraps_in_setsid_for_process_group_semantics(self):
-        """Wrapping the user's command in `setsid sh -c ...` is what makes
-        `$!` a process-group leader, so `kill -9 -$PID` can reap the whole
-        daemon tree on stop. Compound commands (e.g. `cd /x && python y.py`)
-        must be preserved verbatim inside the sh -c payload so their own
-        semantics are unchanged."""
-        cmd = _DummyEnv()._mcp_start_cmd("svc", "cd /opt && python server.py")
-        assert "setsid sh -c " in cmd
-        assert "'cd /opt && python server.py'" in cmd
-    def test_stop_cmd_is_one_line_sigkill_plus_rm(self):
-        """Default: one SIGKILL to the process group, then unlink the
-        pidfile — no poll/sleep loop."""
-        cmd = _DummyEnv()._mcp_stop_cmd("svc")
-        assert "kill -9" in cmd
-        assert "rm -f" in cmd
-        assert "/tmp/harbor-mcp-svc.pid" in cmd
-        assert "kill -0" not in cmd
-        assert "sleep" not in cmd
-        assert "\n" not in cmd
-        assert len(cmd) < 120
-    def test_stop_cmd_targets_process_group_not_single_pid(self):
-        """The `-` prefix on the `$(cat …)` expansion is what turns kill(1)
-        into a process-group kill — without it, SIGKILL only lands on the
-        wrapping shell and e.g. a `python` child spawned via `cd && python`
-        leaks as an orphan."""
-        cmd = _DummyEnv()._mcp_stop_cmd("svc")
-        assert 'kill -9 -"$(cat' in cmd
-    def test_server_name_with_shell_metachars_is_quoted(self):
-        """Server name is task-author-controlled; every pidfile reference
-        must appear only inside single-quoted spans."""
-        env = _DummyEnv()
-        unquoted = "/tmp/harbor-mcp-evil$(whoami).pid"
-        quoted = f"'{unquoted}'"
-        for cmd in (
-            env._mcp_start_cmd("evil$(whoami)", "x"),
-            env._mcp_stop_cmd("evil$(whoami)"),
-        ):
-            assert quoted in cmd
-            # Every raw occurrence must be inside an already-quoted span.
-            assert cmd.count(unquoted) == cmd.count(quoted)
-    def test_launch_command_with_shell_metachars_is_quoted(self):
-        """Same for the user's launch command: it's task-author-controlled,
-        must land inside a single-quoted span once wrapped in `sh -c`."""
-        env = _DummyEnv()
-        evil_cmd = "python -c 'print(1)' && touch /pwned"
-        quoted = f"'{evil_cmd}'".replace("'", "'\"'\"'")
-        # shlex-quoted output contains the evil string only inside quotes.
-        cmd = env._mcp_start_cmd("svc", evil_cmd)
-        assert "setsid sh -c " in cmd
-        # No unquoted `&& touch /pwned` outside a single-quoted span.
-        assert cmd.count(evil_cmd) == 0 or quoted in cmd
 class TestLifecycle:
     @pytest.mark.asyncio
     async def test_starts_server_with_registered_launch_command(self):
-        env = _DummyEnv(mcp_launch_commands={"svc": "python server.py"})
+        env = _DummyEnv(mcp_launch_commands={"svc": "cd /opt && python server.py"})
         state: dict[str, Any] = {}
         await env.start_mcp_servers("sbx", _config_with_server(), state)
         assert set(state["harbor_mcp_jobs"].keys()) == {"svc"}
+        _, start_cmd = env.started_jobs[0]
+        assert "echo $!" in start_cmd
+        assert "echo $$" not in start_cmd
+        assert start_cmd.rstrip().endswith("wait")
+        assert "/tmp/harbor-mcp-svc.pid" in start_cmd
+        assert "setsid sh -c " in start_cmd
+        assert "'cd /opt && python server.py'" in start_cmd
     @pytest.mark.asyncio
     async def test_externally_managed_server_is_skipped(self):
@@ -342,9 +279,38 @@ class TestLifecycle:
             if "kill -9" in c.args[1]
         ]
         assert len(stop_calls) == 1
-        assert "harbor-mcp-svc.pid" in stop_calls[0]
+        stop_cmd = stop_calls[0]
+        assert "harbor-mcp-svc.pid" in stop_cmd
+        assert 'kill -9 -"$(cat' in stop_cmd
+        assert "rm -f" in stop_cmd
+        assert "kill -0" not in stop_cmd
+        assert "sleep" not in stop_cmd
+        assert "\n" not in stop_cmd
+        assert len(stop_cmd) < 120
         assert state["harbor_mcp_jobs"] == {}
+    @pytest.mark.asyncio
+    async def test_launch_and_stop_commands_quote_task_authored_shell_text(self):
+        env = _DummyEnv(
+            mcp_launch_commands={
+                "evil$(whoami)": "python -c 'print(1)' && touch /pwned"
+            }
+        )
+        state: dict[str, Any] = {"sandbox_id": "sbx"}
+        await env.start_mcp_servers(
+            "sbx", _config_with_server(name="evil$(whoami)"), state
+        )
+        _, start_cmd = env.started_jobs[0]
+        quoted_pidfile = "'/tmp/harbor-mcp-evil$(whoami).pid'"
+        assert quoted_pidfile in start_cmd
+        assert "setsid sh -c " in start_cmd
+        assert "'\"'\"'print(1)'\"'\"'" in start_cmd
+        env.sandbox_client.execute_command.reset_mock()
+        await env.stop_mcp_servers(state)
+        stop_cmd = env.sandbox_client.execute_command.call_args.args[1]
+        assert quoted_pidfile in stop_cmd
     @pytest.mark.asyncio
     async def test_stop_without_sandbox_id_is_a_noop(self):
         env = _DummyEnv()
@@ -530,22 +496,6 @@ class TestBackgroundJob:
 class TestHealthCheck:
     """Readiness probing — default `/proc/net/tcp` + user override."""
-    def test_default_probe_shape(self):
-        """Portable awk on /proc/net/tcp{,6}, matching LISTEN state only,
-        with no bash-ism dependency like /dev/tcp."""
-        cmd = HarborMCPMixin._default_mcp_health_cmd(8000)
-        assert "bash" not in cmd and "/dev/tcp" not in cmd
-        assert "/proc/net/tcp" in cmd and "/proc/net/tcp6" in cmd
-        assert '$4 == "0A"' in cmd  # LISTEN state
-    @pytest.mark.parametrize(
-        "port,hex_expected",
-        [(80, "0050"), (8000, "1F40"), (65535, "FFFF"), (1, "0001")],
-    )
-    def test_default_probe_encodes_port_as_uppercase_hex(self, port, hex_expected):
-        cmd = HarborMCPMixin._default_mcp_health_cmd(port)
-        assert f":{hex_expected}$" in cmd
     @pytest.mark.asyncio
     async def test_custom_healthcheck_command_templated_with_port(self):
         env = _DummyEnv(mcp_launch_commands={"svc": "python x"})
@@ -580,7 +530,11 @@ class TestHealthCheck:
             if "/proc/net/tcp" in c.args[1]
         ]
         assert len(health_calls) == 1
-        assert ":1F40$" in health_calls[0]
+        health_cmd = health_calls[0]
+        assert "bash" not in health_cmd and "/dev/tcp" not in health_cmd
+        assert "/proc/net/tcp6" in health_cmd
+        assert '$4 == "0A"' in health_cmd
+        assert ":1F40$" in health_cmd
     @pytest.mark.asyncio
     async def test_probe_timeout_is_respected(self):

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_imports.py RENAMED Viewed

@@ -4,6 +4,37 @@ import sys
 import verifiers
+PACKAGE_SYMBOLS = {
+    "HarborTaskset",
+    "HarborTasksetConfig",
+    "MiniSWEAgent",
+    "MiniSWEAgentConfig",
+    "OpenCode",
+    "OpenCodeConfig",
+    "Pi",
+    "PiConfig",
+    "RLM",
+    "RLMConfig",
+    "Terminus2",
+    "Terminus2Config",
+    "TextArenaTaskset",
+    "TextArenaTasksetConfig",
+}
+def test_package_tasksets_and_harnesses_are_not_root_exports():
+    for name in PACKAGE_SYMBOLS:
+        assert name not in verifiers.__all__
+        assert not hasattr(verifiers, name)
+def test_package_tasksets_and_harnesses_are_not_v1_exports():
+    v1 = importlib.import_module("verifiers.v1")
+    for name in PACKAGE_SYMBOLS:
+        assert name not in v1.__all__
+        assert not hasattr(v1, name)
 def test_v1_taskset_imports_do_not_import_textarena():
     textarena_module = "verifiers.v1.packages.tasksets.textarena"
     sys.modules.pop(textarena_module, None)

verifiers-0.1.15.dev11/tests/test_init_script.py ADDED Viewed

@@ -0,0 +1,83 @@
+from pathlib import Path
+import pytest
+import verifiers as vf
+from verifiers.scripts.init import init_environment
+def read_env_file(root: Path, env_id: str) -> str:
+    module_name = env_id.replace("-", "_")
+    return (root / module_name / f"{module_name}.py").read_text()
+def test_init_default_writes_v0_stub(tmp_path: Path) -> None:
+    root = init_environment("foo", path=str(tmp_path))
+    content = read_env_file(tmp_path, "foo")
+    assert root == tmp_path / "foo"
+    assert "def load_environment(**kwargs) -> vf.Environment:" in content
+    assert "NotImplementedError" in content
+    assert "load_taskset" not in content
+    assert "EnvTaskset" not in content
+def test_init_v1_writes_thin_taskset_template(tmp_path: Path) -> None:
+    init_environment("bar", path=str(tmp_path), v1=True)
+    content = read_env_file(tmp_path, "bar")
+    assert "class BarTasksetConfig(vf.TasksetConfig):" in content
+    assert "class BarTaskset(vf.Taskset[BarTasksetConfig]):" in content
+    assert "def load_tasks(self) -> vf.Tasks:" in content
+    assert "def load_system_prompt(self) -> vf.SystemPrompt:" in content
+    assert "async def correct_answer(self, task: vf.Task, state: vf.State)" in content
+    assert "def load_taskset(config: BarTasksetConfig) -> BarTaskset:" in content
+    assert "return BarTaskset(config=config)" in content
+    assert "vf.load_taskset(config=config.taskset)" in content
+    assert "class EnvTaskset(" not in content
+    assert "_default_" not in content
+    assert "assert isinstance" not in content
+    assert 'tasks: str = "load_tasks"' not in content
+    assert 'rewards: list[str] = ["correct_answer"]' not in content
+def test_init_v1_template_loads_with_vf_load_environment(
+    tmp_path: Path, monkeypatch
+) -> None:
+    init_environment("loadable-v1", path=str(tmp_path), v1=True)
+    monkeypatch.syspath_prepend(str(tmp_path / "loadable_v1"))
+    with pytest.raises(RuntimeError, match="Load the system prompt"):
+        vf.load_environment("loadable-v1")
+def test_init_v1_with_harness_writes_harness_stub(tmp_path: Path) -> None:
+    init_environment("baz", path=str(tmp_path), v1=True, with_harness=True)
+    content = read_env_file(tmp_path, "baz")
+    assert "class BazTaskset(vf.Taskset[BazTasksetConfig]):" in content
+    assert "class BazHarnessConfig(vf.HarnessConfig):" in content
+    assert "class BazHarness(vf.Harness):" in content
+    assert "def load_harness(config: BazHarnessConfig) -> BazHarness:" in content
+    assert "vf.load_harness(config=config.harness)" in content
+def test_init_with_harness_without_v1_warns_and_uses_v0(tmp_path: Path, capsys) -> None:
+    init_environment("plain", path=str(tmp_path), with_harness=True)
+    content = read_env_file(tmp_path, "plain")
+    captured = capsys.readouterr()
+    assert "--with-harness only applies with --v1; ignoring." in captured.out
+    assert "def load_environment(**kwargs) -> vf.Environment:" in content
+    assert "load_harness" not in content
+def test_init_v1_multifile_exports_component_loaders(tmp_path: Path) -> None:
+    init_environment("pkg-env", path=str(tmp_path), v1=True, multi_file=True)
+    package_dir = tmp_path / "pkg_env" / "pkg_env"
+    init_content = (package_dir / "__init__.py").read_text()
+    env_content = (package_dir / "pkg_env.py").read_text()
+    assert "from .pkg_env import load_environment, load_taskset" in init_content
+    assert "__all__ = ['load_environment', 'load_taskset']" in init_content
+    assert "class PkgEnvTaskset(vf.Taskset[PkgEnvTasksetConfig]):" in env_content
+    assert "return PkgEnvTaskset(config=config)" in env_content

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_langchain_deep_agents_wikispeedia.py RENAMED Viewed

@@ -91,8 +91,8 @@ def test_wikispeedia_env_config_reaches_taskset_and_harness(
         )
     )
-    train_rows = list(env.taskset.source())
-    eval_rows = list(env.taskset.eval_source())
+    train_rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
+    eval_rows = [env.taskset.to_task(row) for row in env.taskset.get_eval_dataset()]
     assert len(train_rows) == 2
     assert len(eval_rows) == 1
@@ -136,8 +136,8 @@ def test_wikispeedia_taskset_sources_use_disjoint_target_split(
         )
     )
-    train_rows = list(taskset.source())
-    eval_rows = list(taskset.eval_source())
+    train_rows = [taskset.to_task(row) for row in taskset.get_dataset()]
+    eval_rows = [taskset.to_task(row) for row in taskset.get_eval_dataset()]
     assert len(train_rows) == 2
     assert len(eval_rows) == 1
@@ -218,7 +218,7 @@ async def test_wikispeedia_tools_resolve_through_v1_runtime(
         ),
         harness=module.load_harness(config=module.WikispeediaHarnessConfig()),
     )
-    task = module.vf.Task(list(env.taskset.source())[0]).freeze()
+    task = env.taskset.to_task(env.taskset.get_dataset()[0])
     state = module.vf.State.for_task(task)
     state = await env.harness.setup_state(task, state)

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_lean_task.py RENAMED Viewed

@@ -9,10 +9,8 @@ from verifiers.envs.experimental.composable.tasksets.lean.lean_task import (
     LEAN_GUARD_END_MARKER,
     LeanRubric,
     _build_starter_file,
-    _expected_protected_region,
     _extract_protected_region,
     _normalize_signature,
-    _wrap_with_lean_guard,
 )
@@ -80,11 +78,13 @@ class TestNormalizeSignature:
         )
-class TestWrapWithLeanGuard:
+class TestBuildStarterFileLeanGuardLayout:
     def test_marker_layout(self) -> None:
         signature = "theorem foo (x : ℝ) : x = x := by"
-        wrapped = _wrap_with_lean_guard(signature)
-        assert wrapped == (
+        starter = _build_starter_file(
+            {"formal_statement": signature, "header": "", "imports": ""}
+        )
+        assert starter == (
             "-- lean-guard: begin protected\n"
             "theorem foo (x : ℝ) : x = x := by\n"
             "-- lean-guard: end protected\n"
@@ -93,8 +93,10 @@ class TestWrapWithLeanGuard:
     def test_round_trip_via_extract(self) -> None:
         signature = "theorem foo : True := by"
-        wrapped = _wrap_with_lean_guard(signature)
-        region = _extract_protected_region(wrapped)
+        starter = _build_starter_file(
+            {"formal_statement": signature, "header": "", "imports": ""}
+        )
+        region = _extract_protected_region(starter)
         assert region is not None
         assert LEAN_GUARD_BEGIN_MARKER in region
         assert LEAN_GUARD_END_MARKER in region
@@ -212,7 +214,7 @@ class TestBuildStarterFile:
             "header": "import Mathlib",
         }
         starter = _build_starter_file(info)
-        expected = _expected_protected_region(info)
+        expected = _extract_protected_region(_build_starter_file(info)) or ""
         actual = _extract_protected_region(starter)
         assert expected == actual
         assert expected != ""

{verifiers-0.1.15.dev9 → verifiers-0.1.15.dev11}/tests/test_mcp_search_env.py RENAMED Viewed

@@ -5,7 +5,7 @@ from pathlib import Path
 from typing import Any
 import pytest
-import verifiers.v1 as vf
+import verifiers as vf
 def _load_mcp_search_module() -> Any:
@@ -54,7 +54,7 @@ def test_mcp_search_env_preserves_harness_config() -> None:
 def test_mcp_search_default_taskset_has_stable_non_doc_fixture() -> None:
     module = _load_mcp_search_module()
-    rows = module.MCPSearchTaskset(config=module.MCPSearchTasksetConfig()).rows()
+    rows = list(module.load_tasks())
     assert len(rows) >= 10
     assert len({row["answer"] for row in rows}) == len(rows)
@@ -68,7 +68,7 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None:
     env = module.load_environment(
         config=module.MCPSearchEnvConfig(taskset={"max_turns": 3}),
     )
-    rows = env.taskset.rows()
+    rows = [env.taskset.to_task(row) for row in env.taskset.get_dataset()]
     assert env.taskset.config.max_turns == 3
     assert all(row["max_turns"] == 3 for row in rows)

verifiers 0.1.15.dev9__tar.gz → 0.1.15.dev11__tar.gz

verifiers 0.1.15.dev9.tar.gz → 0.1.15.dev11.tar.gz