PyPI - wafer-cli - Versions diffs - 0.2.59__tar.gz → 0.2.60__tar.gz - Mend

wafer-cli 0.2.59 (tar.gz) → 0.2.60 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

{wafer_cli-0.2.59 → wafer_cli-0.2.60}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.59
+Version: 0.2.60
 Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{wafer_cli-0.2.59 → wafer_cli-0.2.60}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "wafer-cli"
-version = "0.2.59"
+version = "0.2.60"
 description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
 readme = "README.md"
 requires-python = ">=3.11"

wafer_cli-0.2.60/tests/test_direct_streaming.py ADDED Viewed

@@ -0,0 +1,352 @@
+"""Tests for direct endpoint streaming (_stream_direct_endpoint).
+Tests SSE parsing, output formatting for tool_call/tool_result/text/error
+events, and JSON mode re-emission.
+Run with:
+    PYTHONPATH=apps/wafer-cli uv run pytest apps/wafer-cli/tests/test_direct_streaming.py -v
+"""
+from __future__ import annotations
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+import trio
+from wafer.wevin_cli import (
+    _format_tool_call_summary,
+    _format_tool_result_summary,
+    _stream_direct_endpoint,
+)
+# ---------------------------------------------------------------------------
+# Unit tests for formatting helpers
+# ---------------------------------------------------------------------------
+class TestFormatToolCallSummary:
+    """Checks the one-line status text produced for each kind of tool call."""
+    def test_grep(self) -> None:
+        """grep calls show the search pattern in quotes."""
+        assert _format_tool_call_summary("grep", {"pattern": "shared memory"}) == 'searching: grep("shared memory")...'
+    def test_read_file(self) -> None:
+        """read_file calls show the path being read."""
+        assert _format_tool_call_summary("read_file", {"path": "./guide/memory.md"}) == "reading: ./guide/memory.md..."
+    def test_list_files(self) -> None:
+        """list_files calls show the glob pattern."""
+        assert _format_tool_call_summary("list_files", {"pattern": "*.md"}) == 'listing: find("*.md")...'
+    def test_list_files_default(self) -> None:
+        """A missing pattern falls back to "*"."""
+        assert _format_tool_call_summary("list_files", {}) == 'listing: find("*")...'
+    def test_unknown_tool(self) -> None:
+        """Unrecognised tools still mention the tool name."""
+        assert "some_tool" in _format_tool_call_summary("some_tool", {"x": 1})
+class TestFormatToolResultSummary:
+    """Checks the one-line status text produced for each kind of tool result."""
+    def test_no_matches(self) -> None:
+        """The grep "no matches" sentinel collapses to "no results"."""
+        assert _format_tool_result_summary("grep", "No matches found.") == "no results"
+    def test_no_files(self) -> None:
+        """The list_files "no files" sentinel collapses to "no results"."""
+        assert _format_tool_result_summary("list_files", "No files found matching pattern.") == "no results"
+    def test_error(self) -> None:
+        """Error content is passed through (truncated) rather than counted."""
+        summary = _format_tool_result_summary("read_file", "Error: file not found")
+        assert summary.startswith("Error:")
+    def test_grep_matches(self) -> None:
+        """grep output is summarised by its number of lines."""
+        three_lines = "line1\nline2\nline3\n"
+        assert "3 matches" in _format_tool_result_summary("grep", three_lines)
+    def test_read_file_lines(self) -> None:
+        """read_file output is summarised by its number of lines."""
+        five_lines = "a\nb\nc\nd\ne\n"
+        assert "5 lines" in _format_tool_result_summary("read_file", five_lines)
+    def test_list_files_count(self) -> None:
+        """list_files output is summarised by its number of entries."""
+        two_paths = "./a.md\n./b.md\n"
+        assert "2 files" in _format_tool_result_summary("list_files", two_paths)
+    def test_unknown_tool_char_count(self) -> None:
+        """Unrecognised tools fall back to a character count."""
+        assert "6 chars" in _format_tool_result_summary("unknown", "abcdef")
+# ---------------------------------------------------------------------------
+# SSE streaming integration tests (mocked httpx)
+# ---------------------------------------------------------------------------
+def _make_sse_lines(events: list[dict]) -> list[str]:
+    """Serialise each event dict as a ``data:`` SSE line and append the ``[DONE]`` terminator."""
+    payload_lines = [f"data: {json.dumps(event)}" for event in events]
+    return [*payload_lines, "data: [DONE]"]
+class _FakeResponse:
+    """Minimal stand-in for an httpx streaming response.
+    Exposes ``status_code``, replays a fixed list of lines from
+    ``aiter_lines`` and returns an empty body from ``aread``.
+    """
+    def __init__(self, lines: list[str], status_code: int = 200) -> None:
+        self._lines = lines
+        self._raw_body = b""
+        self.status_code = status_code
+    async def aiter_lines(self):
+        """Yield the canned lines in order."""
+        for canned_line in self._lines:
+            yield canned_line
+    async def aread(self) -> bytes:
+        """Return the canned raw body."""
+        return self._raw_body
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, *exc_info):
+        # Nothing to release; returning None lets exceptions propagate.
+        return None
+class _FakeClient:
+    """Stand-in for httpx.AsyncClient that hands back one canned response.
+    Every ``stream`` call records its URL, JSON body and headers so tests
+    can inspect the request that would have been sent.
+    """
+    def __init__(self, response: _FakeResponse) -> None:
+        self._response = response
+        # Filled in by stream(); None until the first call.
+        self.last_url: str | None = None
+        self.last_json: dict | None = None
+        self.last_headers: dict | None = None
+    def stream(self, method: str, url: str, *, json: dict | None = None, headers: dict | None = None):
+        """Record the request details and return the canned response."""
+        self.last_url, self.last_json, self.last_headers = url, json, headers
+        return self._response
+    async def __aenter__(self):
+        return self
+    async def __aexit__(self, *exc_info):
+        # Nothing to release; returning None lets exceptions propagate.
+        return None
+class TestStreamDirectEndpoint:
+    """Tests for _stream_direct_endpoint with mocked HTTP."""
+    def _run(self, events: list[dict], *, json_output: bool = False, status_code: int = 200, **kwargs) -> tuple[_FakeClient, list[str], list[str]]:
+        """Run _stream_direct_endpoint with mocked httpx, return (client, stdout_lines, stderr_lines).
+        ``kwargs`` may override api_url, auth_token, endpoint_path, query,
+        template_args and defaults; anything omitted uses the defaults below.
+        """
+        import sys  # resolved once here instead of on every mocked print call
+        response = _FakeResponse(_make_sse_lines(events), status_code=status_code)
+        client = _FakeClient(response)
+        stdout_capture: list[str] = []
+        stderr_capture: list[str] = []
+        def mock_print(*args, **kw):
+            text = " ".join(str(a) for a in args)
+            # Route on the ``file=`` keyword: only an explicit sys.stderr goes to stderr.
+            target = stderr_capture if kw.get("file") is sys.stderr else stdout_capture
+            target.append(text)
+        async def _run_inner():
+            with patch("httpx.AsyncClient", return_value=client), patch("builtins.print", side_effect=mock_print):
+                await _stream_direct_endpoint(
+                    api_url=kwargs.get("api_url", "https://api.wafer.ai"),
+                    auth_token=kwargs.get("auth_token", "test-token"),
+                    endpoint_path=kwargs.get("endpoint_path", "/v1/docs/query"),
+                    query=kwargs.get("query", "How do bank conflicts work?"),
+                    template_args=kwargs.get("template_args"),
+                    defaults=kwargs.get("defaults", {"corpus": "cuda"}),
+                    json_output=json_output,
+                )
+        trio.run(_run_inner)
+        return client, stdout_capture, stderr_capture
+    def _assert_http_error_exits(self, status_code: int, *, auth_token: str) -> None:
+        """Assert that the given HTTP status makes the CLI exit with code 1."""
+        with pytest.raises(SystemExit) as exc_info:
+            self._run([], status_code=status_code, auth_token=auth_token, query="test")
+        assert exc_info.value.code == 1
+    def test_text_events_stream_to_stdout(self) -> None:
+        events = [
+            {"type": "text", "content": "Bank conflicts occur "},
+            {"type": "text", "content": "when threads access "},
+            {"type": "text", "content": "the same bank."},
+            {"type": "done"},
+        ]
+        _, stdout, _ = self._run(events)
+        text_output = "".join(stdout)
+        assert "Bank conflicts occur " in text_output
+        assert "when threads access " in text_output
+        assert "the same bank." in text_output
+    def test_tool_call_events_render_to_stderr(self) -> None:
+        events = [
+            {"type": "tool_call", "name": "grep", "input": {"pattern": "bank conflict"}},
+            {"type": "tool_result", "name": "grep", "content": "line1\nline2\nline3\n"},
+            {"type": "text", "content": "Answer here."},
+            {"type": "done"},
+        ]
+        _, stdout, stderr = self._run(events)
+        # tool_call and tool_result go to stderr (dim status)
+        stderr_text = " ".join(stderr)
+        assert "grep" in stderr_text
+        # Text goes to stdout
+        assert any("Answer here." in s for s in stdout)
+    def test_tool_result_no_matches(self) -> None:
+        events = [
+            {"type": "tool_call", "name": "grep", "input": {"pattern": "nonexistent"}},
+            {"type": "tool_result", "name": "grep", "content": "No matches found."},
+            {"type": "text", "content": "No results."},
+            {"type": "done"},
+        ]
+        _, _, stderr = self._run(events)
+        stderr_text = " ".join(stderr)
+        assert "no results" in stderr_text
+    def test_error_event_to_stderr(self) -> None:
+        events = [
+            {"type": "error", "content": "Anthropic API error 500"},
+        ]
+        _, _, stderr = self._run(events)
+        stderr_text = " ".join(stderr)
+        assert "Anthropic API error 500" in stderr_text
+    def test_request_body_merges_defaults_and_args(self) -> None:
+        events = [{"type": "text", "content": "ok"}, {"type": "done"}]
+        client, _, _ = self._run(
+            events,
+            defaults={"corpus": "cuda"},
+            template_args={"corpus": "hip"},
+            query="test question",
+        )
+        assert client.last_json is not None
+        assert client.last_json["corpus"] == "hip"  # template_args override defaults
+        assert client.last_json["query"] == "test question"
+    def test_request_url_construction(self) -> None:
+        events = [{"type": "text", "content": "ok"}, {"type": "done"}]
+        client, _, _ = self._run(
+            events,
+            api_url="https://api.wafer.ai",
+            endpoint_path="/v1/docs/query",
+        )
+        assert client.last_url == "https://api.wafer.ai/v1/docs/query"
+    def test_request_url_strips_trailing_slash(self) -> None:
+        events = [{"type": "text", "content": "ok"}, {"type": "done"}]
+        client, _, _ = self._run(
+            events,
+            api_url="https://api.wafer.ai/",
+            endpoint_path="/v1/docs/query",
+        )
+        assert client.last_url == "https://api.wafer.ai/v1/docs/query"
+    def test_auth_header_sent(self) -> None:
+        events = [{"type": "text", "content": "ok"}, {"type": "done"}]
+        client, _, _ = self._run(events, auth_token="my-secret-token")
+        assert client.last_headers is not None
+        assert client.last_headers["Authorization"] == "Bearer my-secret-token"
+    def test_json_mode_text_events(self) -> None:
+        events = [
+            {"type": "text", "content": "Hello "},
+            {"type": "text", "content": "world"},
+            {"type": "done"},
+        ]
+        _, stdout, _ = self._run(events, json_output=True)
+        # In JSON mode, output is NDJSON lines
+        json_events = [json.loads(line) for line in stdout if line.strip()]
+        text_deltas = [e for e in json_events if e.get("type") == "text_delta"]
+        assert len(text_deltas) == 2
+        assert text_deltas[0]["delta"] == "Hello "
+        assert text_deltas[1]["delta"] == "world"
+    def test_json_mode_tool_events(self) -> None:
+        events = [
+            {"type": "tool_call", "name": "grep", "input": {"pattern": "warp"}},
+            {"type": "tool_result", "name": "grep", "content": "line1\n"},
+            {"type": "text", "content": "Answer"},
+            {"type": "done"},
+        ]
+        _, stdout, _ = self._run(events, json_output=True)
+        json_events = [json.loads(line) for line in stdout if line.strip()]
+        types = [e["type"] for e in json_events]
+        assert "tool_call_start" in types
+        assert "tool_call_end" in types
+        assert "tool_result" in types
+        assert "text_delta" in types
+        assert "session_end" in types
+    def test_json_mode_error_event(self) -> None:
+        events = [
+            {"type": "error", "content": "Something went wrong"},
+        ]
+        _, stdout, _ = self._run(events, json_output=True)
+        json_events = [json.loads(line) for line in stdout if line.strip()]
+        error_events = [e for e in json_events if e.get("type") == "error"]
+        assert len(error_events) == 1
+        assert error_events[0]["error"] == "Something went wrong"
+    def test_http_401_exits(self) -> None:
+        self._assert_http_error_exits(401, auth_token="bad-token")
+    def test_http_402_exits(self) -> None:
+        self._assert_http_error_exits(402, auth_token="token")
+    def test_multiple_tool_turns(self) -> None:
+        events = [
+            {"type": "tool_call", "name": "grep", "input": {"pattern": "bank conflict"}},
+            {"type": "tool_result", "name": "grep", "content": "file.md:10:bank conflict\n"},
+            {"type": "tool_call", "name": "read_file", "input": {"path": "./file.md"}},
+            {"type": "tool_result", "name": "read_file", "content": "Full content here\n"},
+            {"type": "text", "content": "Final answer."},
+            {"type": "done"},
+        ]
+        _, stdout, stderr = self._run(events)
+        stderr_text = " ".join(stderr)
+        assert "grep" in stderr_text
+        assert "reading" in stderr_text
+        assert any("Final answer." in s for s in stdout)

wafer_cli-0.2.60/wafer/templates/ask_docs.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Template for querying GPU documentation.
+Streams directly from the server-side docs agent — no local agent loop.
+The server runs a multi-turn Sonnet agent with grep/read_file/list_files
+tools against the corpus volume in a Modal sandbox.
+Usage:
+    wafer agent -t ask-docs "How do bank conflicts occur?"
+    wafer agent -t ask-docs --args corpus=hip "Explain HIP streams"
+"""
+try:
+    from wafer_core.rollouts.templates import TemplateConfig
+except ImportError:
+    from rollouts.templates import TemplateConfig
+# Setting ``direct_endpoint`` makes the CLI stream this path from the server
+# instead of running a local agent loop. ``defaults`` supplies the request's
+# ``corpus`` unless it is overridden on the command line (``--args corpus=hip``).
+template = TemplateConfig(
+    name="ask-docs",
+    description="Query GPU documentation to answer technical questions",
+    direct_endpoint="/v1/docs/query",
+    defaults={"corpus": "cuda"},
+)

{wafer_cli-0.2.59 → wafer_cli-0.2.60}/wafer/wevin_cli.py RENAMED Viewed

@@ -429,6 +429,140 @@ def _load_template(
         return template, None
     except Exception as e:
         return None, str(e)
+async def _stream_direct_endpoint(
+    api_url: str,
+    auth_token: str,
+    endpoint_path: str,
+    query: str,
+    template_args: dict[str, str] | None,
+    defaults: dict[str, str] | None,
+    json_output: bool,
+) -> None:
+    """Stream SSE from a server-side agent endpoint directly to the terminal.
+    Bypasses the local agent loop entirely. Used when a template has
+    `direct_endpoint` set — the server runs the full agent loop and we
+    just render the events.
+    Args:
+        api_url: Base URL of the API; a trailing slash is stripped.
+        auth_token: Sent as a ``Bearer`` token in the Authorization header.
+        endpoint_path: Path appended to ``api_url``.
+        query: User question; always stored under the ``query`` body key.
+        template_args: Per-invocation body values; these override ``defaults``.
+        defaults: Template-level default body values.
+        json_output: Emit NDJSON events through StreamingChunkFrontend
+            instead of human-readable text.
+    Raises:
+        SystemExit: With code 1 when the server answers with a non-200 status.
+    """
+    import httpx
+    url = f"{api_url.rstrip('/')}{endpoint_path}"
+    # Precedence, lowest to highest: defaults, template_args, the query itself.
+    body: dict[str, str] = {**(defaults or {}), **(template_args or {}), "query": query}
+    headers = {
+        "Authorization": f"Bearer {auth_token}",
+        "Content-Type": "application/json",
+        "Accept": "text/event-stream",
+    }
+    # The frontend exists only in JSON mode; ``frontend is not None`` doubles as the mode check.
+    frontend: StreamingChunkFrontend | None = StreamingChunkFrontend() if json_output else None
+    async with httpx.AsyncClient(timeout=180.0) as client:
+        async with client.stream("POST", url, json=body, headers=headers) as response:
+            if response.status_code == 401:
+                print("Error: Authentication failed. Run 'wafer settings login'.", file=sys.stderr)
+                sys.exit(1)
+            if response.status_code == 402:
+                print("Error: Insufficient credits. Check 'wafer settings billing'.", file=sys.stderr)
+                sys.exit(1)
+            if response.status_code != 200:
+                raw = await response.aread()
+                print(f"Error: API returned {response.status_code}: {raw.decode(errors='replace')[:500]}", file=sys.stderr)
+                sys.exit(1)
+            async for line in response.aiter_lines():
+                if not line.startswith("data:"):
+                    continue
+                # SSE permits, but does not require, one space after the colon.
+                data_str = line[len("data:"):].removeprefix(" ")
+                if data_str == "[DONE]":
+                    break
+                try:
+                    event = json.loads(data_str)
+                except json.JSONDecodeError:
+                    # One corrupt frame should not abort an otherwise healthy stream.
+                    print(f"Warning: skipping malformed event: {data_str[:80]}", file=sys.stderr)
+                    continue
+                if not isinstance(event, dict):
+                    # Valid JSON but not an event object (e.g. a bare string or list).
+                    continue
+                event_type = event.get("type", "")
+                if event_type == "tool_call":
+                    tool_name = event.get("name", "")
+                    tool_input = event.get("input", {})
+                    if frontend is not None:
+                        frontend._emit({"type": "tool_call_start", "tool_name": tool_name})
+                        frontend._emit({"type": "tool_call_end", "tool_name": tool_name, "args": tool_input})
+                    else:
+                        summary = _format_tool_call_summary(tool_name, tool_input)
+                        print(f"\033[2m  {summary}\033[0m", file=sys.stderr)
+                elif event_type == "tool_result":
+                    if frontend is not None:
+                        frontend._emit({"type": "tool_result", "is_error": False})
+                    else:
+                        summary = _format_tool_result_summary(event.get("name", ""), event.get("content", ""))
+                        print(f"\033[2m  {summary}\033[0m", file=sys.stderr)
+                elif event_type == "text":
+                    text = event.get("content", "")
+                    if frontend is not None:
+                        frontend._emit({"type": "text_delta", "delta": text})
+                    else:
+                        print(text, end="", flush=True)
+                elif event_type == "error":
+                    error_msg = event.get("content", "Unknown error")
+                    if frontend is not None:
+                        frontend._emit({"type": "error", "error": error_msg})
+                    else:
+                        print(f"\nError: {error_msg}", file=sys.stderr)
+                elif event_type == "done":
+                    break
+    if frontend is not None:
+        frontend._emit({"type": "session_end"})
+    else:
+        print()  # trailing newline after streamed text
+def _format_tool_call_summary(tool_name: str, tool_input: dict) -> str:
+    """Return a short, human-readable status line describing a tool call.
+    grep, read_file and list_files get tailored wording; any other tool is
+    rendered as ``name(<json args>)...``.
+    """
+    match tool_name:
+        case "grep":
+            return f'searching: grep("{tool_input.get("pattern", "")}")...'
+        case "read_file":
+            return f"reading: {tool_input.get('path', '')}..."
+        case "list_files":
+            # No pattern means "everything".
+            return f'listing: find("{tool_input.get("pattern", "*")}")...'
+        case _:
+            return f"{tool_name}({json.dumps(tool_input)})..."
+def _format_tool_result_summary(tool_name: str, content: str) -> str:
+    """Format a tool result into a concise status line.
+    Returns "no results" for the empty-result sentinels, the first 80
+    characters of any content containing "Error:", a line-based count for
+    grep/read_file/list_files, and a character count for any other tool.
+    """
+    if "No matches found" in content or "No files found" in content:
+        return "no results"
+    if "Error:" in content:
+        return content[:80]
+    # Count lines, not newline characters: a final line without a trailing
+    # "\n" is still a line and must not be dropped from the total.
+    line_count = content.count("\n")
+    if content and not content.endswith("\n"):
+        line_count += 1
+    if tool_name == "grep":
+        return f"found {line_count} matches"
+    if tool_name == "read_file":
+        return f"read {line_count} lines"
+    if tool_name == "list_files":
+        return f"found {line_count} files"
+    return f"got {len(content)} chars"
 def main(  # noqa: PLR0913, PLR0915
     prompt: str | None = None,
     interactive: bool = False,
@@ -587,6 +721,32 @@ def main(  # noqa: PLR0913, PLR0915
             print(f"Template: {tpl.name}", file=sys.stderr)
             print(f"  {tpl.description}", file=sys.stderr)
             print(file=sys.stderr)
+        # Direct endpoint: bypass local agent loop, stream from server
+        if tpl.direct_endpoint is not None:
+            assert prompt, (
+                f"Template '{tpl.name}' uses direct streaming and requires a prompt. "
+                f"Usage: wafer agent -t {tpl.name} \"your question\""
+            )
+            wafer_api_url = os.environ.get("WAFER_API_URL", get_api_url())
+            wafer_auth_token = os.environ.get("WAFER_AUTH_TOKEN", "")
+            assert wafer_auth_token, "WAFER_AUTH_TOKEN not set. Run 'wafer settings login' first."
+            _direct_tpl = tpl
+            _direct_prompt = prompt
+            async def _run_direct() -> None:
+                await _stream_direct_endpoint(
+                    api_url=wafer_api_url,
+                    auth_token=wafer_auth_token,
+                    endpoint_path=_direct_tpl.direct_endpoint,
+                    query=_direct_prompt,
+                    template_args=template_args,
+                    defaults=_direct_tpl.defaults if _direct_tpl.defaults else None,
+                    json_output=json_output,
+                )
+            import trio
+            trio.run(_run_direct)
+            return
     else:
         tpl = _get_default_template()
         base_system_prompt = tpl.system_prompt

{wafer_cli-0.2.59 → wafer_cli-0.2.60}/wafer_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.59
+Version: 0.2.60
 Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown

{wafer_cli-0.2.59 → wafer_cli-0.2.60}/wafer_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -9,6 +9,7 @@ tests/test_cli_parity_integration.py
 tests/test_config_show.py
 tests/test_corpus_lockdown.py
 tests/test_deps.py
+tests/test_direct_streaming.py
 tests/test_distributed_traces_cli.py
 tests/test_docker_progress.py
 tests/test_evaluate_ux.py

wafer_cli-0.2.59/wafer/templates/ask_docs.py DELETED Viewed

@@ -1,32 +0,0 @@
-"""Template for querying GPU documentation.
-Usage:
-    wafer agent -t ask-docs "How do bank conflicts occur?"
-    wafer agent -t ask-docs "Explain warp divergence in CUDA"
-"""
-try:
-    from wafer_core.rollouts.templates import TemplateConfig
-except ImportError:
-    from rollouts.templates import TemplateConfig
-template = TemplateConfig(
-    name="ask-docs",
-    description="Query GPU documentation to answer technical questions",
-    system_prompt="""You are a GPU programming expert. Use the ask_docs tool to search documentation and answer questions.
-Available corpora: cuda, cutlass, hip, amd, cdna3, hopper, rdna35, llvm-amdgpu, gcnasm.
-Strategy:
-1. Call ask_docs with the user's question and the appropriate corpus
-2. If the answer is incomplete, call ask_docs again with a refined query or different corpus
-3. Synthesize a clear, accurate answer
-Be concise but thorough. Include code examples when relevant.""",
-    tools=["ask_docs"],
-    model="anthropic/claude-opus-4-5-20251101",
-    max_tokens=8192,
-    thinking=False,
-    thinking_budget=10000,
-    single_turn=False,
-)