PyPI - vtx-coding-agent - Versions diffs - 0.1.1__py3-none-any.whl - Mend

vtx-coding-agent 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

vtx/__init__.py +63 -0
vtx/async_utils.py +40 -0
vtx/builtin_skills/github/SKILL.md +139 -0
vtx/builtin_skills/init/SKILL.md +74 -0
vtx/builtin_skills/review/SKILL.md +73 -0
vtx/builtin_skills/skill-builder/SKILL.md +133 -0
vtx/cli.py +90 -0
vtx/config.py +741 -0
vtx/context/__init__.py +15 -0
vtx/context/_xml.py +8 -0
vtx/context/agent_mds.py +128 -0
vtx/context/git.py +64 -0
vtx/context/loader.py +41 -0
vtx/context/skills.py +423 -0
vtx/core/__init__.py +47 -0
vtx/core/compaction.py +89 -0
vtx/core/errors.py +17 -0
vtx/core/handoff.py +51 -0
vtx/core/scratchpad.py +54 -0
vtx/core/types.py +197 -0
vtx/defaults/__init__.py +0 -0
vtx/defaults/config.yml +53 -0
vtx/diff_display.py +12 -0
vtx/events.py +224 -0
vtx/gh_cli.py +82 -0
vtx/git_branch.py +90 -0
vtx/headless.py +127 -0
vtx/llm/__init__.py +93 -0
vtx/llm/base.py +217 -0
vtx/llm/context_length.py +150 -0
vtx/llm/dynamic_models.py +735 -0
vtx/llm/model_fetcher.py +279 -0
vtx/llm/models.py +78 -0
vtx/llm/oauth/__init__.py +59 -0
vtx/llm/oauth/copilot.py +358 -0
vtx/llm/oauth/dynamic.py +236 -0
vtx/llm/oauth/openai.py +400 -0
vtx/llm/phase_parser.py +270 -0
vtx/llm/provider.yaml +280 -0
vtx/llm/provider_catalog.py +230 -0
vtx/llm/providers/__init__.py +45 -0
vtx/llm/providers/anthropic_sdk.py +256 -0
vtx/llm/providers/mock.py +249 -0
vtx/llm/providers/openai_sdk.py +246 -0
vtx/llm/providers/sanitize.py +14 -0
vtx/llm/sdk/__init__.py +13 -0
vtx/llm/sdk/anthropic.py +382 -0
vtx/llm/sdk/base.py +82 -0
vtx/llm/sdk/openai.py +344 -0
vtx/llm/tool_parser.py +161 -0
vtx/loop.py +272 -0
vtx/notify.py +109 -0
vtx/permissions.py +114 -0
vtx/prompts/__init__.py +45 -0
vtx/prompts/builder.py +86 -0
vtx/prompts/env.py +58 -0
vtx/prompts/identity.py +166 -0
vtx/prompts/tooling.py +36 -0
vtx/py.typed +0 -0
vtx/runtime.py +580 -0
vtx/session.py +868 -0
vtx/sounds/completion.wav +0 -0
vtx/sounds/error.wav +0 -0
vtx/sounds/permission.wav +0 -0
vtx/themes.py +1104 -0
vtx/tools/__init__.py +68 -0
vtx/tools/_read_image.py +106 -0
vtx/tools/_tool_utils.py +90 -0
vtx/tools/base.py +36 -0
vtx/tools/bash.py +371 -0
vtx/tools/edit.py +261 -0
vtx/tools/find.py +132 -0
vtx/tools/read.py +238 -0
vtx/tools/skill.py +278 -0
vtx/tools/web.py +238 -0
vtx/tools/write.py +88 -0
vtx/tools_manager.py +216 -0
vtx/turn.py +789 -0
vtx/ui/__init__.py +0 -0
vtx/ui/agent_runner.py +417 -0
vtx/ui/app.py +665 -0
vtx/ui/app_protocol.py +29 -0
vtx/ui/autocomplete.py +440 -0
vtx/ui/blocks.py +735 -0
vtx/ui/chat.py +613 -0
vtx/ui/clipboard.py +59 -0
vtx/ui/commands/__init__.py +100 -0
vtx/ui/commands/auth.py +306 -0
vtx/ui/commands/base.py +122 -0
vtx/ui/commands/models.py +144 -0
vtx/ui/commands/sessions.py +388 -0
vtx/ui/commands/settings.py +286 -0
vtx/ui/completion_ui.py +313 -0
vtx/ui/export.py +703 -0
vtx/ui/floating_list.py +370 -0
vtx/ui/formatting.py +287 -0
vtx/ui/input.py +760 -0
vtx/ui/latex.py +349 -0
vtx/ui/launch.py +108 -0
vtx/ui/path_complete.py +228 -0
vtx/ui/prompt_history.py +102 -0
vtx/ui/queue_ui.py +141 -0
vtx/ui/selection_mode.py +18 -0
vtx/ui/session_ui.py +235 -0
vtx/ui/startup.py +124 -0
vtx/ui/styles.py +327 -0
vtx/ui/tool_output.py +34 -0
vtx/ui/tree.py +437 -0
vtx/ui/welcome.py +51 -0
vtx/ui/widgets.py +558 -0
vtx/update_check.py +49 -0
vtx/version.py +22 -0
vtx_coding_agent-0.1.1.dist-info/METADATA +259 -0
vtx_coding_agent-0.1.1.dist-info/RECORD +117 -0
vtx_coding_agent-0.1.1.dist-info/WHEEL +4 -0
vtx_coding_agent-0.1.1.dist-info/entry_points.txt +2 -0
vtx_coding_agent-0.1.1.dist-info/licenses/LICENSE +201 -0

vtx/llm/providers/openai_sdk.py ADDED Viewed

@@ -0,0 +1,246 @@
+"""OpenAI SDK provider - wraps the SDK layer into vtx's BaseProvider interface."""
+from collections.abc import AsyncIterator
+from typing import Any, ClassVar
+from openai import APIConnectionError, APIStatusError, RateLimitError
+from ...core.errors import format_error
+from ...core.types import (
+    AssistantMessage,
+    ImageContent,
+    Message,
+    StopReason,
+    StreamDone,
+    StreamError,
+    StreamPart,
+    TextContent,
+    TextPart,
+    ThinkingContent,
+    ThinkPart,
+    ToolCall,
+    ToolCallDelta,
+    ToolCallStart,
+    ToolDefinition,
+    ToolResultMessage,
+    Usage,
+    UserMessage,
+)
+from ..base import BaseProvider, LLMStream, ProviderConfig, resolve_api_key
+from ..sdk.base import GenerationConfig
+from ..sdk.base import Message as SDKMessage
+from ..sdk.openai import OpenAISDK
+from .sanitize import sanitize_surrogates
+class OpenAISDKProvider(BaseProvider):
+    name = "openai"
+    thinking_levels: ClassVar[list[str]] = ["none"]
+    def __init__(self, config: ProviderConfig):
+        super().__init__(config)
+        api_key = resolve_api_key(
+            config.api_key,
+            env_vars=("OPENAI_API_KEY",),
+            base_url=config.base_url,
+            auth_mode=config.openai_compat_auth_mode,
+        )
+        if not api_key and (config.provider or "").lower() in {
+            "airouter",
+            "opencode",
+            "kilo",
+            "tokenrouter",
+        }:
+            api_key = self._resolve_dynamic_key_for(config)
+        if not api_key:
+            raise ValueError(
+                f"No API key found for {self.name}. "
+                "Set OPENAI_API_KEY environment variable or pass api_key in config, "
+                'or configure llm.auth.openai_compat = "auto"/"none" for local endpoints.'
+            )
+        self._sdk = OpenAISDK(api_key=api_key, base_url=config.base_url)
+    @staticmethod
+    def _resolve_dynamic_key_for(config: ProviderConfig) -> str | None:
+        from ..oauth.dynamic import get_dynamic_api_key
+        return get_dynamic_api_key(config.provider or "")
+    def _convert_messages(
+        self, messages: list[Message], system_prompt: str | None
+    ) -> list[SDKMessage]:
+        result: list[SDKMessage] = []
+        if system_prompt:
+            result.append(SDKMessage(role="system", content=sanitize_surrogates(system_prompt)))
+        for msg in messages:
+            if isinstance(msg, UserMessage):
+                result.append(self._convert_user_message(msg))
+            elif isinstance(msg, AssistantMessage):
+                result.append(self._convert_assistant_message(msg))
+            elif isinstance(msg, ToolResultMessage):
+                result.append(self._convert_tool_result(msg))
+        return result
+    def _convert_user_message(self, msg: UserMessage) -> SDKMessage:
+        if isinstance(msg.content, str):
+            content = sanitize_surrogates(msg.content)
+            if not content or content.isspace():
+                raise ValueError("User message content cannot be empty or whitespace-only")
+            return SDKMessage(role="user", content=content)
+        parts: list[str] = []
+        image_parts: list[str] = []
+        for item in msg.content:
+            if isinstance(item, TextContent):
+                text = sanitize_surrogates(item.text)
+                if text and not text.isspace():
+                    parts.append(text)
+            elif isinstance(item, ImageContent):
+                image_parts.append(f"data:{item.mime_type};base64,{item.data}")
+        content = "\n".join(parts) if parts else ""
+        if not content and not image_parts:
+            raise ValueError("User message content cannot be empty or whitespace-only")
+        return SDKMessage(role="user", content=content, image_parts=image_parts or None)
+    def _convert_assistant_message(self, msg: AssistantMessage) -> SDKMessage:
+        import json
+        from ..phase_parser import INLINE_THINK_SIGNATURE
+        content_parts: list[str] = []
+        metadata: dict[str, Any] = {}
+        tool_calls: list[dict[str, Any]] = []
+        for item in msg.content:
+            if isinstance(item, TextContent):
+                if item.text.strip():
+                    content_parts.append(sanitize_surrogates(item.text))
+            elif isinstance(item, ThinkingContent):
+                if item.signature == INLINE_THINK_SIGNATURE:
+                    content_parts.append(f"<think>{item.thinking}</think>")
+                elif item.signature == "reasoning_content":
+                    metadata["reasoning_content"] = item.thinking
+            elif isinstance(item, ToolCall):
+                tool_calls.append(
+                    {
+                        "id": item.id,
+                        "type": "function",
+                        "function": {"name": item.name, "arguments": json.dumps(item.arguments)},
+                    }
+                )
+        if tool_calls:
+            metadata["tool_calls"] = tool_calls
+        return SDKMessage(
+            role="assistant",
+            content="".join(content_parts) if content_parts else "",
+            metadata=metadata or None,
+        )
+    def _convert_tool_result(self, msg: ToolResultMessage) -> SDKMessage:
+        text_parts = [item.text for item in msg.content if isinstance(item, TextContent)]
+        has_images = any(isinstance(item, ImageContent) for item in msg.content)
+        if text_parts:
+            content = "\n".join(text_parts)
+        elif has_images:
+            content = "(see attached image)"
+        else:
+            content = "(no output)"
+        return SDKMessage(
+            role="tool", content=content, metadata={"tool_call_id": msg.tool_call_id}
+        )
+    def _convert_tools(self, tools: list[ToolDefinition]) -> list[dict[str, Any]]:
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": tool.name,
+                    "description": tool.description,
+                    "parameters": tool.parameters,
+                },
+            }
+            for tool in tools
+        ]
+    async def _stream_impl(
+        self,
+        messages: list[Message],
+        *,
+        system_prompt: str | None = None,
+        tools: list[ToolDefinition] | None = None,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+    ) -> LLMStream:
+        sdk_messages = self._convert_messages(messages, system_prompt)
+        sdk_tools = self._convert_tools(tools) if tools else None
+        temp = temperature if temperature is not None else self.config.temperature
+        max_tok = max_tokens if max_tokens is not None else self.config.max_tokens
+        config = GenerationConfig(
+            model=self.config.model, temperature=temp or 0.7, max_tokens=max_tok
+        )
+        if sdk_tools:
+            raw_stream = await self._sdk.generate_with_tools(
+                sdk_messages, sdk_tools, config, stream=True
+            )
+        else:
+            raw_stream = await self._sdk.generate(sdk_messages, config, stream=True)
+        llm_stream = LLMStream()
+        llm_stream.set_iterator(self._process_stream(raw_stream, llm_stream))
+        return llm_stream
+    async def _process_stream(
+        self, response: Any, llm_stream: LLMStream
+    ) -> AsyncIterator[StreamPart]:
+        stop_reason: StopReason = StopReason.STOP
+        try:
+            async for chunk in response:
+                chunk_type = chunk.get("type")
+                if chunk_type == "usage":
+                    usage_data = chunk.get("usage", {})
+                    llm_stream._usage = Usage(
+                        input_tokens=usage_data.get("prompt_tokens", 0)
+                        or usage_data.get("input_tokens", 0),
+                        output_tokens=usage_data.get("completion_tokens", 0)
+                        or usage_data.get("output_tokens", 0),
+                    )
+                elif chunk_type == "reasoning":
+                    yield ThinkPart(
+                        think=chunk.get("content", ""),
+                        signature=chunk.get("signature", "reasoning_content"),
+                    )
+                elif chunk_type == "text" or chunk_type == "content":
+                    yield TextPart(text=chunk.get("content", ""))
+                elif chunk_type == "tool_calls":
+                    tool_calls = chunk.get("tool_calls", [])
+                    for i, tc in enumerate(tool_calls):
+                        yield ToolCallStart(id=tc.id, name=tc.name, index=i)
+                        yield ToolCallDelta(index=i, arguments_delta=tc.arguments)
+            yield StreamDone(stop_reason=stop_reason)
+        except Exception as e:
+            yield StreamError(error=format_error(e))
+    def should_retry_for_error(self, error: Exception) -> bool:
+        if isinstance(error, RateLimitError):
+            return True
+        if isinstance(error, APIConnectionError):
+            return True
+        if isinstance(error, APIStatusError):
+            return error.status_code >= 500
+        return False

vtx/llm/providers/sanitize.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Sanitize text content before sending to LLM APIs.
+Lone Unicode surrogates (U+D800-U+DFFF) cause API errors with some providers.
+This matches pi-mono's sanitizeSurrogates() behavior.
+"""
+import re
+_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")
+def sanitize_surrogates(text: str) -> str:
+    return _SURROGATE_RE.sub("\ufffd", text)

vtx/llm/sdk/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .anthropic import AnthropicSDK
+from .base import BaseLLMSDK, GenerationConfig, GenerationResponse, Message, ToolCall
+from .openai import OpenAISDK
+# Public names re-exported from the SDK submodules imported above.
+__all__ = [
+    "AnthropicSDK",
+    "BaseLLMSDK",
+    "GenerationConfig",
+    "GenerationResponse",
+    "Message",
+    "OpenAISDK",
+    "ToolCall",
+]

vtx/llm/sdk/anthropic.py ADDED Viewed

@@ -0,0 +1,382 @@
+"""Anthropic-native SDK. Direct HTTP via httpx."""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import os
+from collections.abc import AsyncGenerator
+from typing import Any
+import httpx
+from .base import BaseLLMSDK, GenerationConfig, GenerationResponse, Message, ToolCall
+# Module-level logger; used for retry warnings.
+logger = logging.getLogger(__name__)
+# Base URL used when the SDK is constructed without an explicit base_url.
+ANTHROPIC_API_ROOT = "https://api.anthropic.com"
+# Sent as the "anthropic-version" request header.
+ANTHROPIC_VERSION = "2023-06-01"
+# max_tokens sent when the generation config does not provide one.
+_MAX_TOKENS_DEFAULT = 4096
+# Delay in seconds before the first retry; doubled on each further attempt.
+_RETRY_BASE_DELAY = 1.0
+# Total number of attempts made by the blocking request loop.
+_MAX_RETRIES = 3
+def _is_transient(exc: Exception) -> bool:
+    if isinstance(exc, httpx.HTTPStatusError):
+        return exc.response.status_code >= 500 or exc.response.status_code == 429
+    if isinstance(
+        exc, (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout, httpx.RemoteProtocolError)
+    ):
+        return True
+    msg = str(exc).lower()
+    return any(
+        s in msg
+        for s in (
+            "connection",
+            "timeout",
+            "timed out",
+            "reset",
+            "broken pipe",
+            "network",
+            "unavailable",
+            "bad gateway",
+        )
+    )
+def _content_to_anthropic(content: str | list[dict[str, Any]]) -> str | list[dict[str, Any]]:
+    if isinstance(content, str):
+        return content
+    out: list[dict[str, Any]] = []
+    for part in content:
+        if not isinstance(part, dict):
+            continue
+        kind = part.get("type")
+        if kind == "text":
+            out.append({"type": "text", "text": part.get("text", "")})
+        elif kind == "image_url":
+            url = (part.get("image_url") or {}).get("url", "")
+            if url.startswith("data:"):
+                try:
+                    header, b64 = url.split(",", 1)
+                except ValueError:
+                    continue
+                media = header[len("data:") :].split(";", 1)[0]
+                out.append(
+                    {
+                        "type": "image",
+                        "source": {"type": "base64", "media_type": media, "data": b64},
+                    }
+                )
+    return out
+def _messages_to_anthropic(messages: list[Message]) -> tuple[str | None, list[dict[str, Any]]]:
+    system_parts: list[str] = []
+    converted: list[dict[str, Any]] = []
+    for m in messages:
+        role = (m.role or "").lower()
+        if role == "system":
+            text = m.content if isinstance(m.content, str) else str(m.content or "")
+            if text:
+                system_parts.append(text)
+            continue
+        if role not in ("user", "assistant"):
+            if role == "tool" and converted and converted[-1]["role"] == "user":
+                converted[-1]["content"] = (
+                    converted[-1]["content"]
+                    if isinstance(converted[-1]["content"], list)
+                    else [{"type": "text", "text": converted[-1]["content"]}]
+                )
+                converted[-1]["content"].append(
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": m.metadata.get("tool_call_id", "") if m.metadata else "",
+                        "content": m.content
+                        if isinstance(m.content, str)
+                        else str(m.content or ""),
+                    }
+                )
+            continue
+        converted.append(
+            {
+                "role": role,
+                "content": _content_to_anthropic(m.content if m.content is not None else ""),
+            }
+        )
+    merged: list[dict[str, Any]] = []
+    for msg in converted:
+        if merged and merged[-1]["role"] == msg["role"]:
+            prev = merged[-1]["content"]
+            cur = msg["content"]
+            if isinstance(prev, str):
+                prev = [{"type": "text", "text": prev}]
+            if isinstance(cur, str):
+                cur = [{"type": "text", "text": cur}]
+            merged[-1]["content"] = prev + cur
+        else:
+            merged.append(msg)
+    if merged and merged[0]["role"] != "user":
+        merged.insert(0, {"role": "user", "content": [{"type": "text", "text": "(continue)"}]})
+    system = "\n\n".join(system_parts) if system_parts else None
+    return system, merged
+def _tools_to_anthropic(tools: list[dict[str, Any]] | None) -> list[dict[str, Any]] | None:
+    if not tools:
+        return None
+    out: list[dict[str, Any]] = []
+    for t in tools:
+        if t.get("type") == "function" and "function" in t:
+            fn = t["function"]
+            out.append(
+                {
+                    "name": fn.get("name", ""),
+                    "description": fn.get("description", ""),
+                    "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+                }
+            )
+        else:
+            out.append(t)
+    return out
+def _parse_anthropic_response(data: dict[str, Any], model: str) -> GenerationResponse:
+    content = data.get("content", [])
+    text_parts: list[str] = []
+    tool_calls: list[ToolCall] = []
+    thinking_parts: list[str] = []
+    for block in content:
+        kind = block.get("type")
+        if kind == "text":
+            text_parts.append(block.get("text", ""))
+        elif kind == "tool_use":
+            tool_calls.append(
+                ToolCall(
+                    id=block.get("id", ""),
+                    name=block.get("name", ""),
+                    arguments=json.dumps(block.get("input", {})),
+                )
+            )
+        elif kind == "thinking":
+            thinking_parts.append(block.get("thinking", ""))
+    usage = data.get("usage", {})
+    return GenerationResponse(
+        content="\n".join(text_parts),
+        model=model,
+        finish_reason=data.get("stop_reason"),
+        tool_calls=tool_calls or None,
+        usage={
+            "prompt_tokens": usage.get("input_tokens", 0),
+            "completion_tokens": usage.get("output_tokens", 0),
+            "total_tokens": usage.get("input_tokens", 0) + usage.get("output_tokens", 0),
+        }
+        if usage
+        else None,
+        reasoning_content="\n".join(thinking_parts),
+    )
+class AnthropicSDK(BaseLLMSDK):
+    def __init__(self, api_key: str, base_url: str | None = None, **_: Any):
+        url: str = base_url or ANTHROPIC_API_ROOT
+        super().__init__(api_key, url)
+        self._client: httpx.AsyncClient | None = None
+    @property
+    def client(self) -> httpx.AsyncClient:
+        if self._client is None:
+            assert self.base_url is not None
+            self._client = httpx.AsyncClient(
+                base_url=self.base_url,
+                timeout=httpx.Timeout(60.0, read=300.0),
+                headers={
+                    "x-api-key": self.api_key,
+                    "anthropic-version": ANTHROPIC_VERSION,
+                    "content-type": "application/json",
+                },
+            )
+        return self._client
+    def _resolve_model(self, config: GenerationConfig) -> str:
+        model = (config.model or "").strip() or os.getenv("VTX_MODEL", "").strip()
+        if model:
+            return model
+        return "claude-3-5-sonnet-latest"
+    def _build_payload(
+        self,
+        messages: list[Message],
+        config: GenerationConfig,
+        tools: list[dict[str, Any]] | None = None,
+    ) -> dict[str, Any]:
+        system, converted = _messages_to_anthropic(messages)
+        model = self._resolve_model(config)
+        payload: dict[str, Any] = {
+            "model": model,
+            "max_tokens": config.max_tokens or _MAX_TOKENS_DEFAULT,
+            "messages": converted,
+        }
+        if system:
+            payload["system"] = system
+        if config.temperature is not None:
+            payload["temperature"] = config.temperature
+        if config.top_p is not None:
+            payload["top_p"] = config.top_p
+        if config.stop_sequences:
+            payload["stop_sequences"] = config.stop_sequences
+        anthropic_tools = _tools_to_anthropic(tools)
+        if anthropic_tools:
+            payload["tools"] = anthropic_tools
+            tc = config.tool_choice
+            if isinstance(tc, str) and tc in ("auto", "any", "none"):
+                payload["tool_choice"] = {"type": tc}
+            elif isinstance(tc, dict):
+                payload["tool_choice"] = tc
+        return payload
+    async def generate(
+        self, messages: list[Message], config: GenerationConfig, stream: bool = False
+    ) -> GenerationResponse | AsyncGenerator:
+        if stream:
+            return self._generate_stream(messages, config)
+        return await self._generate_blocking(messages, config)
+    async def _generate_blocking(
+        self,
+        messages: list[Message],
+        config: GenerationConfig,
+        tools: list[dict[str, Any]] | None = None,
+    ) -> GenerationResponse:
+        payload = self._build_payload(messages, config, tools)
+        last_exc: Exception | None = None
+        for attempt in range(_MAX_RETRIES):
+            try:
+                resp = await self.client.post("/v1/messages", json=payload)
+                if resp.status_code >= 400:
+                    if _is_transient(
+                        httpx.HTTPStatusError("err", request=resp.request, response=resp)
+                    ):
+                        raise httpx.HTTPStatusError("err", request=resp.request, response=resp)
+                    body = resp.text
+                    raise RuntimeError(f"Anthropic API error {resp.status_code}: {body[:300]}")
+                data = resp.json()
+                return _parse_anthropic_response(data, payload["model"])
+            except Exception as exc:
+                last_exc = exc
+                if not _is_transient(exc) or attempt == _MAX_RETRIES - 1:
+                    raise
+                delay = _RETRY_BASE_DELAY * (2**attempt)
+                logger.warning(
+                    "Anthropic transient error (attempt %d/%d), retrying in %.1fs: %s",
+                    attempt + 1,
+                    _MAX_RETRIES,
+                    delay,
+                    str(exc)[:200],
+                )
+                await asyncio.sleep(delay)
+        if last_exc:
+            raise last_exc
+        raise RuntimeError("unreachable")
+    async def _generate_stream(
+        self, messages: list[Message], config: GenerationConfig
+    ) -> AsyncGenerator[dict[str, Any], None]:
+        payload = self._build_payload(messages, config)
+        payload["stream"] = True
+        async with self.client.stream("POST", "/v1/messages", json=payload) as resp:
+            if resp.status_code >= 400:
+                body = await resp.aread()
+                raise RuntimeError(
+                    f"Anthropic API error {resp.status_code}: "
+                    f"{body.decode('utf-8', errors='replace')[:300]}"
+                )
+            content_buf: list[str] = []
+            tool_calls: dict[int, dict[str, Any]] = {}
+            input_tokens = 0
+            output_tokens = 0
+            current_block: dict[str, Any] | None = None
+            async for line in resp.aiter_lines():
+                if not line:
+                    continue
+                if line.startswith("data: "):
+                    raw = line[len("data: ") :]
+                    if raw.strip() == "[DONE]":
+                        break
+                    try:
+                        ev = json.loads(raw)
+                    except json.JSONDecodeError:
+                        continue
+                    et = ev.get("type", "")
+                    if et == "message_start":
+                        usage = (ev.get("message") or {}).get("usage") or {}
+                        input_tokens = usage.get("input_tokens", 0)
+                    elif et == "content_block_start":
+                        current_block = ev.get("content_block", {})
+                    elif et == "content_block_delta":
+                        delta = ev.get("delta", {})
+                        if delta.get("type") == "text_delta":
+                            text = delta.get("text", "")
+                            content_buf.append(text)
+                            yield {"type": "content", "content": text}
+                        elif delta.get("type") == "input_json_delta":
+                            idx = ev.get("index", 0)
+                            tc = tool_calls.setdefault(
+                                idx,
+                                {
+                                    "id": (current_block or {}).get("id", ""),
+                                    "name": (current_block or {}).get("name", ""),
+                                    "arguments": "",
+                                },
+                            )
+                            tc["arguments"] += delta.get("partial_json", "")
+                    elif et == "content_block_stop":
+                        if current_block and current_block.get("type") == "tool_use":
+                            idx = ev.get("index", 0)
+                            tool_calls.setdefault(
+                                idx,
+                                {
+                                    "id": current_block.get("id", ""),
+                                    "name": current_block.get("name", ""),
+                                    "arguments": "",
+                                },
+                            )
+                    elif et == "message_delta":
+                        usage = ev.get("usage", {})
+                        output_tokens = usage.get("output_tokens", 0)
+                    elif et == "message_stop":
+                        break
+                    current_block = None
+        if tool_calls:
+            calls = [
+                ToolCall(id=tc["id"], name=tc["name"], arguments=tc["arguments"] or "{}")
+                for tc in tool_calls.values()
+            ]
+            yield {"type": "tool_calls", "tool_calls": calls}
+        if input_tokens or output_tokens:
+            yield {
+                "type": "usage",
+                "usage": {
+                    "prompt_tokens": input_tokens,
+                    "completion_tokens": output_tokens,
+                    "total_tokens": input_tokens + output_tokens,
+                },
+            }
+    async def generate_with_tools(
+        self,
+        messages: list[Message],
+        tools: list[dict],
+        config: GenerationConfig,
+        stream: bool = False,
+    ) -> GenerationResponse | AsyncGenerator:
+        return await self._generate_blocking(messages, config, tools=tools)
+    def get_available_models(self) -> list[str]:
+        return ["claude-3-5-sonnet-latest", "claude-3-5-haiku-latest", "claude-3-opus-latest"]
+    async def aclose(self) -> None:
+        if self._client is not None:
+            await self._client.aclose()
+            self._client = None