PyPI - vtx-coding-agent - Versions diffs - 0.1.1__py3-none-any.whl - Mend

vtx-coding-agent 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117) hide show

vtx/__init__.py +63 -0
vtx/async_utils.py +40 -0
vtx/builtin_skills/github/SKILL.md +139 -0
vtx/builtin_skills/init/SKILL.md +74 -0
vtx/builtin_skills/review/SKILL.md +73 -0
vtx/builtin_skills/skill-builder/SKILL.md +133 -0
vtx/cli.py +90 -0
vtx/config.py +741 -0
vtx/context/__init__.py +15 -0
vtx/context/_xml.py +8 -0
vtx/context/agent_mds.py +128 -0
vtx/context/git.py +64 -0
vtx/context/loader.py +41 -0
vtx/context/skills.py +423 -0
vtx/core/__init__.py +47 -0
vtx/core/compaction.py +89 -0
vtx/core/errors.py +17 -0
vtx/core/handoff.py +51 -0
vtx/core/scratchpad.py +54 -0
vtx/core/types.py +197 -0
vtx/defaults/__init__.py +0 -0
vtx/defaults/config.yml +53 -0
vtx/diff_display.py +12 -0
vtx/events.py +224 -0
vtx/gh_cli.py +82 -0
vtx/git_branch.py +90 -0
vtx/headless.py +127 -0
vtx/llm/__init__.py +93 -0
vtx/llm/base.py +217 -0
vtx/llm/context_length.py +150 -0
vtx/llm/dynamic_models.py +735 -0
vtx/llm/model_fetcher.py +279 -0
vtx/llm/models.py +78 -0
vtx/llm/oauth/__init__.py +59 -0
vtx/llm/oauth/copilot.py +358 -0
vtx/llm/oauth/dynamic.py +236 -0
vtx/llm/oauth/openai.py +400 -0
vtx/llm/phase_parser.py +270 -0
vtx/llm/provider.yaml +280 -0
vtx/llm/provider_catalog.py +230 -0
vtx/llm/providers/__init__.py +45 -0
vtx/llm/providers/anthropic_sdk.py +256 -0
vtx/llm/providers/mock.py +249 -0
vtx/llm/providers/openai_sdk.py +246 -0
vtx/llm/providers/sanitize.py +14 -0
vtx/llm/sdk/__init__.py +13 -0
vtx/llm/sdk/anthropic.py +382 -0
vtx/llm/sdk/base.py +82 -0
vtx/llm/sdk/openai.py +344 -0
vtx/llm/tool_parser.py +161 -0
vtx/loop.py +272 -0
vtx/notify.py +109 -0
vtx/permissions.py +114 -0
vtx/prompts/__init__.py +45 -0
vtx/prompts/builder.py +86 -0
vtx/prompts/env.py +58 -0
vtx/prompts/identity.py +166 -0
vtx/prompts/tooling.py +36 -0
vtx/py.typed +0 -0
vtx/runtime.py +580 -0
vtx/session.py +868 -0
vtx/sounds/completion.wav +0 -0
vtx/sounds/error.wav +0 -0
vtx/sounds/permission.wav +0 -0
vtx/themes.py +1104 -0
vtx/tools/__init__.py +68 -0
vtx/tools/_read_image.py +106 -0
vtx/tools/_tool_utils.py +90 -0
vtx/tools/base.py +36 -0
vtx/tools/bash.py +371 -0
vtx/tools/edit.py +261 -0
vtx/tools/find.py +132 -0
vtx/tools/read.py +238 -0
vtx/tools/skill.py +278 -0
vtx/tools/web.py +238 -0
vtx/tools/write.py +88 -0
vtx/tools_manager.py +216 -0
vtx/turn.py +789 -0
vtx/ui/__init__.py +0 -0
vtx/ui/agent_runner.py +417 -0
vtx/ui/app.py +665 -0
vtx/ui/app_protocol.py +29 -0
vtx/ui/autocomplete.py +440 -0
vtx/ui/blocks.py +735 -0
vtx/ui/chat.py +613 -0
vtx/ui/clipboard.py +59 -0
vtx/ui/commands/__init__.py +100 -0
vtx/ui/commands/auth.py +306 -0
vtx/ui/commands/base.py +122 -0
vtx/ui/commands/models.py +144 -0
vtx/ui/commands/sessions.py +388 -0
vtx/ui/commands/settings.py +286 -0
vtx/ui/completion_ui.py +313 -0
vtx/ui/export.py +703 -0
vtx/ui/floating_list.py +370 -0
vtx/ui/formatting.py +287 -0
vtx/ui/input.py +760 -0
vtx/ui/latex.py +349 -0
vtx/ui/launch.py +108 -0
vtx/ui/path_complete.py +228 -0
vtx/ui/prompt_history.py +102 -0
vtx/ui/queue_ui.py +141 -0
vtx/ui/selection_mode.py +18 -0
vtx/ui/session_ui.py +235 -0
vtx/ui/startup.py +124 -0
vtx/ui/styles.py +327 -0
vtx/ui/tool_output.py +34 -0
vtx/ui/tree.py +437 -0
vtx/ui/welcome.py +51 -0
vtx/ui/widgets.py +558 -0
vtx/update_check.py +49 -0
vtx/version.py +22 -0
vtx_coding_agent-0.1.1.dist-info/METADATA +259 -0
vtx_coding_agent-0.1.1.dist-info/RECORD +117 -0
vtx_coding_agent-0.1.1.dist-info/WHEEL +4 -0
vtx_coding_agent-0.1.1.dist-info/entry_points.txt +2 -0
vtx_coding_agent-0.1.1.dist-info/licenses/LICENSE +201 -0

vtx/llm/oauth/openai.py ADDED Viewed

@@ -0,0 +1,400 @@
+"""
+OpenAI OAuth flow (ChatGPT/Codex-style OAuth).
+Stores OAuth credentials locally and provides token refresh support.
+"""
+import asyncio
+import base64
+import contextlib
+import hashlib
+import json
+import secrets
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+from urllib.parse import parse_qs, urlencode, urlparse
+import aiohttp
+from vtx import get_config_dir
+# OAuth client and endpoint settings used by the authorization-code + PKCE flow below.
+_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
+_AUTHORIZE_URL = "https://auth.openai.com/oauth/authorize"
+_TOKEN_URL = "https://auth.openai.com/oauth/token"
+# Must stay in sync with the host, port and path served by _start_callback_server().
+_REDIRECT_URI = "http://localhost:1455/auth/callback"
+_SCOPE = "openid profile email offline_access"
+# Key of the access-token JWT claim whose value holds "chatgpt_account_id".
+_JWT_CLAIM_PATH = "https://api.openai.com/auth"
+# Page sent back to the browser once a callback request has been accepted.
+_SUCCESS_HTML = """<!doctype html>
+<html lang=\"en\">
+<head><meta charset=\"utf-8\" /><title>Authentication successful</title></head>
+<body><p>Authentication successful. Return to your terminal to continue.</p></body>
+</html>"""
+@dataclass
+class OpenAICredentials:
+    """OAuth credential set that is persisted to ``openai_auth.json``."""
+    # Refresh token submitted with the ``refresh_token`` grant.
+    refresh: str
+    # Access token; the account id is read from its JWT payload.
+    access: str
+    # Expiry as a Unix timestamp in milliseconds.
+    expires: int
+    # Value of the ``chatgpt_account_id`` claim taken from the access token.
+    account_id: str
+def get_openai_auth_path() -> Path:
+    return get_config_dir() / "openai_auth.json"
+def load_openai_credentials() -> OpenAICredentials | None:
+    path = get_openai_auth_path()
+    if not path.exists():
+        return None
+    try:
+        data = json.loads(path.read_text())
+        return OpenAICredentials(
+            refresh=data["refresh"],
+            access=data["access"],
+            expires=data["expires"],
+            account_id=data["account_id"],
+        )
+    except (json.JSONDecodeError, KeyError):
+        return None
+def save_openai_credentials(creds: OpenAICredentials) -> None:
+    path = get_openai_auth_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(
+        json.dumps(
+            {
+                "refresh": creds.refresh,
+                "access": creds.access,
+                "expires": creds.expires,
+                "account_id": creds.account_id,
+            },
+            indent=2,
+        )
+    )
+    path.chmod(0o600)
+def clear_openai_credentials() -> None:
+    path = get_openai_auth_path()
+    if path.exists():
+        path.unlink()
+def is_openai_logged_in() -> bool:
+    return load_openai_credentials() is not None
+def _base64url_encode(data: bytes) -> str:
+    return base64.urlsafe_b64encode(data).rstrip(b"=").decode()
+def _generate_pkce() -> tuple[str, str]:
+    verifier = _base64url_encode(secrets.token_bytes(32))
+    challenge = _base64url_encode(hashlib.sha256(verifier.encode()).digest())
+    return verifier, challenge
+def _create_state() -> str:
+    return secrets.token_hex(16)
+def _decode_jwt_payload(token: str) -> dict[str, Any] | None:
+    try:
+        parts = token.split(".")
+        if len(parts) != 3:
+            return None
+        payload = parts[1]
+        if payload is None:
+            return None
+        padded = payload + "=" * (-len(payload) % 4)
+        decoded = base64.urlsafe_b64decode(padded.encode()).decode()
+        return json.loads(decoded)
+    except Exception:
+        return None
+def _extract_account_id(access_token: str) -> str | None:
+    payload = _decode_jwt_payload(access_token)
+    if not payload:
+        return None
+    auth = payload.get(_JWT_CLAIM_PATH)
+    if not isinstance(auth, dict):
+        return None
+    account_id = auth.get("chatgpt_account_id")
+    return account_id if isinstance(account_id, str) and account_id else None
+def _build_authorize_url(code_challenge: str, state: str, originator: str) -> str:
+    query = urlencode(
+        {
+            "response_type": "code",
+            "client_id": _CLIENT_ID,
+            "redirect_uri": _REDIRECT_URI,
+            "scope": _SCOPE,
+            "code_challenge": code_challenge,
+            "code_challenge_method": "S256",
+            "state": state,
+            "id_token_add_organizations": "true",
+            "codex_cli_simplified_flow": "true",
+            "originator": originator,
+        }
+    )
+    return f"{_AUTHORIZE_URL}?{query}"
+async def _exchange_code_for_tokens(code: str, verifier: str) -> OpenAICredentials:
+    async with (
+        aiohttp.ClientSession() as session,
+        session.post(
+            _TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "grant_type": "authorization_code",
+                "client_id": _CLIENT_ID,
+                "code": code,
+                "code_verifier": verifier,
+                "redirect_uri": _REDIRECT_URI,
+            },
+        ) as response,
+    ):
+        if response.status >= 400:
+            text = await response.text()
+            raise RuntimeError(f"OpenAI OAuth token exchange failed ({response.status}): {text}")
+        data = await response.json()
+    access = data.get("access_token")
+    refresh = data.get("refresh_token")
+    expires_in = data.get("expires_in")
+    if (
+        not isinstance(access, str)
+        or not isinstance(refresh, str)
+        or not isinstance(expires_in, int)
+    ):
+        raise RuntimeError("OpenAI OAuth token response missing required fields")
+    account_id = _extract_account_id(access)
+    if not account_id:
+        raise RuntimeError("Failed to extract chatgpt_account_id from OpenAI OAuth token")
+    return OpenAICredentials(
+        access=access,
+        refresh=refresh,
+        expires=int(time.time() * 1000) + expires_in * 1000,
+        account_id=account_id,
+    )
+async def refresh_openai_token(creds: OpenAICredentials) -> OpenAICredentials:
+    async with (
+        aiohttp.ClientSession() as session,
+        session.post(
+            _TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "grant_type": "refresh_token",
+                "refresh_token": creds.refresh,
+                "client_id": _CLIENT_ID,
+            },
+        ) as response,
+    ):
+        if response.status >= 400:
+            text = await response.text()
+            raise RuntimeError(f"OpenAI OAuth token refresh failed ({response.status}): {text}")
+        data = await response.json()
+    access = data.get("access_token")
+    refresh = data.get("refresh_token")
+    expires_in = data.get("expires_in")
+    if (
+        not isinstance(access, str)
+        or not isinstance(refresh, str)
+        or not isinstance(expires_in, int)
+    ):
+        raise RuntimeError("OpenAI OAuth refresh response missing required fields")
+    account_id = _extract_account_id(access)
+    if not account_id:
+        raise RuntimeError("Failed to extract chatgpt_account_id from OpenAI OAuth token")
+    refreshed = OpenAICredentials(
+        access=access,
+        refresh=refresh,
+        expires=int(time.time() * 1000) + expires_in * 1000,
+        account_id=account_id,
+    )
+    save_openai_credentials(refreshed)
+    return refreshed
+async def _start_callback_server(state: str) -> tuple[asyncio.AbstractServer, asyncio.Future[str]]:
+    loop = asyncio.get_running_loop()
+    code_future: asyncio.Future[str] = loop.create_future()
+    async def handler(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
+        try:
+            raw = await reader.read(4096)
+            request_line = raw.decode(errors="ignore").splitlines()[0] if raw else ""
+            parts = request_line.split()
+            if len(parts) < 2:
+                return
+            path = parts[1]
+            parsed = urlparse(path)
+            query = parse_qs(parsed.query)
+            if parsed.path != "/auth/callback":
+                writer.write(b"HTTP/1.1 404 Not Found\r\nContent-Length: 9\r\n\r\nNot found")
+                await writer.drain()
+                return
+            req_state = (query.get("state") or [None])[0]
+            code = (query.get("code") or [None])[0]
+            if req_state != state or not isinstance(code, str) or not code:
+                writer.write(
+                    b"HTTP/1.1 400 Bad Request\r\nContent-Length: 14\r\n\r\nState mismatch"
+                )
+                await writer.drain()
+                return
+            body = _SUCCESS_HTML.encode()
+            writer.write(
+                b"HTTP/1.1 200 OK\r\nContent-Type: text/html; charset=utf-8\r\n"
+                + f"Content-Length: {len(body)}\r\n\r\n".encode()
+                + body
+            )
+            await writer.drain()
+            if not code_future.done():
+                code_future.set_result(code)
+        finally:
+            writer.close()
+            with contextlib.suppress(Exception):
+                await writer.wait_closed()
+    server = await asyncio.start_server(handler, "localhost", 1455)
+    return server, code_future
+def _parse_manual_input(input_text: str) -> tuple[str | None, str | None]:
+    text = input_text.strip()
+    if not text:
+        return None, None
+    try:
+        parsed = urlparse(text)
+        if parsed.scheme and parsed.netloc:
+            query = parse_qs(parsed.query)
+            return (query.get("code") or [None])[0], (query.get("state") or [None])[0]
+    except Exception:
+        pass
+    if "code=" in text:
+        query = parse_qs(text)
+        return (query.get("code") or [None])[0], (query.get("state") or [None])[0]
+    if "#" in text:
+        code, st = text.split("#", 1)
+        return code or None, st or None
+    return text, None
+async def login(
+    on_auth_url: Any | None = None, on_manual_input: Any | None = None, originator: str = "vtx"
+) -> OpenAICredentials:
+    verifier, challenge = _generate_pkce()
+    state = _create_state()
+    auth_url = _build_authorize_url(challenge, state, originator)
+    if on_auth_url:
+        on_auth_url(auth_url)
+    code: str | None = None
+    server: asyncio.AbstractServer | None = None
+    callback_awaitable: asyncio.Future[str] | None = None
+    manual_task: asyncio.Task[Any] | None = None
+    try:
+        try:
+            server, callback_awaitable = await _start_callback_server(state)
+        except OSError:
+            callback_awaitable = None
+        if on_manual_input:
+            manual_task = asyncio.create_task(on_manual_input())
+        if not callback_awaitable and not manual_task:
+            raise RuntimeError(
+                "OpenAI OAuth failed: could not start callback server on port 1455 "
+                "and no manual input handler provided."
+            )
+        if callback_awaitable and manual_task:
+            done, pending = await asyncio.wait(
+                {callback_awaitable, manual_task}, return_when=asyncio.FIRST_COMPLETED, timeout=300
+            )
+            for task in pending:
+                task.cancel()
+            if callback_awaitable in done:
+                code = callback_awaitable.result()
+            elif manual_task in done:
+                manual_input = manual_task.result()
+                parsed_code, parsed_state = _parse_manual_input(str(manual_input))
+                if parsed_state and parsed_state != state:
+                    raise RuntimeError("OpenAI OAuth state mismatch")
+                code = parsed_code
+        elif callback_awaitable:
+            code = await asyncio.wait_for(callback_awaitable, timeout=300)
+        elif manual_task:
+            manual_input = await manual_task
+            parsed_code, parsed_state = _parse_manual_input(str(manual_input))
+            if parsed_state and parsed_state != state:
+                raise RuntimeError("OpenAI OAuth state mismatch")
+            code = parsed_code
+        if not code:
+            raise TimeoutError(
+                "OpenAI OAuth timed out waiting for authorization callback on port 1455."
+            )
+        creds = await _exchange_code_for_tokens(code, verifier)
+        save_openai_credentials(creds)
+        return creds
+    finally:
+        if callback_awaitable and not callback_awaitable.done():
+            callback_awaitable.cancel()
+        if manual_task and not manual_task.done():
+            manual_task.cancel()
+        if server:
+            server.close()
+            with contextlib.suppress(Exception):
+                await server.wait_closed()
+async def get_valid_openai_credentials() -> OpenAICredentials | None:
+    creds = load_openai_credentials()
+    if not creds:
+        return None
+    if time.time() * 1000 >= creds.expires - 60_000:
+        try:
+            creds = await refresh_openai_token(creds)
+        except Exception:
+            return None
+    return creds
+async def get_valid_openai_token() -> str | None:
+    creds = await get_valid_openai_credentials()
+    return creds.access if creds else None

vtx/llm/phase_parser.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""
+Real-time streaming parser for ``<think>`` blocks embedded inside
+``delta.content`` (DeepSeek R1, MiniMax M3, Qwen3, GLM, …).
+These OpenAI-compat gateways follow the chat-completions spec but, unlike
+OpenAI's own ``o1``/``o3`` series, they don't expose a separate
+``reasoning_content`` field. They wrap their chain-of-thought inside
+``<think>`` tags in the regular content stream.
+If we let that through to the TUI's Rich-based markdown renderer,
+``<think>`` is interpreted as the start of a raw HTML block, the entire
+response gets swallowed, and the user sees an empty chat log. So we
+have to detect and split the blocks out *before* they reach the renderer.
+The parser is real-time (handles tags split across SSE chunks) and
+emits typed phase events as boundaries are crossed, so the consumer can
+update the TUI the moment the model transitions between phases — no
+buffering the full response to figure out where thinking ends.
+For multi-turn conversations, the extracted thinking is round-tripped
+through ``ThinkingContent(signature=INLINE_THINK_SIGNATURE)`` and then
+re-inlined into the assistant content on the next turn so the model sees
+its own reasoning in the original ``<think>`` wire format.
+"""
+from __future__ import annotations
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Literal, final
+# Signature value used to mark thinking content that was extracted from
+# inline ``<think>`` tags (see the module docstring).
+INLINE_THINK_SIGNATURE = "_inline"
+# Literal tag strings the parser searches for in the content stream.
+_OPEN_TAG = "<think>"
+_CLOSE_TAG = "</think>"
+@final
+@dataclass(frozen=True)
+class ThinkStart:
+    """The ``<think>`` opener was just observed.
+    Carries no data; it marks the switch into the thinking phase.
+    """
+@final
+@dataclass(frozen=True)
+class ThinkDelta:
+    """A chunk of thinking text streamed in real-time."""
+    # The thinking text carried by this event.
+    text: str
+@final
+@dataclass(frozen=True)
+class ThinkEnd:
+    """The ``</think>`` closer was just observed.
+    Also emitted by ``ThinkingPhaseParser.flush()`` when the stream ends
+    while still inside a think block.
+    """
+    # All thinking text accumulated for the block that just ended.
+    full_thinking: str
+@final
+@dataclass(frozen=True)
+class ResponseStart:
+    """Response text is about to stream.
+    Emitted immediately before the first ``ResponseDelta`` of a response run.
+    """
+@final
+@dataclass(frozen=True)
+class ResponseDelta:
+    """A chunk of response text."""
+    # The response text carried by this event.
+    text: str
+@final
+@dataclass(frozen=True)
+class ResponseEnd:
+    """Stream finished cleanly.
+    Emitted by ``ThinkingPhaseParser.flush()`` when the parser is not inside
+    a think block at the end of the stream.
+    """
+# Union of every event type the parser can yield.
+PhaseEvent = ThinkStart | ThinkDelta | ThinkEnd | ResponseStart | ResponseDelta | ResponseEnd
+# Parser state names exposed through ``ThinkingPhaseParser.phase``.
+Phase = Literal["idle", "thinking", "responding"]
+def _is_prefix_of_close_tag(buffer_tail: str) -> bool:
+    """Check if *buffer_tail* could be the beginning of a ``</think>``
+    tag straddling the next chunk. Returns True if the tail matches a
+    prefix of ``</think>``."""
+    return _CLOSE_TAG.startswith(buffer_tail) or buffer_tail.startswith(
+        _CLOSE_TAG[: len(buffer_tail)]
+    )
+def _is_prefix_of_open_tag(buffer_tail: str) -> bool:
+    """Check if *buffer_tail* could be the beginning of a ``<think>``
+    tag straddling the next chunk."""
+    return _OPEN_TAG.startswith(buffer_tail) or buffer_tail.startswith(
+        _OPEN_TAG[: len(buffer_tail)]
+    )
+@final
+class ThinkingPhaseParser:
+    """Real-time streaming parser for ``<think>`` blocks in ``delta.content``.
+    Feed streamed text with ``feed()`` and call ``flush()`` once the stream
+    ends; both yield ``PhaseEvent`` objects. Text that might be the start of
+    a tag split across chunks is held back in an internal buffer.
+    """
+    __slots__ = ("_buffer", "_deferred_think", "_phase", "_response_started", "_think_buffer")
+    def __init__(self) -> None:
+        """Create a parser in the ``"idle"`` phase with empty buffers."""
+        # Unprocessed text, including any held-back partial tag.
+        self._buffer: str = ""
+        self._phase: Phase = "idle"
+        # Pieces of the current think block, joined when the block ends.
+        self._think_buffer: list[str] = []
+        # When set, the next feed() call will emit ThinkDelta + ThinkEnd for
+        # the deferred think content before processing the new text.  This
+        # is only set when </think> was found in the same chunk as ThinkStart
+        # (the opener-split scenario) so the caller can distinguish the two
+        # events across chunk boundaries.
+        self._deferred_think: str | None = None
+        # Whether ResponseStart has been emitted for the current response run.
+        self._response_started: bool = False
+    @property
+    def phase(self) -> Phase:
+        """Current parser phase."""
+        return self._phase
+    def feed(self, text: str) -> Iterator[PhaseEvent]:
+        """Consume one streamed chunk and yield the events it produces.
+        An empty *text* yields nothing and leaves all state untouched,
+        including any deferred think content.
+        """
+        if not text:
+            return
+        # If the previous feed deferred ThinkDelta+ThinkEnd (opener-split case),
+        # emit them now before processing the new chunk.
+        if self._deferred_think is not None:
+            full = self._deferred_think
+            self._deferred_think = None
+            if full:
+                yield ThinkDelta(text=full)
+            yield ThinkEnd(full_thinking=full)
+            # The buffered remainder after </think> was already stashed;
+            # process it as response text together with the new chunk below.
+        # Detect opener-split: the buffer held a partial <think> prefix from
+        # the previous chunk.  We use this to defer ThinkDelta+ThinkEnd so
+        # callers see ThinkStart on its own chunk boundary.
+        opener_was_split = (
+            self._phase != "thinking"
+            and bool(self._buffer)
+            and _is_prefix_of_open_tag(self._buffer)
+        )
+        self._buffer += text
+        open_tag = _OPEN_TAG
+        close_tag = _CLOSE_TAG
+        open_tag_len = len(open_tag)
+        close_tag_len = len(close_tag)
+        # Each pass handles one phase transition; the loop exits by return
+        # once the buffer holds no further complete tag.
+        while True:
+            if self._phase == "thinking":
+                end = self._buffer.find(close_tag)
+                if end == -1:
+                    # No close tag yet. Check if the buffer tail could be
+                    # the start of a partial close tag.
+                    for i in range(min(close_tag_len - 1, len(self._buffer)), 0, -1):
+                        tail = self._buffer[-i:]
+                        if _is_prefix_of_close_tag(tail):
+                            head = self._buffer[:-i]
+                            self._buffer = tail
+                            if head:
+                                self._think_buffer.append(head)
+                                yield ThinkDelta(text=head)
+                            return
+                    # No partial close tag — emit everything.
+                    if self._buffer:
+                        self._think_buffer.append(self._buffer)
+                        yield ThinkDelta(text=self._buffer)
+                        self._buffer = ""
+                    return
+                think_chunk = self._buffer[:end]
+                # Newlines directly after the close tag are dropped, but only
+                # those already present in this buffer.
+                remainder = self._buffer[end + close_tag_len :].lstrip("\n")
+                if think_chunk:
+                    self._think_buffer.append(think_chunk)
+                full_thinking = "".join(self._think_buffer)
+                self._think_buffer = []
+                self._phase = "responding"
+                self._response_started = False
+                if opener_was_split and think_chunk:
+                    # ThinkStart and </think> both arrived in this feed() call.
+                    # Defer ThinkDelta+ThinkEnd to the next feed() so that the
+                    # caller can observe them as separate chunk events.
+                    self._deferred_think = full_thinking
+                    self._buffer = remainder
+                    return
+                self._buffer = remainder
+                # NOTE(review): on this path think_chunk is not yielded as a
+                # ThinkDelta; it only reaches the caller through
+                # ThinkEnd.full_thinking. Confirm consumers rely on that.
+                yield ThinkEnd(full_thinking=full_thinking)
+            else:
+                # In "idle" or "responding" — look for an opener.
+                start = self._buffer.find(open_tag)
+                if start == -1:
+                    # Check if the buffer tail could be a partial opener.
+                    for i in range(min(open_tag_len - 1, len(self._buffer)), 0, -1):
+                        tail = self._buffer[-i:]
+                        if _is_prefix_of_open_tag(tail):
+                            head = self._buffer[:-i]
+                            self._buffer = tail
+                            if head:
+                                for ev in self._wrap_response(head):
+                                    yield ev
+                            return
+                    # No partial opener — emit everything.
+                    if self._buffer:
+                        for ev in self._wrap_response(self._buffer):
+                            yield ev
+                        self._buffer = ""
+                    return
+                head = self._buffer[:start]
+                self._buffer = self._buffer[start + open_tag_len :]
+                if head:
+                    for ev in self._wrap_response(head):
+                        yield ev
+                self._phase = "thinking"
+                yield ThinkStart()
+    def flush(self) -> Iterator[PhaseEvent]:
+        """Finish the stream and yield the closing events.
+        If the stream ends inside a think block, the buffered text is added
+        to the thinking and a single ThinkEnd is yielded with no ResponseEnd.
+        Otherwise any buffered text is emitted as response text, followed by
+        ResponseEnd. The parser ends in the ``"idle"`` phase either way.
+        """
+        # Drain deferred ThinkEnd from the opener-split scenario.
+        # We only emit ThinkEnd here (not ThinkDelta) so _collect() doesn't
+        # double-count; ThinkEnd.full_thinking is the authoritative total.
+        if self._deferred_think is not None:
+            full = self._deferred_think
+            self._deferred_think = None
+            yield ThinkEnd(full_thinking=full)
+            # Fall through: emit any remaining buffered response + ResponseEnd.
+        if self._phase == "thinking":
+            if self._buffer:
+                self._think_buffer.append(self._buffer)
+                self._buffer = ""
+            full_thinking = "".join(self._think_buffer)
+            self._think_buffer = []
+            self._phase = "idle"
+            yield ThinkEnd(full_thinking=full_thinking)
+            return
+        if self._buffer:
+            head = self._buffer
+            self._buffer = ""
+            yield from self._wrap_response(head)
+        self._phase = "idle"
+        self._response_started = False
+        yield ResponseEnd()
+    def _wrap_response(self, text: str) -> Iterator[PhaseEvent]:
+        """Yield *text* as a ResponseDelta, preceded by ResponseStart when it
+        is the first response text of the current run. Empty text yields
+        nothing."""
+        if not text:
+            return
+        if not self._response_started:
+            self._response_started = True
+            self._phase = "responding"
+            yield ResponseStart()
+        yield ResponseDelta(text=text)
+# Public names of this module; the tag constants and prefix helpers stay private.
+__all__ = [
+    "INLINE_THINK_SIGNATURE",
+    "Phase",
+    "PhaseEvent",
+    "ResponseDelta",
+    "ResponseEnd",
+    "ResponseStart",
+    "ThinkDelta",
+    "ThinkEnd",
+    "ThinkStart",
+    "ThinkingPhaseParser",
+]