vtx-coding-agent 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. vtx/__init__.py +63 -0
  2. vtx/async_utils.py +40 -0
  3. vtx/builtin_skills/github/SKILL.md +139 -0
  4. vtx/builtin_skills/init/SKILL.md +74 -0
  5. vtx/builtin_skills/review/SKILL.md +73 -0
  6. vtx/builtin_skills/skill-builder/SKILL.md +133 -0
  7. vtx/cli.py +90 -0
  8. vtx/config.py +741 -0
  9. vtx/context/__init__.py +15 -0
  10. vtx/context/_xml.py +8 -0
  11. vtx/context/agent_mds.py +128 -0
  12. vtx/context/git.py +64 -0
  13. vtx/context/loader.py +41 -0
  14. vtx/context/skills.py +423 -0
  15. vtx/core/__init__.py +47 -0
  16. vtx/core/compaction.py +89 -0
  17. vtx/core/errors.py +17 -0
  18. vtx/core/handoff.py +51 -0
  19. vtx/core/scratchpad.py +54 -0
  20. vtx/core/types.py +197 -0
  21. vtx/defaults/__init__.py +0 -0
  22. vtx/defaults/config.yml +53 -0
  23. vtx/diff_display.py +12 -0
  24. vtx/events.py +224 -0
  25. vtx/gh_cli.py +82 -0
  26. vtx/git_branch.py +90 -0
  27. vtx/headless.py +127 -0
  28. vtx/llm/__init__.py +93 -0
  29. vtx/llm/base.py +217 -0
  30. vtx/llm/context_length.py +150 -0
  31. vtx/llm/dynamic_models.py +735 -0
  32. vtx/llm/model_fetcher.py +279 -0
  33. vtx/llm/models.py +78 -0
  34. vtx/llm/oauth/__init__.py +59 -0
  35. vtx/llm/oauth/copilot.py +358 -0
  36. vtx/llm/oauth/dynamic.py +236 -0
  37. vtx/llm/oauth/openai.py +400 -0
  38. vtx/llm/phase_parser.py +270 -0
  39. vtx/llm/provider.yaml +280 -0
  40. vtx/llm/provider_catalog.py +230 -0
  41. vtx/llm/providers/__init__.py +45 -0
  42. vtx/llm/providers/anthropic_sdk.py +256 -0
  43. vtx/llm/providers/mock.py +249 -0
  44. vtx/llm/providers/openai_sdk.py +246 -0
  45. vtx/llm/providers/sanitize.py +14 -0
  46. vtx/llm/sdk/__init__.py +13 -0
  47. vtx/llm/sdk/anthropic.py +382 -0
  48. vtx/llm/sdk/base.py +82 -0
  49. vtx/llm/sdk/openai.py +344 -0
  50. vtx/llm/tool_parser.py +161 -0
  51. vtx/loop.py +272 -0
  52. vtx/notify.py +109 -0
  53. vtx/permissions.py +114 -0
  54. vtx/prompts/__init__.py +45 -0
  55. vtx/prompts/builder.py +86 -0
  56. vtx/prompts/env.py +58 -0
  57. vtx/prompts/identity.py +166 -0
  58. vtx/prompts/tooling.py +36 -0
  59. vtx/py.typed +0 -0
  60. vtx/runtime.py +580 -0
  61. vtx/session.py +868 -0
  62. vtx/sounds/completion.wav +0 -0
  63. vtx/sounds/error.wav +0 -0
  64. vtx/sounds/permission.wav +0 -0
  65. vtx/themes.py +1104 -0
  66. vtx/tools/__init__.py +68 -0
  67. vtx/tools/_read_image.py +106 -0
  68. vtx/tools/_tool_utils.py +90 -0
  69. vtx/tools/base.py +36 -0
  70. vtx/tools/bash.py +371 -0
  71. vtx/tools/edit.py +261 -0
  72. vtx/tools/find.py +132 -0
  73. vtx/tools/read.py +238 -0
  74. vtx/tools/skill.py +278 -0
  75. vtx/tools/web.py +238 -0
  76. vtx/tools/write.py +88 -0
  77. vtx/tools_manager.py +216 -0
  78. vtx/turn.py +789 -0
  79. vtx/ui/__init__.py +0 -0
  80. vtx/ui/agent_runner.py +417 -0
  81. vtx/ui/app.py +665 -0
  82. vtx/ui/app_protocol.py +29 -0
  83. vtx/ui/autocomplete.py +440 -0
  84. vtx/ui/blocks.py +735 -0
  85. vtx/ui/chat.py +613 -0
  86. vtx/ui/clipboard.py +59 -0
  87. vtx/ui/commands/__init__.py +100 -0
  88. vtx/ui/commands/auth.py +306 -0
  89. vtx/ui/commands/base.py +122 -0
  90. vtx/ui/commands/models.py +144 -0
  91. vtx/ui/commands/sessions.py +388 -0
  92. vtx/ui/commands/settings.py +286 -0
  93. vtx/ui/completion_ui.py +313 -0
  94. vtx/ui/export.py +703 -0
  95. vtx/ui/floating_list.py +370 -0
  96. vtx/ui/formatting.py +287 -0
  97. vtx/ui/input.py +760 -0
  98. vtx/ui/latex.py +349 -0
  99. vtx/ui/launch.py +108 -0
  100. vtx/ui/path_complete.py +228 -0
  101. vtx/ui/prompt_history.py +102 -0
  102. vtx/ui/queue_ui.py +141 -0
  103. vtx/ui/selection_mode.py +18 -0
  104. vtx/ui/session_ui.py +235 -0
  105. vtx/ui/startup.py +124 -0
  106. vtx/ui/styles.py +327 -0
  107. vtx/ui/tool_output.py +34 -0
  108. vtx/ui/tree.py +437 -0
  109. vtx/ui/welcome.py +51 -0
  110. vtx/ui/widgets.py +558 -0
  111. vtx/update_check.py +49 -0
  112. vtx/version.py +22 -0
  113. vtx_coding_agent-0.1.1.dist-info/METADATA +259 -0
  114. vtx_coding_agent-0.1.1.dist-info/RECORD +117 -0
  115. vtx_coding_agent-0.1.1.dist-info/WHEEL +4 -0
  116. vtx_coding_agent-0.1.1.dist-info/entry_points.txt +2 -0
  117. vtx_coding_agent-0.1.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,246 @@
1
+ """OpenAI SDK provider - wraps the SDK layer into vtx's BaseProvider interface."""
2
+
3
+ from collections.abc import AsyncIterator
4
+ from typing import Any, ClassVar
5
+
6
+ from openai import APIConnectionError, APIStatusError, RateLimitError
7
+
8
+ from ...core.errors import format_error
9
+ from ...core.types import (
10
+ AssistantMessage,
11
+ ImageContent,
12
+ Message,
13
+ StopReason,
14
+ StreamDone,
15
+ StreamError,
16
+ StreamPart,
17
+ TextContent,
18
+ TextPart,
19
+ ThinkingContent,
20
+ ThinkPart,
21
+ ToolCall,
22
+ ToolCallDelta,
23
+ ToolCallStart,
24
+ ToolDefinition,
25
+ ToolResultMessage,
26
+ Usage,
27
+ UserMessage,
28
+ )
29
+ from ..base import BaseProvider, LLMStream, ProviderConfig, resolve_api_key
30
+ from ..sdk.base import GenerationConfig
31
+ from ..sdk.base import Message as SDKMessage
32
+ from ..sdk.openai import OpenAISDK
33
+ from .sanitize import sanitize_surrogates
34
+
35
+
36
class OpenAISDKProvider(BaseProvider):
    """Provider that drives ``OpenAISDK`` behind the ``BaseProvider`` interface.

    Outgoing vtx messages and tool definitions are converted to the SDK layer's
    ``Message`` / OpenAI "function" tool shapes, and the SDK's streamed chunk
    dictionaries are translated back into ``StreamPart`` events.
    """

    name = "openai"
    thinking_levels: ClassVar[list[str]] = ["none"]

    # Providers for which a missing static key falls back to the dynamic key lookup.
    _DYNAMIC_KEY_PROVIDERS: ClassVar[frozenset[str]] = frozenset(
        {"airouter", "opencode", "kilo", "tokenrouter"}
    )
    # Sampling temperature used when neither the call nor the config sets one.
    _DEFAULT_TEMPERATURE: ClassVar[float] = 0.7

    def __init__(self, config: ProviderConfig):
        """Resolve an API key and build the underlying SDK client.

        Args:
            config: Provider configuration (key, base URL, auth mode, model, ...).

        Raises:
            ValueError: If no API key could be resolved.
        """
        super().__init__(config)

        api_key = resolve_api_key(
            config.api_key,
            env_vars=("OPENAI_API_KEY",),
            base_url=config.base_url,
            auth_mode=config.openai_compat_auth_mode,
        )
        provider_id = (config.provider or "").lower()
        if not api_key and provider_id in self._DYNAMIC_KEY_PROVIDERS:
            api_key = self._resolve_dynamic_key_for(config)
        if not api_key:
            raise ValueError(
                f"No API key found for {self.name}. "
                "Set OPENAI_API_KEY environment variable or pass api_key in config, "
                'or configure llm.auth.openai_compat = "auto"/"none" for local endpoints.'
            )

        self._sdk = OpenAISDK(api_key=api_key, base_url=config.base_url)

    @staticmethod
    def _resolve_dynamic_key_for(config: ProviderConfig) -> str | None:
        """Look up a dynamically issued API key for ``config.provider``."""
        # Imported lazily, as in the rest of this module's optional lookups.
        from ..oauth.dynamic import get_dynamic_api_key

        return get_dynamic_api_key(config.provider or "")

    def _convert_messages(
        self, messages: list[Message], system_prompt: str | None
    ) -> list[SDKMessage]:
        """Convert a vtx conversation into SDK messages.

        A non-empty ``system_prompt`` becomes a leading ``system`` message.
        Messages that are not user, assistant or tool-result messages are skipped.
        """
        result: list[SDKMessage] = []

        if system_prompt:
            result.append(SDKMessage(role="system", content=sanitize_surrogates(system_prompt)))

        for msg in messages:
            if isinstance(msg, UserMessage):
                result.append(self._convert_user_message(msg))
            elif isinstance(msg, AssistantMessage):
                result.append(self._convert_assistant_message(msg))
            elif isinstance(msg, ToolResultMessage):
                result.append(self._convert_tool_result(msg))

        return result

    def _convert_user_message(self, msg: UserMessage) -> SDKMessage:
        """Convert a user message, keeping text and images.

        String content is sanitized and sent as-is. For list content, non-blank
        text items are sanitized and joined with newlines, and image items are
        encoded as ``data:`` URLs passed through ``image_parts``.

        Raises:
            ValueError: If the message carries neither non-blank text nor images.
        """
        if isinstance(msg.content, str):
            content = sanitize_surrogates(msg.content)
            if not content or content.isspace():
                raise ValueError("User message content cannot be empty or whitespace-only")
            return SDKMessage(role="user", content=content)

        parts: list[str] = []
        image_parts: list[str] = []
        for item in msg.content:
            if isinstance(item, TextContent):
                text = sanitize_surrogates(item.text)
                if text and not text.isspace():
                    parts.append(text)
            elif isinstance(item, ImageContent):
                image_parts.append(f"data:{item.mime_type};base64,{item.data}")

        content = "\n".join(parts)
        if not content and not image_parts:
            raise ValueError("User message content cannot be empty or whitespace-only")

        return SDKMessage(role="user", content=content, image_parts=image_parts or None)

    def _convert_assistant_message(self, msg: AssistantMessage) -> SDKMessage:
        """Convert an assistant message, including its thinking and tool calls.

        Non-blank text is appended to the content. Thinking items tagged with
        ``INLINE_THINK_SIGNATURE`` are re-embedded as ``<think>...</think>``;
        those tagged ``"reasoning_content"`` are stored in
        ``metadata["reasoning_content"]``; other thinking items are dropped.
        Tool calls are stored in ``metadata["tool_calls"]`` in the OpenAI
        "function" shape with JSON-encoded arguments.
        """
        import json

        from ..phase_parser import INLINE_THINK_SIGNATURE

        content_parts: list[str] = []
        metadata: dict[str, Any] = {}
        tool_calls: list[dict[str, Any]] = []
        for item in msg.content:
            if isinstance(item, TextContent):
                if item.text.strip():
                    content_parts.append(sanitize_surrogates(item.text))
            elif isinstance(item, ThinkingContent):
                # Thinking text is sent back to the API too, so it gets the same
                # surrogate scrubbing as ordinary text.
                thinking = sanitize_surrogates(item.thinking or "")
                if item.signature == INLINE_THINK_SIGNATURE:
                    content_parts.append(f"<think>{thinking}</think>")
                elif item.signature == "reasoning_content":
                    metadata["reasoning_content"] = thinking
            elif isinstance(item, ToolCall):
                tool_calls.append(
                    {
                        "id": item.id,
                        "type": "function",
                        "function": {"name": item.name, "arguments": json.dumps(item.arguments)},
                    }
                )

        if tool_calls:
            metadata["tool_calls"] = tool_calls

        return SDKMessage(
            role="assistant",
            content="".join(content_parts),
            metadata=metadata or None,
        )

    def _convert_tool_result(self, msg: ToolResultMessage) -> SDKMessage:
        """Convert a tool result into a ``tool`` message keyed by its call id.

        Text items are sanitized and joined with newlines. When there is no text
        a placeholder is sent instead: ``"(see attached image)"`` if the result
        holds images, otherwise ``"(no output)"``.
        """
        # Tool output is arbitrary program/file data, so scrub surrogates here
        # just like user and assistant text.
        text_parts = [
            sanitize_surrogates(item.text) for item in msg.content if isinstance(item, TextContent)
        ]
        has_images = any(isinstance(item, ImageContent) for item in msg.content)

        if text_parts:
            content = "\n".join(text_parts)
        elif has_images:
            content = "(see attached image)"
        else:
            content = "(no output)"

        return SDKMessage(
            role="tool", content=content, metadata={"tool_call_id": msg.tool_call_id}
        )

    def _convert_tools(self, tools: list[ToolDefinition]) -> list[dict[str, Any]]:
        """Render tool definitions in the OpenAI ``{"type": "function", ...}`` shape."""
        return [
            {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters,
                },
            }
            for tool in tools
        ]

    async def _stream_impl(
        self,
        messages: list[Message],
        *,
        system_prompt: str | None = None,
        tools: list[ToolDefinition] | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> LLMStream:
        """Start a streaming generation and wrap it in an ``LLMStream``.

        Args:
            messages: Conversation so far.
            system_prompt: Optional system prompt, sent as the first message.
            tools: Tool definitions; when non-empty the tool-enabled SDK call is used.
            temperature: Per-call override; falls back to the config value, then 0.7.
            max_tokens: Per-call override; falls back to the config value.

        Returns:
            An ``LLMStream`` whose iterator yields the translated stream parts.
        """
        sdk_messages = self._convert_messages(messages, system_prompt)
        sdk_tools = self._convert_tools(tools) if tools else None

        temp = temperature if temperature is not None else self.config.temperature
        if temp is None:
            # Only substitute the default when nothing was configured; an
            # explicit 0.0 (deterministic sampling) must be passed through.
            temp = self._DEFAULT_TEMPERATURE
        max_tok = max_tokens if max_tokens is not None else self.config.max_tokens

        config = GenerationConfig(model=self.config.model, temperature=temp, max_tokens=max_tok)

        if sdk_tools:
            raw_stream = await self._sdk.generate_with_tools(
                sdk_messages, sdk_tools, config, stream=True
            )
        else:
            raw_stream = await self._sdk.generate(sdk_messages, config, stream=True)

        llm_stream = LLMStream()
        llm_stream.set_iterator(self._process_stream(raw_stream, llm_stream))
        return llm_stream

    async def _process_stream(
        self, response: Any, llm_stream: LLMStream
    ) -> AsyncIterator[StreamPart]:
        """Translate SDK chunk dictionaries into ``StreamPart`` events.

        ``usage`` chunks are recorded on ``llm_stream``; ``reasoning``,
        ``text``/``content`` and ``tool_calls`` chunks are yielded as think,
        text and tool-call parts. The stream ends with ``StreamDone``, or with
        ``StreamError`` if anything raises while consuming ``response``.
        """
        # NOTE(review): the stop reason is always STOP, even when tool calls were
        # emitted -- confirm whether StopReason has a dedicated tool-use member.
        stop_reason: StopReason = StopReason.STOP
        # Running index so tool calls arriving in separate chunks never share a slot.
        next_tool_index = 0

        try:
            async for chunk in response:
                chunk_type = chunk.get("type")

                if chunk_type == "usage":
                    usage_data = chunk.get("usage") or {}
                    # Accept both OpenAI-style and Anthropic-style token key names.
                    llm_stream._usage = Usage(
                        input_tokens=usage_data.get("prompt_tokens", 0)
                        or usage_data.get("input_tokens", 0),
                        output_tokens=usage_data.get("completion_tokens", 0)
                        or usage_data.get("output_tokens", 0),
                    )
                elif chunk_type == "reasoning":
                    yield ThinkPart(
                        think=chunk.get("content", ""),
                        signature=chunk.get("signature", "reasoning_content"),
                    )
                elif chunk_type in ("text", "content"):
                    yield TextPart(text=chunk.get("content", ""))
                elif chunk_type == "tool_calls":
                    for tc in chunk.get("tool_calls") or []:
                        yield ToolCallStart(id=tc.id, name=tc.name, index=next_tool_index)
                        yield ToolCallDelta(index=next_tool_index, arguments_delta=tc.arguments)
                        next_tool_index += 1

            yield StreamDone(stop_reason=stop_reason)

        except Exception as e:
            # Stream boundary: surface any failure to the consumer as an event.
            yield StreamError(error=format_error(e))

    def should_retry_for_error(self, error: Exception) -> bool:
        """Return True for rate limits, connection failures and 5xx API responses."""
        if isinstance(error, (RateLimitError, APIConnectionError)):
            return True
        if isinstance(error, APIStatusError):
            return error.status_code >= 500
        return False
@@ -0,0 +1,14 @@
1
+ """
2
+ Sanitize text content before sending to LLM APIs.
3
+
4
+ Lone Unicode surrogates (U+D800-U+DFFF) cause API errors with some providers.
5
+ This matches pi-mono's sanitizeSurrogates() behavior.
6
+ """
7
+
8
+ import re
9
+
10
_SURROGATE_RE = re.compile(r"[\ud800-\udfff]")

# U+FFFD REPLACEMENT CHARACTER, substituted for every matched code point.
_REPLACEMENT_CHARACTER = "\ufffd"


def sanitize_surrogates(text: str) -> str:
    """Return *text* with each code point in U+D800-U+DFFF replaced by U+FFFD."""
    return re.sub(_SURROGATE_RE, _REPLACEMENT_CHARACTER, text)
@@ -0,0 +1,13 @@
1
+ from .anthropic import AnthropicSDK
2
+ from .base import BaseLLMSDK, GenerationConfig, GenerationResponse, Message, ToolCall
3
+ from .openai import OpenAISDK
4
+
5
+ __all__ = [
6
+ "AnthropicSDK",
7
+ "BaseLLMSDK",
8
+ "GenerationConfig",
9
+ "GenerationResponse",
10
+ "Message",
11
+ "OpenAISDK",
12
+ "ToolCall",
13
+ ]
@@ -0,0 +1,382 @@
1
+ """Anthropic-native SDK. Direct HTTP via httpx."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import os
9
+ from collections.abc import AsyncGenerator
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ from .base import BaseLLMSDK, GenerationConfig, GenerationResponse, Message, ToolCall
15
+
16
# Module logger; retry warnings from the blocking request path go through it.
logger = logging.getLogger(__name__)

# --- Endpoint ---------------------------------------------------------------
# Base URL used when the caller does not pass ``base_url``.
ANTHROPIC_API_ROOT = "https://api.anthropic.com"
# Value sent in the ``anthropic-version`` request header.
ANTHROPIC_VERSION = "2023-06-01"

# --- Request defaults -------------------------------------------------------
# ``max_tokens`` used when the generation config does not provide one.
_MAX_TOKENS_DEFAULT = 4096

# --- Retry policy -----------------------------------------------------------
# Total number of attempts for a blocking request.
_MAX_RETRIES = 3
# Seconds before the first retry; doubled on each later attempt.
_RETRY_BASE_DELAY = 1.0
23
+
24
+
25
def _is_transient(exc: Exception) -> bool:
    """Return True when *exc* looks like a temporary failure worth retrying.

    HTTP status errors are transient for 429 and any 5xx status. Any httpx
    timeout (connect, read, write, pool), network error (connect, read, write,
    close) or remote protocol error is transient by type. Every other exception
    is classified by looking for well-known connectivity phrases in its message.
    """
    if isinstance(exc, httpx.HTTPStatusError):
        status = exc.response.status_code
        return status == 429 or status >= 500

    # Match on the base classes so that ConnectTimeout, PoolTimeout, ReadError
    # and WriteError are recognised by type rather than by their message text.
    if isinstance(
        exc, (httpx.TimeoutException, httpx.NetworkError, httpx.RemoteProtocolError)
    ):
        return True

    message = str(exc).lower()
    markers = (
        "connection",
        "timeout",
        "timed out",
        "reset",
        "broken pipe",
        "network",
        "unavailable",
        "bad gateway",
    )
    return any(marker in message for marker in markers)
46
+
47
+
48
def _image_source(ref: Any) -> dict[str, Any] | None:
    """Build an Anthropic image ``source`` from an OpenAI ``image_url`` value, or None."""
    # OpenAI allows the value to be {"url": ...}; tolerate a bare URL string too.
    url = ref.get("url", "") if isinstance(ref, dict) else ref
    if not isinstance(url, str) or not url:
        return None
    if not url.startswith("data:"):
        # Remote image: reference it by URL instead of silently dropping it.
        return {"type": "url", "url": url}
    header, comma, payload = url.partition(",")
    if not comma:
        # Malformed data URL (no payload separator): nothing usable to send.
        return None
    media_type = header[len("data:") :].split(";", 1)[0]
    return {"type": "base64", "media_type": media_type, "data": payload}


def _content_to_anthropic(content: str | list[dict[str, Any]]) -> str | list[dict[str, Any]]:
    """Translate OpenAI-style message content into Anthropic content blocks.

    A plain string is returned unchanged. For a list, every ``text`` part
    becomes a ``text`` block and every ``image_url`` part becomes an ``image``
    block: ``data:`` URLs are sent as a base64 source, any other non-empty URL
    as a ``url`` source. Non-dict parts, unknown part types and image parts
    without a usable URL are skipped.
    """
    if isinstance(content, str):
        return content

    blocks: list[dict[str, Any]] = []
    for part in content:
        if not isinstance(part, dict):
            continue
        kind = part.get("type")
        if kind == "text":
            blocks.append({"type": "text", "text": part.get("text", "")})
        elif kind == "image_url":
            source = _image_source(part.get("image_url"))
            if source is not None:
                blocks.append({"type": "image", "source": source})
    return blocks
73
+
74
+
75
def _tool_use_blocks(metadata: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Turn OpenAI-style ``metadata["tool_calls"]`` entries into ``tool_use`` blocks."""
    blocks: list[dict[str, Any]] = []
    for call in (metadata or {}).get("tool_calls") or []:
        if not isinstance(call, dict):
            continue
        fn = call.get("function") or {}
        arguments = fn.get("arguments")
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments) if arguments.strip() else {}
            except json.JSONDecodeError:
                # Best effort: keep the call itself even if its arguments are unparsable.
                arguments = {}
        if not isinstance(arguments, dict):
            arguments = {}
        blocks.append(
            {
                "type": "tool_use",
                "id": call.get("id", ""),
                "name": fn.get("name", ""),
                "input": arguments,
            }
        )
    return blocks


def _content_as_blocks(content: Any) -> list[dict[str, Any]]:
    """Return message content as a list of blocks, omitting blank text."""
    if content is None:
        return []
    if isinstance(content, str):
        return [{"type": "text", "text": content}] if content.strip() else []
    return list(_content_to_anthropic(content))


def _append_tool_result(converted: list[dict[str, Any]], m: Message) -> None:
    """Attach *m* as a ``tool_result`` block to the current or a new user turn."""
    result = {
        "type": "tool_result",
        "tool_use_id": m.metadata.get("tool_call_id", "") if m.metadata else "",
        "content": m.content if isinstance(m.content, str) else str(m.content or ""),
    }
    if converted and converted[-1]["role"] == "user":
        existing = converted[-1]["content"]
        if not isinstance(existing, list):
            existing = [{"type": "text", "text": existing}]
            converted[-1]["content"] = existing
        existing.append(result)
    else:
        # Tool results travel in a user turn; open one when the previous turn
        # is the assistant's (the usual case) or when there is no turn yet.
        converted.append({"role": "user", "content": [result]})


def _messages_to_anthropic(messages: list[Message]) -> tuple[str | None, list[dict[str, Any]]]:
    """Split SDK messages into an Anthropic system prompt and message list.

    * ``system`` messages are collected and joined with blank lines.
    * ``user`` / ``assistant`` messages are converted with
      ``_content_to_anthropic``; an assistant message whose metadata carries
      ``tool_calls`` additionally gets one ``tool_use`` block per call.
    * ``tool`` messages become ``tool_result`` blocks inside a user turn
      (appended to the preceding user turn, or a new one).
    * Messages with any other role are dropped.

    Consecutive turns with the same role are merged, and a ``(continue)`` user
    turn is inserted in front when the conversation would not start with one.

    Returns:
        ``(system, messages)`` where ``system`` is None if no system text exists.
    """
    system_parts: list[str] = []
    converted: list[dict[str, Any]] = []
    for m in messages:
        role = (m.role or "").lower()
        if role == "system":
            text = m.content if isinstance(m.content, str) else str(m.content or "")
            if text:
                system_parts.append(text)
        elif role == "tool":
            _append_tool_result(converted, m)
        elif role in ("user", "assistant"):
            tool_uses = _tool_use_blocks(m.metadata) if role == "assistant" else []
            if tool_uses:
                content: Any = _content_as_blocks(m.content) + tool_uses
            else:
                content = _content_to_anthropic(m.content if m.content is not None else "")
            converted.append({"role": role, "content": content})

    # Anthropic expects alternating roles, so fold same-role neighbours together.
    merged: list[dict[str, Any]] = []
    for msg in converted:
        if merged and merged[-1]["role"] == msg["role"]:
            prev = merged[-1]["content"]
            cur = msg["content"]
            if isinstance(prev, str):
                prev = [{"type": "text", "text": prev}]
            if isinstance(cur, str):
                cur = [{"type": "text", "text": cur}]
            merged[-1]["content"] = prev + cur
        else:
            merged.append(msg)

    if merged and merged[0]["role"] != "user":
        merged.insert(0, {"role": "user", "content": [{"type": "text", "text": "(continue)"}]})

    system = "\n\n".join(system_parts) if system_parts else None
    return system, merged
124
+
125
+
126
def _tools_to_anthropic(tools: list[dict[str, Any]] | None) -> list[dict[str, Any]] | None:
    """Convert OpenAI "function" tool specs to Anthropic tool definitions.

    Returns None for a missing or empty tool list. Entries that are not in the
    ``{"type": "function", "function": {...}}`` shape are passed through as-is.
    """
    if not tools:
        return None

    def _convert(spec: dict[str, Any]) -> dict[str, Any]:
        # Anything that is not an OpenAI function spec is assumed to be usable as given.
        if spec.get("type") != "function" or "function" not in spec:
            return spec
        fn = spec["function"]
        return {
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
        }

    return [_convert(spec) for spec in tools]
143
+
144
+
145
def _parse_anthropic_response(data: dict[str, Any], model: str) -> GenerationResponse:
    """Build a ``GenerationResponse`` from a decoded Messages API response body.

    ``text`` blocks are joined with newlines into the content, ``thinking``
    blocks into the reasoning content, and ``tool_use`` blocks become tool calls
    with JSON-encoded arguments. Usage is reported under OpenAI-style key names,
    or as None when the response carries no usage data.
    """
    texts: list[str] = []
    thoughts: list[str] = []
    calls: list[ToolCall] = []

    for block in data.get("content", []):
        block_type = block.get("type")
        if block_type == "tool_use":
            calls.append(
                ToolCall(
                    id=block.get("id", ""),
                    name=block.get("name", ""),
                    arguments=json.dumps(block.get("input", {})),
                )
            )
        elif block_type == "thinking":
            thoughts.append(block.get("thinking", ""))
        elif block_type == "text":
            texts.append(block.get("text", ""))

    raw_usage = data.get("usage", {})
    usage_summary: dict[str, Any] | None = None
    if raw_usage:
        prompt_tokens = raw_usage.get("input_tokens", 0)
        completion_tokens = raw_usage.get("output_tokens", 0)
        usage_summary = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }

    return GenerationResponse(
        content="\n".join(texts),
        model=model,
        finish_reason=data.get("stop_reason"),
        tool_calls=calls or None,
        usage=usage_summary,
        reasoning_content="\n".join(thoughts),
    )
179
+
180
+
181
class AnthropicSDK(BaseLLMSDK):
    """Client for the Anthropic Messages endpoint built directly on ``httpx``.

    Supports blocking and streaming generation, with or without tools. The HTTP
    client is created lazily on first use and released by ``aclose``.
    """

    def __init__(self, api_key: str, base_url: str | None = None, **_: Any):
        """Store credentials; extra keyword arguments are accepted and ignored.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            base_url: API root; defaults to ``ANTHROPIC_API_ROOT``.
        """
        url: str = base_url or ANTHROPIC_API_ROOT
        super().__init__(api_key, url)
        self._client: httpx.AsyncClient | None = None

    @property
    def client(self) -> httpx.AsyncClient:
        """The shared ``httpx.AsyncClient``, created on first access."""
        if self._client is None:
            # Type narrowing only: __init__ always passes a concrete URL to the base class.
            assert self.base_url is not None
            self._client = httpx.AsyncClient(
                base_url=self.base_url,
                timeout=httpx.Timeout(60.0, read=300.0),
                headers={
                    "x-api-key": self.api_key,
                    "anthropic-version": ANTHROPIC_VERSION,
                    "content-type": "application/json",
                },
            )
        return self._client

    def _resolve_model(self, config: GenerationConfig) -> str:
        """Pick the model: ``config.model``, else ``$VTX_MODEL``, else a built-in default."""
        model = (config.model or "").strip() or os.getenv("VTX_MODEL", "").strip()
        return model or "claude-3-5-sonnet-latest"

    def _build_payload(
        self,
        messages: list[Message],
        config: GenerationConfig,
        tools: list[dict[str, Any]] | None = None,
    ) -> dict[str, Any]:
        """Assemble the JSON body for a ``/v1/messages`` request.

        Optional sampling fields are included only when set on ``config``.
        ``tool_choice`` is sent only together with tools: the strings
        ``"auto"``, ``"any"`` and ``"none"`` are wrapped as ``{"type": ...}``
        and a dict is passed through unchanged.
        """
        system, converted = _messages_to_anthropic(messages)
        payload: dict[str, Any] = {
            "model": self._resolve_model(config),
            "max_tokens": config.max_tokens or _MAX_TOKENS_DEFAULT,
            "messages": converted,
        }
        if system:
            payload["system"] = system
        if config.temperature is not None:
            payload["temperature"] = config.temperature
        if config.top_p is not None:
            payload["top_p"] = config.top_p
        if config.stop_sequences:
            payload["stop_sequences"] = config.stop_sequences

        anthropic_tools = _tools_to_anthropic(tools)
        if anthropic_tools:
            payload["tools"] = anthropic_tools
            tc = config.tool_choice
            if isinstance(tc, str) and tc in ("auto", "any", "none"):
                payload["tool_choice"] = {"type": tc}
            elif isinstance(tc, dict):
                payload["tool_choice"] = tc
        return payload

    async def generate(
        self, messages: list[Message], config: GenerationConfig, stream: bool = False
    ) -> GenerationResponse | AsyncGenerator:
        """Generate a reply without tools.

        Returns an async generator of chunk dicts when ``stream`` is true,
        otherwise a complete ``GenerationResponse``.
        """
        if stream:
            return self._generate_stream(messages, config)
        return await self._generate_blocking(messages, config)

    async def _generate_blocking(
        self,
        messages: list[Message],
        config: GenerationConfig,
        tools: list[dict[str, Any]] | None = None,
    ) -> GenerationResponse:
        """Send one non-streaming request, retrying transient failures.

        Up to ``_MAX_RETRIES`` attempts are made with exponential backoff
        starting at ``_RETRY_BASE_DELAY`` seconds.

        Raises:
            RuntimeError: For a non-transient HTTP error status.
            httpx.HTTPStatusError: When a transient HTTP status persists on the
                final attempt.
            Exception: Any request failure that is not transient or that
                persists on the final attempt is re-raised unchanged.
        """
        payload = self._build_payload(messages, config, tools)
        for attempt in range(_MAX_RETRIES):
            is_last_attempt = attempt == _MAX_RETRIES - 1
            try:
                resp = await self.client.post("/v1/messages", json=payload)
            except Exception as exc:
                if is_last_attempt or not _is_transient(exc):
                    raise
                failure: Exception = exc
            else:
                if resp.status_code < 400:
                    return _parse_anthropic_response(resp.json(), payload["model"])
                status_error = httpx.HTTPStatusError(
                    "err", request=resp.request, response=resp
                )
                if not _is_transient(status_error):
                    # Decided from the status code alone, so a client error is
                    # never retried just because its body mentions e.g. "timeout".
                    raise RuntimeError(
                        f"Anthropic API error {resp.status_code}: {resp.text[:300]}"
                    )
                if is_last_attempt:
                    raise status_error
                failure = status_error

            delay = _RETRY_BASE_DELAY * (2**attempt)
            logger.warning(
                "Anthropic transient error (attempt %d/%d), retrying in %.1fs: %s",
                attempt + 1,
                _MAX_RETRIES,
                delay,
                str(failure)[:200],
            )
            await asyncio.sleep(delay)
        raise RuntimeError("unreachable")

    async def _generate_stream(
        self,
        messages: list[Message],
        config: GenerationConfig,
        tools: list[dict[str, Any]] | None = None,
    ) -> AsyncGenerator[dict[str, Any], None]:
        """Stream a reply as chunk dictionaries.

        Yields ``{"type": "content", "content": str}`` for each text delta,
        then at most one ``{"type": "tool_calls", "tool_calls": [ToolCall]}``
        chunk and at most one ``{"type": "usage", "usage": {...}}`` chunk.

        Raises:
            RuntimeError: If the API answers with an HTTP error status.
        """
        payload = self._build_payload(messages, config, tools)
        payload["stream"] = True

        # Tool calls keyed by the content-block index reported in each event.
        tool_calls: dict[int, dict[str, Any]] = {}
        input_tokens = 0
        output_tokens = 0

        async with self.client.stream("POST", "/v1/messages", json=payload) as resp:
            if resp.status_code >= 400:
                body = await resp.aread()
                raise RuntimeError(
                    f"Anthropic API error {resp.status_code}: "
                    f"{body.decode('utf-8', errors='replace')[:300]}"
                )
            async for line in resp.aiter_lines():
                # Only "data: " lines carry events; everything else is skipped.
                if not line.startswith("data: "):
                    continue
                raw = line[len("data: ") :]
                if raw.strip() == "[DONE]":
                    break
                try:
                    ev = json.loads(raw)
                except json.JSONDecodeError:
                    continue

                et = ev.get("type", "")
                if et == "message_start":
                    usage = (ev.get("message") or {}).get("usage") or {}
                    input_tokens = usage.get("input_tokens", 0)
                elif et == "content_block_start":
                    block = ev.get("content_block") or {}
                    if block.get("type") == "tool_use":
                        # Register the call up front so one without argument
                        # deltas is still reported.
                        tool_calls[ev.get("index", 0)] = {
                            "id": block.get("id", ""),
                            "name": block.get("name", ""),
                            "arguments": "",
                        }
                elif et == "content_block_delta":
                    delta = ev.get("delta") or {}
                    delta_type = delta.get("type")
                    if delta_type == "text_delta":
                        yield {"type": "content", "content": delta.get("text", "")}
                    elif delta_type == "input_json_delta":
                        call = tool_calls.setdefault(
                            ev.get("index", 0), {"id": "", "name": "", "arguments": ""}
                        )
                        call["arguments"] += delta.get("partial_json", "")
                elif et == "message_delta":
                    usage = ev.get("usage") or {}
                    # Keep the previous count if this event carries no usage.
                    output_tokens = usage.get("output_tokens", output_tokens)
                elif et == "message_stop":
                    break

        if tool_calls:
            calls = [
                ToolCall(id=tc["id"], name=tc["name"], arguments=tc["arguments"] or "{}")
                for tc in tool_calls.values()
            ]
            yield {"type": "tool_calls", "tool_calls": calls}
        if input_tokens or output_tokens:
            yield {
                "type": "usage",
                "usage": {
                    "prompt_tokens": input_tokens,
                    "completion_tokens": output_tokens,
                    "total_tokens": input_tokens + output_tokens,
                },
            }

    async def generate_with_tools(
        self,
        messages: list[Message],
        tools: list[dict],
        config: GenerationConfig,
        stream: bool = False,
    ) -> GenerationResponse | AsyncGenerator:
        """Generate a reply with tools available to the model.

        Returns an async generator of chunk dicts when ``stream`` is true,
        otherwise a complete ``GenerationResponse``.
        """
        if stream:
            return self._generate_stream(messages, config, tools=tools)
        return await self._generate_blocking(messages, config, tools=tools)

    def get_available_models(self) -> list[str]:
        """Return the built-in list of model aliases."""
        return ["claude-3-5-sonnet-latest", "claude-3-5-haiku-latest", "claude-3-opus-latest"]

    async def aclose(self) -> None:
        """Close the HTTP client if one was created; safe to call repeatedly."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None