tsugite-claude-code 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsugite_claude_code/__init__.py +5 -0
- tsugite_claude_code/process.py +315 -0
- tsugite_claude_code/provider.py +303 -0
- tsugite_claude_code-0.17.0.dist-info/METADATA +6 -0
- tsugite_claude_code-0.17.0.dist-info/RECORD +7 -0
- tsugite_claude_code-0.17.0.dist-info/WHEEL +4 -0
- tsugite_claude_code-0.17.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
"""Claude Code CLI subprocess provider.
|
|
2
|
+
|
|
3
|
+
Routes LLM calls through `claude --print` instead of a direct HTTP provider,
|
|
4
|
+
enabling Claude Max subscription auth. Text-only, no multimodal support.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import shutil
|
|
12
|
+
import tempfile
|
|
13
|
+
import uuid
|
|
14
|
+
from collections.abc import AsyncIterator
|
|
15
|
+
|
|
16
|
+
from tsugite.cli.helpers import get_workspace_dir
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Variables stripped from the child's environment before launching the CLI:
# CLAUDECODE / CLAUDE_CODE_ENTRYPOINT would trip its "nested session" guard,
# and ANTHROPIC_API_KEY is dropped so the CLI does not fall back to API-key usage.
_CLAUDE_ENV_VARS = {
    "ANTHROPIC_API_KEY",
    "CLAUDECODE",
    "CLAUDE_CODE_ENTRYPOINT",
}

# Seconds to wait for the subprocess to exit after SIGTERM before sending SIGKILL.
_STOP_TIMEOUT_SECONDS = 5.0

# Per-stream StreamReader buffer cap handed to asyncio when spawning the CLI.
# asyncio's 64KB default is too small: one stream-json line (a long answer plus
# the result event's usage/modelUsage payload) routinely exceeds it, making
# `readline()` fail - which crashes the turn on stdout, or stops the stderr
# drain and lets the pipe fill until the child blocks. 16MB is ample.
_STREAM_READ_LIMIT = 16 * 1024**2
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ClaudeCodeProcess:
|
|
34
|
+
"""Manages a persistent claude CLI subprocess for LLM completions.
|
|
35
|
+
|
|
36
|
+
Uses stream-json I/O format to send user turns via stdin and parse
|
|
37
|
+
streaming responses from stdout. The subprocess holds conversation
|
|
38
|
+
state in memory between turns.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(self):
|
|
42
|
+
self._process: asyncio.subprocess.Process | None = None # pylint: disable=no-member
|
|
43
|
+
self._session_id: str | None = None
|
|
44
|
+
self._system_prompt_file: str | None = None
|
|
45
|
+
self._stderr_lines: list[str] = []
|
|
46
|
+
self._stderr_task: asyncio.Task | None = None
|
|
47
|
+
self._compacted: bool = False
|
|
48
|
+
self._last_usage: dict = {}
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def session_id(self) -> str | None:
|
|
52
|
+
return self._session_id
|
|
53
|
+
|
|
54
|
+
@property
|
|
55
|
+
def compacted(self) -> bool:
|
|
56
|
+
"""Whether Claude Code auto-compacted during this session."""
|
|
57
|
+
return self._compacted
|
|
58
|
+
|
|
59
|
+
async def _drain_stderr(self) -> None:
|
|
60
|
+
"""Background task: read stderr lines so the pipe never fills up.
|
|
61
|
+
|
|
62
|
+
Must keep draining even if one read fails - bailing on the first error
|
|
63
|
+
(e.g. an over-limit line) lets the OS stderr buffer fill, which blocks
|
|
64
|
+
the subprocess writing stderr and deadlocks the whole exchange.
|
|
65
|
+
"""
|
|
66
|
+
while True:
|
|
67
|
+
try:
|
|
68
|
+
line = await self._process.stderr.readline()
|
|
69
|
+
except asyncio.CancelledError:
|
|
70
|
+
return
|
|
71
|
+
except Exception:
|
|
72
|
+
# A read error (e.g. transient decode/limit issue) must not stop
|
|
73
|
+
# the drain. Keep the pipe moving until EOF.
|
|
74
|
+
continue
|
|
75
|
+
if not line:
|
|
76
|
+
return
|
|
77
|
+
self._stderr_lines.append(line.decode(errors="replace").rstrip())
|
|
78
|
+
|
|
79
|
+
def _get_stderr(self) -> str:
|
|
80
|
+
return "\n".join(self._stderr_lines[-20:]) # last 20 lines
|
|
81
|
+
|
|
82
|
+
async def start(
|
|
83
|
+
self,
|
|
84
|
+
model: str,
|
|
85
|
+
system_prompt: str,
|
|
86
|
+
resume_session: str | None = None,
|
|
87
|
+
effort: str | None = None,
|
|
88
|
+
) -> None:
|
|
89
|
+
"""Launch persistent claude subprocess.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
model: Model name (sonnet, opus, haiku, or full model ID)
|
|
93
|
+
system_prompt: System prompt text
|
|
94
|
+
resume_session: Optional session ID to resume instead of starting fresh
|
|
95
|
+
effort: Optional reasoning effort level passed via ``--effort``
|
|
96
|
+
(low, medium, high, xhigh, max).
|
|
97
|
+
|
|
98
|
+
Raises:
|
|
99
|
+
RuntimeError: If claude CLI is not found or fails to start
|
|
100
|
+
"""
|
|
101
|
+
if not shutil.which("claude"):
|
|
102
|
+
raise RuntimeError("Claude Code CLI not found. Install it with: npm install -g @anthropic-ai/claude-code")
|
|
103
|
+
|
|
104
|
+
# Write system prompt to temp file
|
|
105
|
+
fd, self._system_prompt_file = tempfile.mkstemp(suffix=".txt", prefix="tsugite_sysprompt_")
|
|
106
|
+
with os.fdopen(fd, "w") as f:
|
|
107
|
+
f.write(system_prompt)
|
|
108
|
+
|
|
109
|
+
cmd = [
|
|
110
|
+
"claude",
|
|
111
|
+
"--print",
|
|
112
|
+
"--input-format",
|
|
113
|
+
"stream-json",
|
|
114
|
+
"--output-format",
|
|
115
|
+
"stream-json",
|
|
116
|
+
"--verbose",
|
|
117
|
+
"--max-turns",
|
|
118
|
+
"1",
|
|
119
|
+
"--model",
|
|
120
|
+
model,
|
|
121
|
+
"--tools",
|
|
122
|
+
"",
|
|
123
|
+
"--strict-mcp-config",
|
|
124
|
+
"--system-prompt-file",
|
|
125
|
+
self._system_prompt_file,
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
if resume_session:
|
|
129
|
+
cmd.extend(["--resume", resume_session])
|
|
130
|
+
else:
|
|
131
|
+
cmd.extend(["--session-id", str(uuid.uuid4())])
|
|
132
|
+
|
|
133
|
+
if effort:
|
|
134
|
+
cmd.extend(["--effort", effort])
|
|
135
|
+
|
|
136
|
+
# Copy env but unset keys that trigger nested-session guard or API key usage
|
|
137
|
+
env = {k: v for k, v in os.environ.items() if k not in _CLAUDE_ENV_VARS}
|
|
138
|
+
|
|
139
|
+
workspace = get_workspace_dir()
|
|
140
|
+
self._process = await asyncio.create_subprocess_exec(
|
|
141
|
+
*cmd,
|
|
142
|
+
stdin=asyncio.subprocess.PIPE,
|
|
143
|
+
stdout=asyncio.subprocess.PIPE,
|
|
144
|
+
stderr=asyncio.subprocess.PIPE,
|
|
145
|
+
env=env,
|
|
146
|
+
cwd=str(workspace) if workspace is not None else None,
|
|
147
|
+
limit=_STREAM_READ_LIMIT,
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Start draining stderr in background to prevent pipe buffer deadlock
|
|
151
|
+
self._stderr_task = asyncio.create_task(self._drain_stderr())
|
|
152
|
+
mode = f"resume={resume_session}" if resume_session else "new session"
|
|
153
|
+
logger.info(
|
|
154
|
+
"Claude Code subprocess started (pid=%d, model=%s, effort=%s, %s)",
|
|
155
|
+
self._process.pid,
|
|
156
|
+
model,
|
|
157
|
+
effort or "default",
|
|
158
|
+
mode,
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
async def send_message(self, content: str) -> AsyncIterator[dict]:
|
|
162
|
+
"""Write user message to stdin and yield streaming events from stdout.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
content: User message text
|
|
166
|
+
|
|
167
|
+
Yields:
|
|
168
|
+
Dicts with type "text_delta" (streaming chunk) or "result" (final)
|
|
169
|
+
|
|
170
|
+
Raises:
|
|
171
|
+
RuntimeError: If subprocess has crashed
|
|
172
|
+
"""
|
|
173
|
+
msg = {
|
|
174
|
+
"type": "user",
|
|
175
|
+
"message": {"role": "user", "content": content},
|
|
176
|
+
"session_id": self._session_id or "default",
|
|
177
|
+
}
|
|
178
|
+
content_len = len(content)
|
|
179
|
+
self._process.stdin.write((json.dumps(msg) + "\n").encode())
|
|
180
|
+
await self._process.stdin.drain()
|
|
181
|
+
logger.debug("Sent message (%d chars, ~%d est tokens): %.200s", content_len, content_len // 4, content)
|
|
182
|
+
|
|
183
|
+
# Text already emitted as content_block_delta chunks. The CLI repeats the
|
|
184
|
+
# full message text in the trailing `assistant` event, so without tracking
|
|
185
|
+
# what we already streamed the consumer would accumulate it twice (the
|
|
186
|
+
# duplicate-JSON / doubled-prose bug that marked passing Jobs stuck).
|
|
187
|
+
streamed_text = ""
|
|
188
|
+
|
|
189
|
+
while True:
|
|
190
|
+
line = await self._process.stdout.readline()
|
|
191
|
+
if not line:
|
|
192
|
+
stderr = self._get_stderr()
|
|
193
|
+
raise RuntimeError(
|
|
194
|
+
f"Claude Code process ended unexpectedly (sent ~{content_len} chars). stderr: {stderr}"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
raw = line.decode().strip()
|
|
198
|
+
if not raw:
|
|
199
|
+
continue
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
event = json.loads(raw)
|
|
203
|
+
except json.JSONDecodeError:
|
|
204
|
+
continue
|
|
205
|
+
|
|
206
|
+
event_type = event.get("type", "")
|
|
207
|
+
|
|
208
|
+
# Init event — capture session_id (arrives after first user message)
|
|
209
|
+
if event_type == "system" and event.get("subtype") == "init":
|
|
210
|
+
self._session_id = event.get("session_id")
|
|
211
|
+
logger.debug("Init event received (session=%s)", self._session_id)
|
|
212
|
+
continue
|
|
213
|
+
|
|
214
|
+
# Compact boundary — Claude Code auto-compacted the conversation
|
|
215
|
+
if event_type == "system" and event.get("subtype") == "compact_boundary":
|
|
216
|
+
self._compacted = True
|
|
217
|
+
logger.info(
|
|
218
|
+
"Claude Code auto-compacted (preTokens=%s)",
|
|
219
|
+
event.get("compactMetadata", {}).get("preTokens"),
|
|
220
|
+
)
|
|
221
|
+
continue
|
|
222
|
+
|
|
223
|
+
# Content block delta (streaming with --include-partial-messages)
|
|
224
|
+
if event_type == "content_block_delta":
|
|
225
|
+
delta = event.get("delta", {})
|
|
226
|
+
if delta.get("type") == "text_delta":
|
|
227
|
+
streamed_text += delta["text"]
|
|
228
|
+
yield {"type": "text_delta", "text": delta["text"]}
|
|
229
|
+
|
|
230
|
+
# Full assistant message — extract text and usage from content blocks
|
|
231
|
+
elif event_type == "assistant":
|
|
232
|
+
message = event.get("message", {})
|
|
233
|
+
usage = message.get("usage", {})
|
|
234
|
+
if usage:
|
|
235
|
+
self._last_usage = usage
|
|
236
|
+
content_blocks = message.get("content", [])
|
|
237
|
+
# Claude CLI redacts thinking text, leaving an empty payload, so drop it
|
|
238
|
+
# entirely rather than surfacing a content-free placeholder block.
|
|
239
|
+
text = "".join(block.get("text", "") for block in content_blocks if block.get("type") == "text")
|
|
240
|
+
# This event repeats the full message text. Emit only the part not
|
|
241
|
+
# already streamed as content_block_delta chunks; otherwise the
|
|
242
|
+
# consumer accumulates it twice. removeprefix is a no-op when no
|
|
243
|
+
# deltas streamed (prefix "") or the text isn't a clean prefix.
|
|
244
|
+
remainder = text.removeprefix(streamed_text)
|
|
245
|
+
if remainder:
|
|
246
|
+
yield {"type": "text_delta", "text": remainder}
|
|
247
|
+
streamed_text = ""
|
|
248
|
+
|
|
249
|
+
# Final result
|
|
250
|
+
elif event_type == "result":
|
|
251
|
+
result_text = event.get("result", "")
|
|
252
|
+
cost = event.get("total_cost_usd")
|
|
253
|
+
duration = event.get("duration_ms")
|
|
254
|
+
logger.debug("Result received (cost=$%s, %sms): %.200s", cost, duration, result_text)
|
|
255
|
+
|
|
256
|
+
# Prefer usage from result event, fall back to last assistant event
|
|
257
|
+
result_usage = event.get("usage")
|
|
258
|
+
if not result_usage:
|
|
259
|
+
logger.debug(
|
|
260
|
+
"Result event missing usage; falling back to assistant event usage (cache tokens may be lost)"
|
|
261
|
+
)
|
|
262
|
+
result_usage = self._last_usage
|
|
263
|
+
|
|
264
|
+
# modelUsage is keyed by model name, e.g. {"claude-sonnet-4-6": {"contextWindow": 200000}}
|
|
265
|
+
context_window = None
|
|
266
|
+
for model_data in (event.get("modelUsage") or {}).values():
|
|
267
|
+
context_window = model_data.get("contextWindow")
|
|
268
|
+
if context_window:
|
|
269
|
+
break
|
|
270
|
+
|
|
271
|
+
yield {
|
|
272
|
+
"type": "result",
|
|
273
|
+
"text": result_text,
|
|
274
|
+
"cost_usd": cost,
|
|
275
|
+
"duration_ms": duration,
|
|
276
|
+
"session_id": event.get("session_id", self._session_id),
|
|
277
|
+
"input_tokens": result_usage.get("input_tokens") or 0,
|
|
278
|
+
"cache_creation_input_tokens": result_usage.get("cache_creation_input_tokens") or 0,
|
|
279
|
+
"cache_read_input_tokens": result_usage.get("cache_read_input_tokens") or 0,
|
|
280
|
+
"output_tokens": result_usage.get("output_tokens") or 0,
|
|
281
|
+
"context_window": context_window,
|
|
282
|
+
"is_error": bool(event.get("is_error")),
|
|
283
|
+
"subtype": event.get("subtype"),
|
|
284
|
+
}
|
|
285
|
+
return
|
|
286
|
+
|
|
287
|
+
# Skip: rate_limit_event, system, etc.
|
|
288
|
+
|
|
289
|
+
async def stop(self) -> None:
|
|
290
|
+
"""Terminate subprocess and clean up temp files."""
|
|
291
|
+
if self._stderr_task:
|
|
292
|
+
self._stderr_task.cancel()
|
|
293
|
+
self._stderr_task = None
|
|
294
|
+
|
|
295
|
+
if self._process:
|
|
296
|
+
try:
|
|
297
|
+
self._process.terminate()
|
|
298
|
+
try:
|
|
299
|
+
# Bounded wait: a wedged or SIGTERM-ignoring claude (it's a
|
|
300
|
+
# Node process) would otherwise hang this finally forever and
|
|
301
|
+
# leak a long-lived orphan. Escalate to SIGKILL on timeout.
|
|
302
|
+
await asyncio.wait_for(self._process.wait(), timeout=_STOP_TIMEOUT_SECONDS)
|
|
303
|
+
except asyncio.TimeoutError:
|
|
304
|
+
logger.warning(
|
|
305
|
+
"Claude Code subprocess (pid=%s) ignored SIGTERM; sending SIGKILL", self._process.pid
|
|
306
|
+
)
|
|
307
|
+
self._process.kill()
|
|
308
|
+
await self._process.wait()
|
|
309
|
+
except ProcessLookupError:
|
|
310
|
+
pass
|
|
311
|
+
self._process = None
|
|
312
|
+
|
|
313
|
+
if self._system_prompt_file and os.path.exists(self._system_prompt_file):
|
|
314
|
+
os.unlink(self._system_prompt_file)
|
|
315
|
+
self._system_prompt_file = None
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""Claude Code provider — routes LLM calls through `claude --print` subprocess."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, AsyncIterator
|
|
7
|
+
|
|
8
|
+
from tsugite.exceptions import AgentExecutionError
|
|
9
|
+
from tsugite.providers.base import CompletionResponse, ModelInfo, StreamChunk, Usage, default_count_tokens
|
|
10
|
+
from tsugite.providers.model_registry import get_model_info as _get_model_info
|
|
11
|
+
from tsugite.providers.model_registry import register_aliases, register_models
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)

# Reasoning-effort values advertised for every model in the table below.
_CLAUDE_CODE_EFFORT_LEVELS = ["low", "medium", "high", "xhigh", "max"]

# Registry entries, keyed by "<provider>/<model id>". The entries differ only
# in their input-token limit, so the table is built from (key, limit) pairs;
# each model still gets its own ModelInfo instance.
_CLAUDE_CODE_MODELS: dict[str, ModelInfo] = {
    registry_key: ModelInfo(
        max_input_tokens=token_limit,
        supports_vision=True,
        supported_effort_levels=_CLAUDE_CODE_EFFORT_LEVELS,
    )
    for registry_key, token_limit in (
        ("claude_code/claude-opus-4-8", 1_000_000),
        ("claude_code/claude-opus-4-7", 1_000_000),
        ("claude_code/claude-opus-4-6", 1_000_000),
        ("claude_code/claude-sonnet-4-6", 1_000_000),
        ("claude_code/claude-haiku-4-5-20251001", 200_000),
    )
}

# Short names accepted in place of a full model id.
_ALIASES = {
    # Opus family ("opus" tracks the newest entry)
    "opus": "claude-opus-4-8",
    "opus-4-8": "claude-opus-4-8",
    "opus-4-7": "claude-opus-4-7",
    "opus-4-6": "claude-opus-4-6",
    # Sonnet / Haiku
    "sonnet": "claude-sonnet-4-6",
    "haiku": "claude-haiku-4-5-20251001",
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _raise_if_error(result_event: dict) -> None:
    """Raise AgentExecutionError when a Claude CLI result event reports failure.

    The CLI signals failures (context overflow, max-turns, etc.) through a
    result event carrying is_error=true and a non-success subtype, NOT through
    a non-zero exit or stderr. Left unconverted, the failure text would reach
    the user as the assistant's reply and bypass the daemon's prompt-too-long
    retry path.

    Args:
        result_event: The "result" dict produced for the finished turn.

    Raises:
        AgentExecutionError: If ``result_event["is_error"]`` is truthy. The
            message is the event text (or a generic fallback) followed by the
            subtype.
    """
    if result_event.get("is_error"):
        reason = result_event.get("subtype") or "error"
        message = result_event.get("text") or "Claude Code returned an error result"
        raise AgentExecutionError(f"{message} (subtype={reason})")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ClaudeCodeProvider:
|
|
61
|
+
"""Provider that manages a persistent `claude` CLI subprocess.
|
|
62
|
+
|
|
63
|
+
Stateful: the subprocess persists across acompletion() calls within a session.
|
|
64
|
+
First call starts the process; subsequent calls send observations to it.
|
|
65
|
+
Call stop() to clean up the subprocess when done.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
cacheable = False
|
|
69
|
+
|
|
70
|
+
def __init__(self, name: str = "claude_code"):
|
|
71
|
+
self.name = name
|
|
72
|
+
self._process = None
|
|
73
|
+
self._turn_count = 0
|
|
74
|
+
self._resolved_model: str | None = None
|
|
75
|
+
|
|
76
|
+
# Context set via set_context()
|
|
77
|
+
self._attachments = []
|
|
78
|
+
self._skills = []
|
|
79
|
+
self._resume_session = None
|
|
80
|
+
self._resume_after_compaction = False
|
|
81
|
+
self._previous_messages = []
|
|
82
|
+
|
|
83
|
+
# Session state
|
|
84
|
+
self._session_id: str | None = None
|
|
85
|
+
self._compacted: bool = False
|
|
86
|
+
self._context_window: int | None = None
|
|
87
|
+
self._cache_creation_tokens: int = 0
|
|
88
|
+
self._cache_read_tokens: int = 0
|
|
89
|
+
self._cumulative_cost: float = 0.0
|
|
90
|
+
|
|
91
|
+
register_models(_CLAUDE_CODE_MODELS)
|
|
92
|
+
register_aliases(self.name, _ALIASES)
|
|
93
|
+
|
|
94
|
+
def set_context(self, **kwargs: Any) -> None:
|
|
95
|
+
self._attachments = kwargs.get("attachments", [])
|
|
96
|
+
self._skills = kwargs.get("skills", [])
|
|
97
|
+
self._resume_session = kwargs.get("resume_session")
|
|
98
|
+
self._resume_after_compaction = kwargs.get("resume_after_compaction", False)
|
|
99
|
+
self._previous_messages = kwargs.get("previous_messages", [])
|
|
100
|
+
|
|
101
|
+
def get_state(self) -> dict | None:
|
|
102
|
+
return {
|
|
103
|
+
"session_id": self._session_id,
|
|
104
|
+
"compacted": self._compacted,
|
|
105
|
+
"context_window": self._context_window,
|
|
106
|
+
"cache_creation_tokens": self._cache_creation_tokens,
|
|
107
|
+
"cache_read_tokens": self._cache_read_tokens,
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
async def acompletion(
|
|
111
|
+
self,
|
|
112
|
+
messages: list[dict],
|
|
113
|
+
model: str,
|
|
114
|
+
stream: bool = False,
|
|
115
|
+
**kwargs: Any,
|
|
116
|
+
) -> CompletionResponse | AsyncIterator[StreamChunk]:
|
|
117
|
+
from tsugite_claude_code.process import ClaudeCodeProcess
|
|
118
|
+
|
|
119
|
+
resolved_model = _ALIASES.get(model, model)
|
|
120
|
+
if resolved_model != model and self._resolved_model != resolved_model:
|
|
121
|
+
logger.info("claude_code model alias %r -> %s", model, resolved_model)
|
|
122
|
+
self._resolved_model = resolved_model
|
|
123
|
+
|
|
124
|
+
if self._process is None:
|
|
125
|
+
self._process = ClaudeCodeProcess()
|
|
126
|
+
system_prompt = ""
|
|
127
|
+
if messages and messages[0].get("role") == "system":
|
|
128
|
+
system_prompt = messages[0]["content"]
|
|
129
|
+
messages = messages[1:]
|
|
130
|
+
|
|
131
|
+
await self._process.start(
|
|
132
|
+
model=resolved_model,
|
|
133
|
+
system_prompt=system_prompt,
|
|
134
|
+
resume_session=self._resume_session,
|
|
135
|
+
effort=kwargs.get("reasoning_effort"),
|
|
136
|
+
)
|
|
137
|
+
user_content = self._build_first_message(messages)
|
|
138
|
+
else:
|
|
139
|
+
# Subsequent turns: subprocess has context, send the last observation
|
|
140
|
+
user_content = messages[-1]["content"] if messages else ""
|
|
141
|
+
|
|
142
|
+
self._turn_count += 1
|
|
143
|
+
|
|
144
|
+
if stream:
|
|
145
|
+
return self._stream(user_content)
|
|
146
|
+
|
|
147
|
+
return await self._collect(user_content)
|
|
148
|
+
|
|
149
|
+
async def _collect(self, user_content: str) -> CompletionResponse:
|
|
150
|
+
"""Send message and collect full response."""
|
|
151
|
+
accumulated = ""
|
|
152
|
+
usage = Usage()
|
|
153
|
+
cost = 0.0
|
|
154
|
+
|
|
155
|
+
async for event in self._process.send_message(user_content):
|
|
156
|
+
if event["type"] == "text_delta":
|
|
157
|
+
accumulated += event["text"]
|
|
158
|
+
elif event["type"] == "result":
|
|
159
|
+
_raise_if_error(event)
|
|
160
|
+
if not accumulated:
|
|
161
|
+
accumulated = event.get("text", "")
|
|
162
|
+
cost = self._cost_delta(event.get("cost_usd") or 0.0)
|
|
163
|
+
usage = self._extract_usage(event)
|
|
164
|
+
|
|
165
|
+
return CompletionResponse(
|
|
166
|
+
content=accumulated,
|
|
167
|
+
usage=usage,
|
|
168
|
+
cost=cost,
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
async def _stream(self, user_content: str) -> AsyncIterator[StreamChunk]:
|
|
172
|
+
"""Send message and yield streaming chunks."""
|
|
173
|
+
usage = Usage()
|
|
174
|
+
cost = 0.0
|
|
175
|
+
|
|
176
|
+
async for event in self._process.send_message(user_content):
|
|
177
|
+
if event["type"] == "text_delta":
|
|
178
|
+
yield StreamChunk(content=event["text"])
|
|
179
|
+
elif event["type"] == "result":
|
|
180
|
+
_raise_if_error(event)
|
|
181
|
+
cost = self._cost_delta(event.get("cost_usd") or 0.0)
|
|
182
|
+
usage = self._extract_usage(event)
|
|
183
|
+
|
|
184
|
+
yield StreamChunk(content="", done=True, usage=usage, cost=cost)
|
|
185
|
+
|
|
186
|
+
def _cost_delta(self, cumulative_cost: float) -> float:
|
|
187
|
+
"""Convert Claude CLI's cumulative cost to a per-turn delta."""
|
|
188
|
+
delta = cumulative_cost - self._cumulative_cost
|
|
189
|
+
self._cumulative_cost = cumulative_cost
|
|
190
|
+
return max(delta, 0.0)
|
|
191
|
+
|
|
192
|
+
def _extract_usage(self, event: dict) -> Usage:
|
|
193
|
+
"""Extract usage from a subprocess result event and update session state."""
|
|
194
|
+
input_tokens = event.get("input_tokens") or 0
|
|
195
|
+
cache_creation = event.get("cache_creation_input_tokens") or 0
|
|
196
|
+
cache_read = event.get("cache_read_input_tokens") or 0
|
|
197
|
+
output_tokens = event.get("output_tokens") or 0
|
|
198
|
+
|
|
199
|
+
self._cache_creation_tokens += cache_creation
|
|
200
|
+
self._cache_read_tokens += cache_read
|
|
201
|
+
self._session_id = event.get("session_id", self._session_id)
|
|
202
|
+
if event.get("context_window"):
|
|
203
|
+
self._context_window = event["context_window"]
|
|
204
|
+
|
|
205
|
+
return Usage(
|
|
206
|
+
prompt_tokens=input_tokens,
|
|
207
|
+
completion_tokens=output_tokens,
|
|
208
|
+
total_tokens=input_tokens + cache_creation + cache_read + output_tokens,
|
|
209
|
+
cache_creation_input_tokens=cache_creation,
|
|
210
|
+
cache_read_input_tokens=cache_read,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
def _build_first_message(self, messages: list[dict]) -> str:
|
|
214
|
+
"""Build the first user message, inlining attachments, skills, and history."""
|
|
215
|
+
parts = []
|
|
216
|
+
|
|
217
|
+
include_context = not self._resume_session or self._resume_after_compaction
|
|
218
|
+
if include_context and (self._attachments or self._skills):
|
|
219
|
+
from tsugite.attachments.base import AttachmentContentType, format_attachment_open_tag
|
|
220
|
+
|
|
221
|
+
context_parts = []
|
|
222
|
+
for att in self._attachments:
|
|
223
|
+
if att.content_type == AttachmentContentType.TEXT:
|
|
224
|
+
context_parts.append(format_attachment_open_tag(att))
|
|
225
|
+
context_parts.append(att.content)
|
|
226
|
+
context_parts.append("</attachment>")
|
|
227
|
+
for skill in self._skills:
|
|
228
|
+
content = skill.content
|
|
229
|
+
if len(content) > 4000:
|
|
230
|
+
content = content[:4000] + "\n... (truncated)"
|
|
231
|
+
context_parts.append(f'<skill_content name="{skill.name}">')
|
|
232
|
+
context_parts.append(content)
|
|
233
|
+
context_parts.append("</skill_content>")
|
|
234
|
+
if context_parts:
|
|
235
|
+
parts.append("<context>\n" + "\n".join(context_parts) + "\n</context>\n")
|
|
236
|
+
|
|
237
|
+
if self._previous_messages and not self._resume_session:
|
|
238
|
+
budget = self._get_history_budget()
|
|
239
|
+
trimmed = self._trim_to_budget(self._previous_messages, budget)
|
|
240
|
+
dropped = len(self._previous_messages) - len(trimmed)
|
|
241
|
+
history_lines = [f"{msg.get('role', 'unknown').capitalize()}: {msg.get('content', '')}" for msg in trimmed]
|
|
242
|
+
header = "<conversation_history"
|
|
243
|
+
if dropped > 0:
|
|
244
|
+
header += f' note="{dropped} older messages omitted for context"'
|
|
245
|
+
header += ">"
|
|
246
|
+
parts.append(header + "\n" + "\n\n".join(history_lines) + "\n</conversation_history>\n")
|
|
247
|
+
|
|
248
|
+
# Add the task (last user message — earlier user messages are context/history)
|
|
249
|
+
for msg in reversed(messages):
|
|
250
|
+
if msg.get("role") == "user":
|
|
251
|
+
content = msg["content"]
|
|
252
|
+
if isinstance(content, list):
|
|
253
|
+
content = "\n".join(
|
|
254
|
+
b if isinstance(b, str) else b.get("text", "")
|
|
255
|
+
for b in content
|
|
256
|
+
if isinstance(b, str) or b.get("type") == "text"
|
|
257
|
+
)
|
|
258
|
+
parts.append(content)
|
|
259
|
+
break
|
|
260
|
+
|
|
261
|
+
return "\n".join(parts)
|
|
262
|
+
|
|
263
|
+
def _get_history_budget(self) -> int:
|
|
264
|
+
info = self.get_model_info(self._resolved_model) if self._resolved_model else None
|
|
265
|
+
context_limit = info.max_input_tokens if info else 200_000
|
|
266
|
+
return context_limit // 2
|
|
267
|
+
|
|
268
|
+
@staticmethod
|
|
269
|
+
def _trim_to_budget(messages: list[dict], budget_tokens: int) -> list[dict]:
|
|
270
|
+
"""Keep the most recent messages that fit within a token budget."""
|
|
271
|
+
kept = []
|
|
272
|
+
used = 0
|
|
273
|
+
for msg in reversed(messages):
|
|
274
|
+
content = msg.get("content", "")
|
|
275
|
+
est = len(content) // 4 if isinstance(content, str) else 100
|
|
276
|
+
if used + est > budget_tokens and kept:
|
|
277
|
+
break
|
|
278
|
+
kept.append(msg)
|
|
279
|
+
used += est
|
|
280
|
+
kept.reverse()
|
|
281
|
+
return kept
|
|
282
|
+
|
|
283
|
+
async def stop(self) -> None:
|
|
284
|
+
if self._process:
|
|
285
|
+
self._session_id = self._process.session_id
|
|
286
|
+
self._compacted = self._process.compacted
|
|
287
|
+
await self._process.stop()
|
|
288
|
+
self._process = None
|
|
289
|
+
self._turn_count = 0
|
|
290
|
+
|
|
291
|
+
def count_tokens(self, text: str, model: str) -> int:
|
|
292
|
+
return default_count_tokens(text, model)
|
|
293
|
+
|
|
294
|
+
def get_model_info(self, model: str) -> ModelInfo | None:
|
|
295
|
+
resolved = _ALIASES.get(model, model)
|
|
296
|
+
return _get_model_info(self.name, resolved)
|
|
297
|
+
|
|
298
|
+
async def list_models(self) -> list[str]:
|
|
299
|
+
return list(_ALIASES.keys())
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def create_provider(name: str = "claude_code", **kwargs: Any) -> ClaudeCodeProvider:
    """Build a ClaudeCodeProvider registered under ``name``.

    Extra keyword arguments are accepted and ignored.
    """
    provider = ClaudeCodeProvider(name=name)
    return provider
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
tsugite_claude_code/__init__.py,sha256=FkOXT_MW0ZqTfcutX9dMOSIGCXXScMkV4arCZPAyvvA,217
|
|
2
|
+
tsugite_claude_code/process.py,sha256=QMCxynuxo8e-nHMyklo85gGjCD5WcD00ymfpnR8oHg4,12761
|
|
3
|
+
tsugite_claude_code/provider.py,sha256=bRfQa1ns5jmYu9spqvYfrpA5K07gVgUWrq917T0hWp8,12290
|
|
4
|
+
tsugite_claude_code-0.17.0.dist-info/METADATA,sha256=rpS35G4dSPSBtv7010YVdnfoGBnRs9UKG9ZqRpqLtWQ,211
|
|
5
|
+
tsugite_claude_code-0.17.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
6
|
+
tsugite_claude_code-0.17.0.dist-info/entry_points.txt,sha256=qm-DoAMQ5y8SP2aLVI2JSWdjuOg5Q-tqjlqIuaqAXj4,70
|
|
7
|
+
tsugite_claude_code-0.17.0.dist-info/RECORD,,
|