unified-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unified_cli/__init__.py +76 -0
- unified_cli/base.py +510 -0
- unified_cli/cli.py +528 -0
- unified_cli/conversation.py +235 -0
- unified_cli/core.py +171 -0
- unified_cli/dashboard_tpl.py +177 -0
- unified_cli/discovery.py +87 -0
- unified_cli/errors.py +209 -0
- unified_cli/factory.py +69 -0
- unified_cli/models.py +220 -0
- unified_cli/onboarding.py +269 -0
- unified_cli/providers/__init__.py +7 -0
- unified_cli/providers/claude.py +356 -0
- unified_cli/providers/codex.py +273 -0
- unified_cli/providers/gemini.py +374 -0
- unified_cli/py.typed +0 -0
- unified_cli/repl.py +379 -0
- unified_cli/server.py +306 -0
- unified_cli/state.py +122 -0
- unified_cli/ui.py +180 -0
- unified_cli/usage.py +126 -0
- unified_cli-0.1.0.dist-info/METADATA +447 -0
- unified_cli-0.1.0.dist-info/RECORD +27 -0
- unified_cli-0.1.0.dist-info/WHEEL +5 -0
- unified_cli-0.1.0.dist-info/entry_points.txt +2 -0
- unified_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- unified_cli-0.1.0.dist-info/top_level.txt +1 -0
unified_cli/__init__.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""unified_cli — one Python + CLI API for Claude Code / Codex / Gemini.
|
|
2
|
+
|
|
3
|
+
Quick start (Python):
|
|
4
|
+
|
|
5
|
+
from unified_cli import create, UnifiedConversation
|
|
6
|
+
|
|
7
|
+
# Single call
|
|
8
|
+
resp = create("claude").chat("안녕")
|
|
9
|
+
print(resp.text, resp.session_id)
|
|
10
|
+
|
|
11
|
+
# Manual history (external code manages session_id)
|
|
12
|
+
cli = create("codex")
|
|
13
|
+
r1 = cli.chat("내 이름은 민우")
|
|
14
|
+
r2 = cli.chat("내 이름?", session_id=r1.session_id)
|
|
15
|
+
|
|
16
|
+
# Wrapper-managed history (+ cross-provider context injection)
|
|
17
|
+
conv = UnifiedConversation()
|
|
18
|
+
conv.send("내 이름 민우", provider="claude")
|
|
19
|
+
conv.send("내 이름?", provider="gemini") # auto-preserves context
|
|
20
|
+
|
|
21
|
+
Quick start (CLI):
|
|
22
|
+
|
|
23
|
+
unified-cli setup # first-time onboarding
|
|
24
|
+
unified-cli chat "..." # single call
|
|
25
|
+
unified-cli chat "..." -c # continue last saved session
|
|
26
|
+
unified-cli repl # interactive REPL
|
|
27
|
+
|
|
28
|
+
Error handling:
|
|
29
|
+
|
|
30
|
+
from unified_cli import UnifiedError
|
|
31
|
+
try:
|
|
32
|
+
create("claude").chat("")
|
|
33
|
+
except UnifiedError as e:
|
|
34
|
+
e.kind # auth_expired | rate_limit | model_not_allowed | not_found
|
|
35
|
+
# | network | config | internal
|
|
36
|
+
e.hint # user-facing recovery hint
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
__version__ = "0.1.0"
|
|
40
|
+
|
|
41
|
+
from .base import BaseProvider
|
|
42
|
+
from .conversation import UnifiedConversation
|
|
43
|
+
from .core import Message, ModelInfo, ProviderName, Response, Usage
|
|
44
|
+
from .errors import ErrorKind, UnifiedError, classify
|
|
45
|
+
from .factory import PROVIDERS, create, route
|
|
46
|
+
from .models import DEFAULT_MODELS, list_models
|
|
47
|
+
from .providers import ClaudeProvider, CodexProvider, GeminiProvider
|
|
48
|
+
from .state import SessionState, load_last_session, save_last_session
|
|
49
|
+
from .usage import UsageTracker, tracker
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"__version__",
|
|
53
|
+
"BaseProvider",
|
|
54
|
+
"ClaudeProvider",
|
|
55
|
+
"CodexProvider",
|
|
56
|
+
"GeminiProvider",
|
|
57
|
+
"UnifiedConversation",
|
|
58
|
+
"UnifiedError",
|
|
59
|
+
"UsageTracker",
|
|
60
|
+
"SessionState",
|
|
61
|
+
"ErrorKind",
|
|
62
|
+
"Message",
|
|
63
|
+
"ModelInfo",
|
|
64
|
+
"ProviderName",
|
|
65
|
+
"Response",
|
|
66
|
+
"Usage",
|
|
67
|
+
"DEFAULT_MODELS",
|
|
68
|
+
"PROVIDERS",
|
|
69
|
+
"classify",
|
|
70
|
+
"create",
|
|
71
|
+
"list_models",
|
|
72
|
+
"load_last_session",
|
|
73
|
+
"route",
|
|
74
|
+
"save_last_session",
|
|
75
|
+
"tracker",
|
|
76
|
+
]
|
unified_cli/base.py
ADDED
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
"""BaseProvider ABC with shared subprocess execution, retry, and fallback."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import atexit
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import subprocess
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
from typing import AsyncIterator, ClassVar, Iterator, Optional
|
|
14
|
+
|
|
15
|
+
from .core import Message, ModelInfo, ProviderName, Response, Usage
|
|
16
|
+
from .errors import UnifiedError, classify
|
|
17
|
+
from .usage import tracker as _usage_tracker
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Materialized temp files registered for cleanup on interpreter exit (defense
|
|
21
|
+
# in depth; per-call cleanup happens in chat/stream finally blocks).
|
|
22
|
+
_GLOBAL_TEMP_FILES: set[str] = set()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@atexit.register
def _cleanup_global_temp_files() -> None:
    """Remove every temp file still registered when the interpreter exits.

    Deletion is best-effort: a path that cannot be unlinked (already gone,
    permission problem, ...) is skipped. The registry is emptied afterwards.
    """
    # Work on a snapshot so the registry itself is not iterated directly.
    leftovers = tuple(_GLOBAL_TEMP_FILES)
    for leftover in leftovers:
        try:
            os.unlink(leftover)
        except OSError:
            continue
    _GLOBAL_TEMP_FILES.clear()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Max 2 retries (0.5s, 1.5s) for network errors; 1 retry for auth fallback.
|
|
36
|
+
_NETWORK_BACKOFF = (0.5, 1.5)
|
|
37
|
+
|
|
38
|
+
# Default subprocess timeouts. The wrapped CLIs occasionally hang (network
|
|
39
|
+
# stalls, OAuth refresh edge cases, etc); without timeouts a REPL or HTTP
|
|
40
|
+
# server backed by this wrapper can wedge indefinitely. Override via
|
|
41
|
+
# `BaseProvider(timeout=N)` if you need shorter or longer.
|
|
42
|
+
DEFAULT_CHAT_TIMEOUT = 120 # seconds — non-streaming
|
|
43
|
+
DEFAULT_STREAM_TIMEOUT = 300 # seconds — streaming may take longer for long replies
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _reject_empty_prompt(prompt: str, provider: str) -> None:
    """Refuse prompts that are empty or consist only of whitespace.

    Called at the entry of the chat/stream methods. The original author notes
    that Claude in particular answers blank input with hallucinated text.

    Args:
        prompt: The user prompt to validate.
        provider: Provider name attached to the raised error.

    Raises:
        UnifiedError: ``kind="config"`` when the prompt has no non-whitespace
            content.
    """
    if prompt and prompt.strip():
        return
    raise UnifiedError(
        kind="config", provider=provider,  # type: ignore[arg-type]
        message="프롬프트가 비어있습니다.",
        hint="공백이 아닌 텍스트를 전달하세요. stdin 에서 읽는 경우 파이프 입력을 확인하세요.",
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _check_session_match(
    provider: str, requested: Optional[str], got: Optional[str]
) -> None:
    """Fail loudly when a resume request landed in a different session.

    Nothing is checked unless both ids are non-empty. According to the original
    author this mainly guards against ``codex exec resume <unknown-uuid>``,
    which succeeds with a fresh session instead of erroring; the check itself
    is provider-agnostic.

    Args:
        provider: Provider name attached to the raised error.
        requested: Session id the caller asked to resume (may be ``None``).
        got: Session id reported back by the CLI (may be ``None``).

    Raises:
        UnifiedError: ``kind="not_found"`` when both ids are present and differ.
    """
    nothing_to_compare = not (requested and got)
    if nothing_to_compare or requested == got:
        return
    raise UnifiedError(
        kind="not_found", provider=provider,  # type: ignore[arg-type]
        message=(f"요청한 세션 {requested[:12]}… 을 찾을 수 없어 "
                 f"새 세션 {got[:12]}… 이 생성되었습니다."),
        hint="세션이 만료되었거나 다른 cwd 에서 생성됐을 수 있습니다. "
             "session_id 를 새로 받거나 Conversation 을 재시작하세요.",
        cause=f"requested={requested} got={got}",
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class BaseProvider(ABC):
|
|
85
|
+
"""Base class for a single-provider CLI wrapper.
|
|
86
|
+
|
|
87
|
+
Subclasses must implement:
|
|
88
|
+
- `_build_args(prompt, *, session_id, resume_last, model, streaming, images)` → (argv, stdin_data) tuple
|
|
89
|
+
- `_normalize(obj)` → iterator of Message (from raw JSON object)
|
|
90
|
+
- `_parse_json_response(text, model)` → Response (for non-streaming `--output-format json`)
|
|
91
|
+
- `_discover_bin()` / `_install_hint()` → classmethods: CLI binary path (or None) and the install hint shown when it is missing
|
|
92
|
+
"""
|
|
93
|
+
|
|
94
|
+
name: ClassVar[ProviderName]
|
|
95
|
+
default_model: ClassVar[str]
|
|
96
|
+
api_key_env: ClassVar[str] # e.g., "ANTHROPIC_API_KEY"
|
|
97
|
+
login_hint: ClassVar[str] # e.g., "`claude /login` 재실행"
|
|
98
|
+
|
|
99
|
+
def __init__(
    self,
    *,
    model: Optional[str] = None,
    cwd: Optional[str] = None,
    bin_path: Optional[str] = None,
    extra_env: Optional[dict] = None,
    timeout: Optional[float] = None,
    web_search: bool = True,
):
    """Configure the provider and locate its CLI binary.

    Args:
        model: Model name; a falsy value selects the class ``default_model``.
        cwd: Working directory handed to the child process.
        bin_path: Explicit CLI path; when falsy ``_discover_bin()`` is used.
        extra_env: Extra environment variables layered over ``os.environ``.
        timeout: Seconds applied to both chat and stream modes. ``None``
            selects the per-mode defaults (``DEFAULT_CHAT_TIMEOUT`` /
            ``DEFAULT_STREAM_TIMEOUT``).
        web_search: Stored on the instance as ``self.web_search``.

    Raises:
        UnifiedError: ``kind="config"`` when no CLI binary can be resolved.
    """
    self.model = model if model else self.default_model
    self.cwd = cwd
    self.extra_env = extra_env if extra_env else {}

    # One explicit timeout governs both modes; otherwise each mode gets its
    # own default.
    if timeout is None:
        self.timeout = DEFAULT_CHAT_TIMEOUT
        self.stream_timeout = DEFAULT_STREAM_TIMEOUT
    else:
        self.timeout = timeout
        self.stream_timeout = timeout
    self.web_search = web_search

    # Thread-local list of per-call temp files (e.g. image bytes written to
    # disk); see _register_temp_file / _cleanup_temp_files.
    self._tmp = threading.local()

    located = bin_path or self._discover_bin()
    if located:
        self.bin_path = located
        return
    raise UnifiedError(
        kind="config", provider=self.name,
        message=f"{self.name} CLI 바이너리를 찾을 수 없습니다.",
        hint=self._install_hint(),
    )
|
|
130
|
+
|
|
131
|
+
# ----- abstract -----
|
|
132
|
+
|
|
133
|
+
@classmethod
@abstractmethod
def _discover_bin(cls) -> Optional[str]:
    """Return the path of this provider's CLI binary, or ``None`` if not found.

    Abstract: the base implementation does nothing. ``__init__`` calls this
    when no explicit ``bin_path`` is given.
    """
|
|
136
|
+
|
|
137
|
+
@classmethod
@abstractmethod
def _install_hint(cls) -> str:
    """Return the hint shown to the user when the CLI binary is missing.

    Abstract: the base implementation does nothing. ``__init__`` passes the
    result as the ``hint`` of the "binary not found" error.
    """
|
|
140
|
+
|
|
141
|
+
@abstractmethod
def _build_args(
    self,
    prompt: str,
    *,
    session_id: Optional[str],
    resume_last: bool,
    model: Optional[str],
    streaming: bool,
    images: Optional[list] = None,
) -> tuple[list[str], Optional[str]]:
    """Return ``(argv, stdin_data)`` for one subprocess invocation.

    Abstract: each provider supplies its own implementation.

    ``stdin_data`` is text to pipe into the child's stdin, or ``None`` in the
    usual argv-only case. Per the original author's notes, Codex is currently
    the only user: its CLI reads the prompt from stdin when an image (``-i``)
    is attached. Claude routes images through its Read tool and agy embeds
    ``@path`` in the prompt, so both return ``stdin_data=None``.
    """
|
|
160
|
+
|
|
161
|
+
@abstractmethod
def _normalize(self, obj: dict) -> Iterator[Message]:
    """Translate one decoded JSON object from the CLI into ``Message`` items.

    Abstract: the base implementation does nothing. The streaming methods call
    this for every JSON line read from the child's stdout.
    """
|
|
163
|
+
|
|
164
|
+
@abstractmethod
def _parse_json_response(self, text: str, model: str) -> Response:
    """Build a ``Response`` from the stdout of a non-streaming run.

    Abstract: the base implementation does nothing. ``chat`` passes the raw
    stdout text and the model name in effect for the call.
    """
|
|
166
|
+
|
|
167
|
+
# ----- temp file lifecycle -----
|
|
168
|
+
|
|
169
|
+
def _reset_temp_files(self) -> None:
    """Start the current thread's temp-file list from scratch.

    Only the tracking list is replaced; no file is deleted here.
    """
    setattr(self._tmp, "files", list())
|
|
171
|
+
|
|
172
|
+
def _register_temp_file(self, path: str) -> None:
    """Track ``path`` so it is unlinked once the current call finishes.

    Providers call this after materializing image bytes/URLs to disk. The path
    is added to the per-thread list and to the module-wide registry that is
    swept at interpreter exit.
    """
    tracked = getattr(self._tmp, "files", None)
    if tracked is None:
        # First registration on this thread: create the list lazily.
        tracked = []
        self._tmp.files = tracked
    tracked.append(path)
    _GLOBAL_TEMP_FILES.add(path)  # atexit safety net
|
|
180
|
+
|
|
181
|
+
def _cleanup_temp_files(self) -> None:
    """Delete the current thread's tracked temp files and reset the list.

    Deletion is best-effort (``OSError`` is ignored); every path is also
    dropped from the module-wide exit-time registry.
    """
    pending = getattr(self._tmp, "files", None)
    if pending:
        for stale in pending:
            try:
                os.unlink(stale)
            except OSError:
                pass
            _GLOBAL_TEMP_FILES.discard(stale)
    self._tmp.files = []
|
|
189
|
+
|
|
190
|
+
# ----- env + subprocess -----
|
|
191
|
+
|
|
192
|
+
def _env(self, fallback_api_key: bool = False) -> dict:
    """Build the environment mapping for the child process.

    The result is a copy of ``os.environ`` with ``self.extra_env`` layered on
    top. With ``fallback_api_key=True`` the value of ``self.api_key_env`` from
    the real process environment is written back last, so it wins over any
    override supplied via ``extra_env``.

    NOTE(review): because the copy of ``os.environ`` already carries that
    variable, the flag only changes the result when ``extra_env`` overrides
    it - confirm this is the intended "fallback" behaviour.
    """
    merged = {**os.environ, **self.extra_env}
    if fallback_api_key:
        inherited_key = os.environ.get(self.api_key_env)
        if inherited_key is not None:
            merged[self.api_key_env] = inherited_key
    return merged
|
|
198
|
+
|
|
199
|
+
def _run(self, args: list[str], stdin_data: Optional[str] = None) -> str:
    """Run the CLI to completion (non-streaming) and return its stdout.

    Args:
        args: Full argv for the child process.
        stdin_data: Text piped to the child's stdin. When ``None`` an empty
            string is piped instead, so the child never inherits (and blocks
            on) the parent's stdin.

    Returns:
        The child's stdout when it exits with status 0.

    Raises:
        UnifiedError: ``kind="network"`` when a run exceeds ``self.timeout``;
            otherwise the error ``classify`` derives from the failing run's
            stderr, stdout and exit code.

    Retry policy:
        * ``auth_expired``: if ``self.api_key_env`` is set in the process
          environment, re-run once with ``_env(fallback_api_key=True)``; the
          outcome of that re-run is final.
        * ``network``: re-run after each delay in ``_NETWORK_BACKOFF``.
        * Timeouts and every other error kind are raised immediately.
    """
    attempt = 0
    while True:
        try:
            result = self._run_once(args, stdin_data, fallback=False)
        except subprocess.TimeoutExpired as exc:
            raise UnifiedError(
                kind="network", provider=self.name,
                message=f"{self.name} 응답이 {self.timeout}초 안에 오지 않음.",
                hint=("네트워크/CLI hang 가능성. timeout 을 늘리거나 다시 시도하세요. "
                      "BaseProvider(timeout=N) 으로 조정 가능."),
            ) from exc
        if result.returncode == 0:
            return result.stdout

        err = classify(self.name, result.stderr, result.stdout, result.returncode)

        if err.kind == "auth_expired":
            if self.api_key_env in os.environ:
                try:
                    result = self._run_once(args, stdin_data, fallback=True)
                except subprocess.TimeoutExpired as exc:
                    raise UnifiedError(
                        kind="network", provider=self.name,
                        message=f"{self.name} API key fallback 중 timeout.",
                        hint="네트워크 확인 후 재시도.",
                    ) from exc
                if result.returncode == 0:
                    return result.stdout
                err = classify(self.name, result.stderr, result.stdout, result.returncode)
            raise err  # no key available or fallback also failed

        if err.kind == "network" and attempt < len(_NETWORK_BACKOFF):
            time.sleep(_NETWORK_BACKOFF[attempt])
            attempt += 1
            continue

        raise err

def _run_once(
    self, args: list[str], stdin_data: Optional[str], *, fallback: bool
) -> "subprocess.CompletedProcess[str]":
    """Execute the CLI a single time; lets ``subprocess.TimeoutExpired`` propagate."""
    return subprocess.run(
        args, capture_output=True, text=True,
        # Always pipe something: an inherited stdin can make the child block.
        input=stdin_data if stdin_data is not None else "",
        cwd=self.cwd, env=self._env(fallback_api_key=fallback),
        timeout=self.timeout,
    )
|
|
266
|
+
|
|
267
|
+
# ----- public API -----
|
|
268
|
+
|
|
269
|
+
def chat(
    self,
    prompt: str,
    *,
    session_id: Optional[str] = None,
    resume_last: bool = False,
    model: Optional[str] = None,
    images: Optional[list] = None,
) -> Response:
    """Send one prompt and return the complete, parsed response.

    Args:
        prompt: Non-blank user prompt.
        session_id: Session to resume; the returned session id is checked
            against it.
        resume_last: Forwarded to ``_build_args``.
        model: Per-call model override; defaults to ``self.model``.
        images: Optional image inputs, forwarded to ``_build_args``.

    Returns:
        The ``Response`` produced by ``_parse_json_response``.

    Raises:
        UnifiedError: For a blank prompt, a failed CLI run, or a session
            mismatch. Errors raised while running or parsing are recorded in
            the usage tracker before being re-raised.
    """
    _reject_empty_prompt(prompt, self.name)
    self._reset_temp_files()
    try:
        # Built inside the try so that temp files registered before a build
        # failure are still removed by the finally clause.
        args, stdin_data = self._build_args(
            prompt, session_id=session_id, resume_last=resume_last,
            model=model, streaming=False, images=images,
        )
        t0 = time.time()
        try:
            stdout = self._run(args, stdin_data=stdin_data)
            resp = self._parse_json_response(stdout, model or self.model)
            _check_session_match(self.name, session_id, resp.session_id)
        except UnifiedError as e:
            _usage_tracker.record(
                self.name, model or self.model,
                latency_ms=int((time.time() - t0) * 1000),
                prompt_preview=prompt, error_kind=e.kind,
            )
            raise
        _usage_tracker.record(
            self.name, resp.model,
            input_tokens=resp.usage.input_tokens or 0,
            output_tokens=resp.usage.output_tokens or 0,
            cached_tokens=resp.usage.cached_tokens or 0,
            latency_ms=int((time.time() - t0) * 1000),
            session_id=resp.session_id,
            prompt_preview=prompt,
        )
        return resp
    finally:
        self._cleanup_temp_files()
|
|
309
|
+
|
|
310
|
+
def stream(
    self,
    prompt: str,
    *,
    session_id: Optional[str] = None,
    resume_last: bool = False,
    model: Optional[str] = None,
    images: Optional[list] = None,
) -> Iterator[Message]:
    """Send one prompt and yield normalized messages as the CLI emits them.

    This is a generator: prompt validation, argument building and the
    subprocess launch all happen on the first iteration, not at call time.

    Args:
        prompt: Non-blank user prompt.
        session_id: Session to resume; the first session message is checked
            against it.
        resume_last: Forwarded to ``_build_args``.
        model: Per-call model override; defaults to ``self.model``.
        images: Optional image inputs, forwarded to ``_build_args``.

    Yields:
        Every ``Message`` produced by ``_stream_run``.

    Raises:
        UnifiedError: For a blank prompt, a failed CLI run, or a session
            mismatch. Errors raised while streaming are recorded in the usage
            tracker before being re-raised.
    """
    _reject_empty_prompt(prompt, self.name)
    self._reset_temp_files()
    try:
        # Built inside the try so that temp files registered before a build
        # failure are still removed by the finally clause.
        args, stdin_data = self._build_args(
            prompt, session_id=session_id, resume_last=resume_last,
            model=model, streaming=True, images=images,
        )
        t0 = time.time()
        final_usage = Usage()
        final_session = ""
        session_checked = False
        try:
            for msg in self._stream_run(args, stdin_data=stdin_data):
                if msg.kind == "usage" and msg.usage:
                    final_usage = msg.usage
                if msg.kind == "session" and msg.session_id:
                    final_session = msg.session_id
                    if not session_checked:
                        # Only the first session message is compared with the
                        # requested id.
                        _check_session_match(self.name, session_id, msg.session_id)
                        session_checked = True
                yield msg
        except UnifiedError as e:
            _usage_tracker.record(
                self.name, model or self.model,
                latency_ms=int((time.time() - t0) * 1000),
                prompt_preview=prompt, error_kind=e.kind,
            )
            raise
        _usage_tracker.record(
            self.name, model or self.model,
            input_tokens=final_usage.input_tokens or 0,
            output_tokens=final_usage.output_tokens or 0,
            cached_tokens=final_usage.cached_tokens or 0,
            latency_ms=int((time.time() - t0) * 1000),
            session_id=final_session,
            prompt_preview=prompt,
        )
    finally:
        self._cleanup_temp_files()
|
|
358
|
+
|
|
359
|
+
async def achat(self, prompt: str, **kw) -> Response:
    """Async wrapper around ``chat``: runs it in the loop's default executor.

    Args:
        prompt: User prompt, passed through to ``chat``.
        **kw: Keyword arguments forwarded to ``chat`` unchanged.

    Returns:
        Whatever ``chat`` returns; exceptions raised by ``chat`` propagate.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it (get_event_loop() has policy-dependent
    # behaviour).
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, lambda: self.chat(prompt, **kw))
|
|
362
|
+
|
|
363
|
+
async def astream(
    self,
    prompt: str,
    *,
    session_id: Optional[str] = None,
    resume_last: bool = False,
    model: Optional[str] = None,
    images: Optional[list] = None,
) -> AsyncIterator[Message]:
    """Async counterpart of ``stream``: yield messages as the CLI emits them.

    Unlike the sync path this method performs no API-key fallback retry and
    applies no timeout. Temp files are tracked per thread, so overlapping
    ``astream`` calls on one instance within the same event-loop thread share
    a single tracking list.

    Args:
        prompt: Non-blank user prompt.
        session_id: Session to resume; the first session message is checked
            against it.
        resume_last: Forwarded to ``_build_args``.
        model: Per-call model override; defaults to ``self.model``.
        images: Optional image inputs, forwarded to ``_build_args``.

    Yields:
        Every ``Message`` produced by ``_normalize`` for each JSON line.

    Raises:
        UnifiedError: For a blank prompt, a session mismatch, or a non-zero
            exit status. Errors raised while streaming are recorded in the
            usage tracker before being re-raised.
    """
    _reject_empty_prompt(prompt, self.name)
    self._reset_temp_files()
    proc = None
    stderr_task = None
    # asyncio's default 64 KiB line limit is easily exceeded by a single JSON
    # event; the reader raises ValueError on longer lines.
    line_limit = 16 * 1024 * 1024
    try:
        # Built inside the try so that temp files registered before a build
        # failure are still removed by the finally clause.
        args, stdin_data = self._build_args(
            prompt, session_id=session_id, resume_last=resume_last,
            model=model, streaming=True, images=images,
        )
        t0 = time.time()
        final_usage = Usage()
        final_session = ""
        session_checked = False
        try:
            proc = await asyncio.create_subprocess_exec(
                *args,
                # Never let the child inherit our stdin: the sync path notes
                # that Gemini blocks on it even when the prompt is in argv.
                stdin=(asyncio.subprocess.PIPE if stdin_data
                       else asyncio.subprocess.DEVNULL),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=self.cwd, env=self._env(),
                limit=line_limit,
            )
            if stdin_data and proc.stdin:
                proc.stdin.write(stdin_data.encode())
                await proc.stdin.drain()
                proc.stdin.close()
            assert proc.stdout is not None
            if proc.stderr is not None:
                # Drain stderr concurrently so a chatty child cannot fill the
                # pipe and stall while we are reading stdout.
                stderr_task = asyncio.ensure_future(proc.stderr.read())
            async for raw in proc.stdout:
                line = raw.decode().strip()
                if not line or not line.startswith("{"):
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                for msg in self._normalize(obj):
                    if msg.kind == "usage" and msg.usage:
                        final_usage = msg.usage
                    if msg.kind == "session" and msg.session_id:
                        final_session = msg.session_id
                        if not session_checked:
                            _check_session_match(
                                self.name, session_id, msg.session_id)
                            session_checked = True
                    yield msg
            await proc.wait()
            if proc.returncode != 0:
                err_bytes = await stderr_task if stderr_task is not None else b""
                err = classify(self.name, err_bytes.decode(), "", proc.returncode)
                raise err
        except UnifiedError as e:
            # Mirror sync stream(): record the error turn before raising.
            _usage_tracker.record(
                self.name, model or self.model,
                latency_ms=int((time.time() - t0) * 1000),
                prompt_preview=prompt, error_kind=e.kind,
            )
            raise
        # Success - record usage parity with sync stream().
        _usage_tracker.record(
            self.name, model or self.model,
            input_tokens=final_usage.input_tokens or 0,
            output_tokens=final_usage.output_tokens or 0,
            cached_tokens=final_usage.cached_tokens or 0,
            latency_ms=int((time.time() - t0) * 1000),
            session_id=final_session,
            prompt_preview=prompt,
        )
    finally:
        if stderr_task is not None and not stderr_task.done():
            stderr_task.cancel()
        if proc is not None and proc.returncode is None:
            # Early exit (consumer stopped, error, cancellation): nobody reads
            # the pipes any more, so a bare wait() could block forever.
            try:
                proc.kill()
            except ProcessLookupError:
                pass
            try:
                await proc.wait()
            except Exception:
                pass
        self._cleanup_temp_files()
|
|
440
|
+
|
|
441
|
+
def _stream_once(
    self,
    args: list[str],
    *,
    fallback: bool,
    stdin_data: Optional[str] = None,
) -> Iterator[Message]:
    """Run the CLI once, yielding normalized messages line by line.

    Args:
        args: Full argv for the child process.
        fallback: Passed to ``_env`` as ``fallback_api_key``.
        stdin_data: Text written to the child's stdin; when falsy the child's
            stdin is connected to the null device.

    Yields:
        Every ``Message`` that ``_normalize`` produces for each stdout line
        that parses as a JSON object; other lines are skipped.

    Raises:
        UnifiedError: ``kind="network"`` if the child has not exited
            ``self.stream_timeout`` seconds after its output ended; otherwise
            the ``classify`` result for a non-zero exit status, tagged with
            ``_produced_any`` so the caller can decide whether to retry.

    Note:
        Reading stdout itself is not time-bounded; ``stream_timeout`` only
        limits the wait for process exit after reading stops.
    """
    import tempfile  # local import: only this method needs it

    # stderr is spooled to an anonymous temp file rather than a pipe: an
    # undrained pipe fills up and stalls the child while we read stdout.
    with tempfile.TemporaryFile(mode="w+") as stderr_file:
        proc = subprocess.Popen(
            args,
            # Never let the child inherit our stdin: _run notes that Gemini
            # blocks on it even when the prompt is supplied in argv.
            stdin=subprocess.PIPE if stdin_data else subprocess.DEVNULL,
            stdout=subprocess.PIPE, stderr=stderr_file,
            text=True, cwd=self.cwd,
            env=self._env(fallback_api_key=fallback), bufsize=1,
        )
        if stdin_data and proc.stdin:
            try:
                proc.stdin.write(stdin_data)
                proc.stdin.flush()
                proc.stdin.close()
            except BrokenPipeError:
                pass
        assert proc.stdout is not None
        produced_any = False
        reached_eof = False
        try:
            for line in proc.stdout:
                line = line.strip()
                if not line or not line.startswith("{"):
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                for msg in self._normalize(obj):
                    produced_any = True
                    yield msg
            reached_eof = True
        finally:
            try:
                if not reached_eof and proc.poll() is None:
                    # The consumer stopped early or an error interrupted the
                    # read loop: nobody will drain stdout, so end the child now
                    # instead of waiting out the full timeout.
                    proc.kill()
                proc.wait(timeout=self.stream_timeout)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
                raise UnifiedError(
                    kind="network", provider=self.name,
                    message=f"{self.name} 스트림이 {self.stream_timeout}초 안에 끝나지 않음.",
                    hint="긴 응답이면 BaseProvider(timeout=N) 으로 늘리세요.",
                )
            finally:
                proc.stdout.close()

        stderr_file.seek(0)
        stderr_text = stderr_file.read()

        if proc.returncode not in (0, None):
            err = classify(self.name, stderr_text, "", proc.returncode)
            # attach a marker so the outer retry loop can decide
            err._produced_any = produced_any  # type: ignore[attr-defined]
            raise err
|
|
495
|
+
|
|
496
|
+
def _stream_run(
    self, args: list[str], stdin_data: Optional[str] = None
) -> Iterator[Message]:
    """Stream synchronously, retrying once with the API key on early auth failure.

    The first attempt runs without the fallback flag. If it fails with
    ``auth_expired`` before producing any message, and ``self.api_key_env`` is
    present in the process environment, a second attempt runs with the flag
    set. Every other failure is re-raised unchanged.
    """
    try:
        yield from self._stream_once(args, fallback=False, stdin_data=stdin_data)
    except UnifiedError as failure:
        can_retry = (
            failure.kind == "auth_expired"
            and not getattr(failure, "_produced_any", False)
            and self.api_key_env in os.environ
        )
        if not can_retry:
            raise
        yield from self._stream_once(args, fallback=True, stdin_data=stdin_data)
|