spyllm 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ __pycache__/
2
+ .pytest_cache/
3
+ .venv/
4
+ *.pyc
5
+ *.pyo
6
+ *.pyd
7
+
8
+ node_modules/
9
+ dist/
10
+ coverage/
11
+ .vite/
12
+
13
+ .env
14
+ .env.*
15
+ !.env.example
16
+
17
+ .DS_Store
18
+ Thumbs.db
19
+
20
+ .idea/
21
+ .vscode/
22
+
23
+ backend/.mypy_cache/
24
+ backend/.ruff_cache/
25
+ frontend/.eslintcache
spyllm-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.4
2
+ Name: spyllm
3
+ Version: 0.2.0
4
+ Summary: Two-line automatic LLM tracing. Works with OpenAI, Anthropic, and more.
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: httpx>=0.24.0
7
+ Requires-Dist: wrapt>=1.14.0
8
+ Provides-Extra: anthropic
9
+ Requires-Dist: anthropic>=0.18.0; extra == 'anthropic'
10
+ Provides-Extra: openai
11
+ Requires-Dist: openai>=1.0.0; extra == 'openai'
12
+ Provides-Extra: otel
13
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == 'otel'
14
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.20.0; extra == 'otel'
15
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'otel'
spyllm-0.2.0/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # SpyLLM Python SDK
2
+
3
+ Automatic LLM tracing in two lines. Works with OpenAI, Anthropic, and more.
4
+
5
+ ## Quick Start
6
+
7
+ ```python
8
+ import spyllm
9
+
10
+ spyllm.init(api_key="sk-...")
11
+
12
+ # That's it. Every OpenAI and Anthropic call is now automatically traced.
13
+ from openai import OpenAI
14
+
15
+ client = OpenAI()
16
+ response = client.chat.completions.create(
17
+ model="gpt-4o",
18
+ messages=[{"role": "user", "content": "Hello!"}],
19
+ )
20
+ # Prompt, response, tokens, cost, and latency are captured automatically.
21
+ ```
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ pip install spyllm
27
+ ```
28
+
29
+ ## What Gets Captured
30
+
31
+ Every LLM call automatically records:
32
+
33
+ - **Prompt** — full message history sent to the model
34
+ - **Response** — the model's output
35
+ - **Token count** — input + output tokens
36
+ - **Cost** — estimated USD cost based on model pricing
37
+ - **Latency** — wall-clock time for the API call
38
+ - **Tool calls** — if the model invoked tools/functions
39
+ - **Errors** — failed calls with the exception message
40
+
41
+ ## Supported Providers
42
+
43
+ | Provider | Auto-instrumented |
44
+ |------------|-------------------|
45
+ | OpenAI | Yes |
46
+ | Anthropic | Yes |
47
+
48
+ ## Advanced Usage
49
+
50
+ ### Manual Tracing
51
+
52
+ ```python
53
+ from spyllm import SpyLLMClient
54
+
55
+ client = SpyLLMClient(api_key="sk-...", base_url="https://api.spyllm.com")
56
+ client.trace(
57
+ agent_name="my-agent",
58
+ prompt="What is 2+2?",
59
+ response="4",
60
+ token_count=15,
61
+ cost_usd=0.001,
62
+ )
63
+ ```
64
+
65
+ ### Decorator
66
+
67
+ ```python
68
+ from spyllm import agent_trace, init
69
+
70
+ init(api_key="sk-...")
71
+
72
+ @agent_trace("my-pipeline")
73
+ def run_pipeline(query: str) -> str:
74
+ # your code here
75
+ return result
76
+ ```
77
+
78
+ ### Disable Auto-instrumentation
79
+
80
+ ```python
81
+ spyllm.init(api_key="sk-...", instrument=False)
82
+ ```
@@ -0,0 +1,25 @@
1
+ [project]
2
+ name = "spyllm"
3
+ version = "0.2.0"
4
+ description = "Two-line automatic LLM tracing. Works with OpenAI, Anthropic, and more."
5
+ requires-python = ">=3.9"
6
+ dependencies = [
7
+ "httpx>=0.24.0",
8
+ "wrapt>=1.14.0",
9
+ ]
10
+
11
+ [project.optional-dependencies]
12
+ openai = ["openai>=1.0.0"]
13
+ anthropic = ["anthropic>=0.18.0"]
14
+ otel = [
15
+ "opentelemetry-api>=1.20.0",
16
+ "opentelemetry-sdk>=1.20.0",
17
+ "opentelemetry-exporter-otlp-proto-http>=1.20.0",
18
+ ]
19
+
20
+ [tool.hatch.build.targets.wheel]
21
+ packages = ["spyllm"]
22
+
23
+ [build-system]
24
+ requires = ["hatchling"]
25
+ build-backend = "hatchling.build"
@@ -0,0 +1,64 @@
1
+ """SpyLLM Python SDK — two-line automatic LLM tracing.
2
+
3
+ Usage::
4
+
5
+ import spyllm
6
+ spyllm.init(api_key="sk-...")
7
+
8
+ # Every OpenAI / Anthropic call is now auto-traced.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import Optional
14
+
15
+ from .client import SpyLLMClient, agent_trace
16
+
17
+ __all__ = [
18
+ "init",
19
+ "shutdown",
20
+ "SpyLLMClient",
21
+ "agent_trace",
22
+ ]
23
+
24
+ _client: Optional[SpyLLMClient] = None
25
+
26
+
27
def init(
    api_key: str,
    *,
    base_url: str = "https://api.spyllm.dev",
    instrument: bool = True,
) -> SpyLLMClient:
    """Initialise the SpyLLM SDK.

    This creates a background batcher, monkey-patches supported LLM providers,
    and returns a client instance for manual tracing if needed.

    Calling ``init()`` again replaces the previous configuration: the old
    batcher is shut down first so its daemon thread and atexit hook are not
    leaked alongside the new one.

    Args:
        api_key: Your SpyLLM API key.
        base_url: Override the API endpoint (useful for self-hosted).
        instrument: If True (default), auto-patch OpenAI and Anthropic.

    Returns:
        The module-level :class:`SpyLLMClient` usable for manual tracing.
    """
    global _client

    from .batcher import TraceBatcher
    from . import instrumentor

    # Re-initialisation: flush and stop the previous batcher before replacing
    # it, otherwise each init() leaked a worker thread and an atexit callback.
    if instrumentor._batcher is not None:
        instrumentor._batcher.shutdown()

    # NOTE(review): this default ("api.spyllm.dev") differs from
    # SpyLLMClient's default and the README ("api.spyllm.com") — confirm
    # which endpoint is canonical.
    batcher = TraceBatcher(api_key=api_key, base_url=base_url)
    instrumentor._batcher = batcher

    _client = SpyLLMClient(api_key=api_key, base_url=base_url)

    if instrument:
        instrumentor.patch_all()

    return _client
57
+
58
+
59
def shutdown() -> None:
    """Flush pending traces and shut down the background thread.

    After shutdown the module-level batcher is cleared; the provider
    instrumentation wrappers check for a ``None`` batcher and become
    pass-throughs, so no traces are queued into a stopped batcher. The
    call is safe to repeat (idempotent).
    """
    from . import instrumentor

    if instrumentor._batcher is not None:
        instrumentor._batcher.shutdown()
        # Clearing the batcher disables the patched-method tracing and lets
        # a later init() start from a clean slate.
        instrumentor._batcher = None
@@ -0,0 +1,85 @@
1
+ """Background trace batcher.
2
+
3
+ Collects traces in a thread-safe queue and flushes them to the SpyLLM API in
4
+ a daemon thread so instrumented calls never block on network I/O.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import atexit
10
+ import logging
11
+ import queue
12
+ import threading
13
+ from typing import Any, Optional
14
+
15
+ import httpx
16
+
17
+ logger = logging.getLogger("spyllm")
18
+
19
+ _MAX_BATCH = 50
20
+ _FLUSH_INTERVAL = 2.0 # seconds
21
+
22
+
23
class TraceBatcher:
    """Thread-safe trace queue flushed by a background daemon thread.

    ``enqueue`` never blocks on network I/O: traces go into a queue that a
    daemon worker drains, sending up to ``_MAX_BATCH`` traces per request and
    waking at least every ``_FLUSH_INTERVAL`` seconds. ``flush`` is registered
    with ``atexit`` so traces queued just before interpreter exit are still
    delivered best-effort; send failures are logged at DEBUG and never raised.
    """

    def __init__(self, api_key: str, base_url: str) -> None:
        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._queue: queue.Queue[dict[str, Any]] = queue.Queue()
        self._shutdown = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()
        atexit.register(self.flush)

    def enqueue(self, trace: dict[str, Any]) -> None:
        """Queue a trace for asynchronous delivery. Never blocks on I/O."""
        self._queue.put(trace)

    def flush(self) -> None:
        """Drain the queue and send everything. Called at exit or manually."""
        batch: list[dict[str, Any]] = []
        while True:
            try:
                batch.append(self._queue.get_nowait())
            except queue.Empty:
                break
        if batch:
            self._send(batch)

    def shutdown(self) -> None:
        """Stop the worker thread, wait for it, then flush what remains.

        Joining the worker before the final flush prevents the worker and the
        calling thread from draining and sending the queue concurrently
        (previously ``shutdown`` raced the worker's in-flight batch).
        """
        self._shutdown.set()
        # The worker may be blocked in Queue.get(timeout=_FLUSH_INTERVAL);
        # bound the wait so shutdown cannot hang indefinitely.
        self._thread.join(timeout=_FLUSH_INTERVAL + 1.0)
        self.flush()

    def _run(self) -> None:
        """Worker loop: block for one trace, opportunistically batch, send."""
        while not self._shutdown.is_set():
            batch: list[dict[str, Any]] = []
            try:
                # Block up to the flush interval so the loop re-checks the
                # shutdown flag even while the queue is idle.
                batch.append(self._queue.get(timeout=_FLUSH_INTERVAL))
            except queue.Empty:
                continue

            # Grab whatever else is already queued, up to the batch cap.
            while len(batch) < _MAX_BATCH:
                try:
                    batch.append(self._queue.get_nowait())
                except queue.Empty:
                    break

            self._send(batch)

    def _send(self, batch: list[dict[str, Any]]) -> None:
        """POST a batch to the API; a single trace uses the non-batch route."""
        if not batch:
            return
        try:
            with httpx.Client(timeout=10.0) as client:
                if len(batch) == 1:
                    client.post(
                        f"{self._base_url}/v1/traces",
                        json=batch[0],
                        headers={"X-API-Key": self._api_key},
                    )
                else:
                    client.post(
                        f"{self._base_url}/v1/traces/batch",
                        json=batch,
                        headers={"X-API-Key": self._api_key},
                    )
        except Exception:
            # Tracing must never break the host application.
            logger.debug("spyllm: failed to send %d traces", len(batch), exc_info=True)
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import threading
5
+ import time
6
+ from typing import Any, Optional
7
+
8
+ import httpx
9
+
10
+
11
class SpyLLMClient:
    """Low-level client for the SpyLLM REST API.

    Most users should use ``spyllm.init()`` instead, which sets up automatic
    instrumentation. This class is useful for manual tracing or advanced use.

    The client owns an ``httpx.Client`` connection pool; previously it was
    never released until garbage collection. Call :meth:`close` (or use the
    instance as a context manager) to release it deterministically.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.spyllm.com") -> None:
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self._http = httpx.Client(
            base_url=self.base_url,
            headers={"X-API-Key": self.api_key},
            timeout=30.0,
        )

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self._http.close()

    def __enter__(self) -> "SpyLLMClient":
        return self

    def __exit__(self, *exc_info: Any) -> None:
        self.close()

    def trace(
        self,
        agent_name: str,
        prompt: str,
        response: str,
        *,
        trace_type: str = "llm_call",
        status: str = "success",
        latency_ms: Optional[float] = None,
        token_count: Optional[int] = None,
        cost_usd: Optional[float] = None,
        session_id: Optional[str] = None,
        metadata: Optional[str] = None,
        tool_calls: Optional[str] = None,
    ) -> str:
        """Record a single trace and return its server-assigned id.

        Only keyword fields that are not None are included in the payload.

        Raises:
            httpx.HTTPStatusError: If the API responds with a non-2xx status.
        """
        payload: dict[str, Any] = {
            "agent_name": agent_name,
            "prompt": prompt,
            "response": response,
            "trace_type": trace_type,
            "status": status,
        }
        for key, val in [
            ("latency_ms", latency_ms),
            ("token_count", token_count),
            ("cost_usd", cost_usd),
            ("session_id", session_id),
            ("metadata", metadata),
            ("tool_calls", tool_calls),
        ]:
            if val is not None:
                payload[key] = val

        resp = self._http.post("/v1/traces", json=payload)
        resp.raise_for_status()
        return resp.json()["id"]

    def search(self, query: str, *, top_k: int = 20, status: Optional[str] = None) -> list[dict[str, Any]]:
        """Search stored traces; optionally filter by status.

        Raises:
            httpx.HTTPStatusError: If the API responds with a non-2xx status.
        """
        payload: dict[str, Any] = {"query": query, "top_k": top_k}
        if status:
            payload["status"] = status
        resp = self._http.post("/v1/search", json=payload)
        resp.raise_for_status()
        return resp.json()
71
+
72
+
73
def agent_trace(agent_name: str, client: Optional[SpyLLMClient] = None) -> Any:
    """Decorator that automatically traces a function call.

    Records the stringified arguments as the prompt and the return value (or
    exception text) as the response. The trace itself is sent from a daemon
    thread so the decorated function never blocks on network I/O.
    """
    def decorator(func: Any) -> Any:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            started = time.time()
            outcome: Any = None
            call_status = "success"
            try:
                outcome = func(*args, **kwargs)
                return outcome
            except Exception as err:
                call_status = "error"
                outcome = str(err)
                raise
            finally:
                active = client or _get_default_client()
                if active:
                    payload = {
                        "agent_name": agent_name,
                        "prompt": str(args) + str(kwargs),
                        "response": str(outcome),
                        "status": call_status,
                        "latency_ms": (time.time() - started) * 1000,
                    }
                    reporter = threading.Thread(
                        target=active.trace,
                        kwargs=payload,
                        daemon=True,
                    )
                    reporter.start()
        return wrapper
    return decorator
104
+
105
+
106
def _get_default_client() -> Optional[SpyLLMClient]:
    """Return the client created by ``spyllm.init()``, or None if uninitialised.

    The ``from . import _client`` form re-reads the package attribute on every
    call, so it always reflects the most recent ``init()`` rather than a stale
    import-time binding.
    """
    from . import _client
    return _client
@@ -0,0 +1,3 @@
1
+ from .client import agent_trace
2
+
3
+ __all__ = ["agent_trace"]
@@ -0,0 +1,459 @@
1
+ """Auto-instrumentation for OpenAI and Anthropic Python SDKs.
2
+
3
+ When `patch_openai()` or `patch_anthropic()` is called, the respective SDK's
4
+ `create` methods are monkey-patched via `wrapt` so every LLM call is
5
+ automatically traced — zero code changes required for the end user.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import time
13
+ import types
14
+ from dataclasses import dataclass
15
+ from typing import Any, Optional
16
+
17
+ from wrapt import wrap_function_wrapper
18
+
19
+ from .pricing import estimate_cost
20
+
21
+ logger = logging.getLogger("spyllm")
22
+
23
+ # Will be set by `init()` in __init__.py
24
+ _batcher: Any = None
25
+
26
+
27
@dataclass
class _PatchTarget:
    # One monkey-patch target: a method on a class inside an SDK module.
    module: str    # dotted module path containing the class to patch
    cls: str       # class name within `module`
    method: str    # method name on that class ("create" for all current targets)
    provider: str  # "openai" or "anthropic" — selects the trace extractor
    is_async: bool # whether the target method is async (selects wrapper factory)
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # OpenAI targets
38
+ # ---------------------------------------------------------------------------
39
+
40
+ _OPENAI_TARGETS: list[_PatchTarget] = [
41
+ _PatchTarget("openai.resources.chat.completions", "Completions", "create", "openai", False),
42
+ _PatchTarget("openai.resources.chat.completions", "AsyncCompletions", "create", "openai", True),
43
+ _PatchTarget("openai.resources.completions", "Completions", "create", "openai", False),
44
+ _PatchTarget("openai.resources.completions", "AsyncCompletions", "create", "openai", True),
45
+ ]
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Anthropic targets
49
+ # ---------------------------------------------------------------------------
50
+
51
+ _ANTHROPIC_TARGETS: list[_PatchTarget] = [
52
+ _PatchTarget("anthropic.resources.messages", "Messages", "create", "anthropic", False),
53
+ _PatchTarget("anthropic.resources.messages", "AsyncMessages", "create", "anthropic", True),
54
+ ]
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Extraction helpers
59
+ # ---------------------------------------------------------------------------
60
+
61
+ def _safe_json(obj: Any) -> str:
62
+ try:
63
+ return json.dumps(obj, default=str)
64
+ except Exception:
65
+ return str(obj)
66
+
67
+
68
def _extract_openai_data(kwargs: dict[str, Any], response: Any) -> dict[str, Any]:
    """Pull trace fields from an OpenAI chat/completion response.

    Args:
        kwargs: The keyword arguments the user passed to ``create``.
        response: The (non-streaming) OpenAI response object.

    Returns:
        A trace payload dict; values may be None (callers filter out Nones).
    """
    # Dead code removed: the original built `resp_dict = response.__dict__`
    # and never used it.
    usage = getattr(response, "usage", None)

    model = getattr(response, "model", None) or kwargs.get("model", "unknown")
    input_tokens = getattr(usage, "prompt_tokens", 0) or 0
    output_tokens = getattr(usage, "completion_tokens", 0) or 0

    # Chat API sends "messages"; the legacy completions API sends "prompt".
    messages = kwargs.get("messages", [])
    prompt_str = _safe_json(messages) if messages else kwargs.get("prompt", "")

    choices = getattr(response, "choices", [])
    response_text = ""
    tool_calls_data = None
    if choices:
        choice = choices[0]
        msg = getattr(choice, "message", None)
        if msg:
            response_text = getattr(msg, "content", "") or ""
            tc = getattr(msg, "tool_calls", None)
            if tc:
                tool_calls_data = _safe_json([t.__dict__ for t in tc] if hasattr(tc[0], "__dict__") else tc)
        elif hasattr(choice, "text"):
            # Legacy completions: the text lives directly on the choice.
            response_text = getattr(choice, "text", "")

    cost = estimate_cost(model, input_tokens, output_tokens)  # None for unknown models

    return {
        "agent_name": model,
        "prompt": prompt_str if isinstance(prompt_str, str) else _safe_json(prompt_str),
        "response": response_text,
        "trace_type": "llm_call",
        "status": "success",
        "token_count": input_tokens + output_tokens,
        "cost_usd": cost,
        "tool_calls": tool_calls_data,
        "metadata": _safe_json({
            "provider": "openai",
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        }),
    }
112
+
113
+
114
def _extract_anthropic_data(kwargs: dict[str, Any], response: Any) -> dict[str, Any]:
    """Pull trace fields from an Anthropic messages response.

    Args:
        kwargs: Keyword arguments the user passed to ``Messages.create``.
        response: The (non-streaming) Anthropic message object.

    Returns:
        A trace payload dict; values may be None (callers filter out Nones).
    """
    model = getattr(response, "model", None) or kwargs.get("model", "unknown")
    usage = getattr(response, "usage", None)
    input_tokens = getattr(usage, "input_tokens", 0) or 0
    output_tokens = getattr(usage, "output_tokens", 0) or 0

    # Anthropic takes the system prompt as a separate kwarg; fold it back into
    # the message list so the stored prompt reflects everything sent.
    messages = kwargs.get("messages", [])
    system = kwargs.get("system", "")
    prompt_parts = []
    if system:
        prompt_parts.append({"role": "system", "content": system})
    prompt_parts.extend(messages)
    prompt_str = _safe_json(prompt_parts)

    # The response content is a list of typed blocks: concatenate "text"
    # blocks in order; collect "tool_use" blocks separately.
    content_blocks = getattr(response, "content", [])
    response_text = ""
    tool_calls_data = None
    tool_uses = []
    for block in content_blocks:
        block_type = getattr(block, "type", "")
        if block_type == "text":
            response_text += getattr(block, "text", "")
        elif block_type == "tool_use":
            tool_uses.append({
                "id": getattr(block, "id", ""),
                "name": getattr(block, "name", ""),
                "input": getattr(block, "input", {}),
            })
    if tool_uses:
        tool_calls_data = _safe_json(tool_uses)

    cost = estimate_cost(model, input_tokens, output_tokens)  # None for unknown models

    return {
        "agent_name": model,
        "prompt": prompt_str,
        "response": response_text,
        "trace_type": "llm_call",
        "status": "success",
        "token_count": input_tokens + output_tokens,
        "cost_usd": cost,
        "tool_calls": tool_calls_data,
        "metadata": _safe_json({
            "provider": "anthropic",
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        }),
    }
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Streaming wrappers
168
+ # ---------------------------------------------------------------------------
169
+
170
class _OpenAIStreamProxy:
    """Wraps an OpenAI sync Stream to capture chunks, then sends the trace
    when the stream is exhausted.

    The proxy is transparent: iteration, context-manager use, and attribute
    access are all forwarded to the wrapped stream. ``_finalize`` empties
    ``_chunks`` when it runs, so finalizing twice (e.g. iterator exhaustion
    followed by ``__exit__``) enqueues the trace only once.
    """

    def __init__(self, stream: Any, kwargs: dict[str, Any], start_time: float) -> None:
        # kwargs: the user's original create(...) arguments (prompt/model source).
        # start_time: time.time() captured before the API call, for latency.
        self._stream = stream
        self._kwargs = kwargs
        self._start_time = start_time
        self._chunks: list[Any] = []

    def __iter__(self) -> Any:
        # Generator: record each chunk as it flows through. The finally clause
        # fires on exhaustion AND on early generator close/break.
        try:
            for chunk in self._stream:
                self._chunks.append(chunk)
                yield chunk
        finally:
            self._finalize()

    def __next__(self) -> Any:
        # Supports callers that drive the stream manually with next().
        try:
            chunk = next(self._stream)
            self._chunks.append(chunk)
            return chunk
        except StopIteration:
            self._finalize()
            raise

    def __enter__(self) -> "_OpenAIStreamProxy":
        return self

    def __exit__(self, *args: Any) -> None:
        # NOTE(review): the wrapped stream's own __exit__/close is not
        # forwarded here — confirm whether the underlying response should be
        # closed on context exit.
        self._finalize()

    def __getattr__(self, name: str) -> Any:
        # Fall through to the wrapped stream for anything not intercepted.
        return getattr(self._stream, name)

    def _finalize(self) -> None:
        """Reassemble captured chunks into a single trace and enqueue it once."""
        if not self._chunks or _batcher is None:
            return
        latency_ms = (time.time() - self._start_time) * 1000
        model, content, usage, tool_calls = _reassemble_openai_stream(self._chunks)
        input_tokens = 0
        output_tokens = 0
        if usage:
            # Usage may be absent on streamed responses; default to 0 tokens.
            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
            output_tokens = getattr(usage, "completion_tokens", 0) or 0

        cost = estimate_cost(model or self._kwargs.get("model", "unknown"), input_tokens, output_tokens)
        messages = self._kwargs.get("messages", [])
        trace = {
            "agent_name": model or self._kwargs.get("model", "unknown"),
            "prompt": _safe_json(messages),
            "response": content or "",
            "trace_type": "llm_call",
            "status": "success",
            "latency_ms": latency_ms,
            "token_count": input_tokens + output_tokens,
            "cost_usd": cost,
            "tool_calls": _safe_json(tool_calls) if tool_calls else None,
            "metadata": _safe_json({"provider": "openai", "model": model, "streamed": True}),
        }
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        # Clearing the chunk buffer makes a second _finalize a no-op.
        self._chunks = []
233
+
234
+
235
class _OpenAIAsyncStreamProxy:
    """Async variant of the stream proxy.

    Mirrors ``_OpenAIStreamProxy``: records chunks as they pass through and
    enqueues one reassembled trace when the stream finishes. ``_finalize``
    empties ``_chunks``, so a duplicate finalize is a no-op.
    """

    def __init__(self, stream: Any, kwargs: dict[str, Any], start_time: float) -> None:
        # kwargs: the user's original create(...) arguments (prompt/model source).
        # start_time: time.time() captured before the API call, for latency.
        self._stream = stream
        self._kwargs = kwargs
        self._start_time = start_time
        self._chunks: list[Any] = []

    async def __aiter__(self) -> Any:
        # Async generator: the finally clause fires on exhaustion AND on
        # early aclose/break out of `async for`.
        try:
            async for chunk in self._stream:
                self._chunks.append(chunk)
                yield chunk
        finally:
            self._finalize()

    async def __anext__(self) -> Any:
        # Supports callers that drive the stream manually with anext().
        try:
            chunk = await self._stream.__anext__()
            self._chunks.append(chunk)
            return chunk
        except StopAsyncIteration:
            self._finalize()
            raise

    async def __aenter__(self) -> "_OpenAIAsyncStreamProxy":
        return self

    async def __aexit__(self, *args: Any) -> None:
        # NOTE(review): the wrapped stream's own __aexit__/aclose is not
        # forwarded — confirm whether the underlying response should be closed.
        self._finalize()

    def __getattr__(self, name: str) -> Any:
        # Fall through to the wrapped stream for anything not intercepted.
        return getattr(self._stream, name)

    def _finalize(self) -> None:
        # Synchronous on purpose: it only does local reassembly plus a
        # non-blocking queue put, so it is safe from async teardown paths.
        if not self._chunks or _batcher is None:
            return
        latency_ms = (time.time() - self._start_time) * 1000
        model, content, usage, tool_calls = _reassemble_openai_stream(self._chunks)
        input_tokens = 0
        output_tokens = 0
        if usage:
            # Usage may be absent on streamed responses; default to 0 tokens.
            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
            output_tokens = getattr(usage, "completion_tokens", 0) or 0

        cost = estimate_cost(model or self._kwargs.get("model", "unknown"), input_tokens, output_tokens)
        messages = self._kwargs.get("messages", [])
        trace = {
            "agent_name": model or self._kwargs.get("model", "unknown"),
            "prompt": _safe_json(messages),
            "response": content or "",
            "trace_type": "llm_call",
            "status": "success",
            "latency_ms": latency_ms,
            "token_count": input_tokens + output_tokens,
            "cost_usd": cost,
            "tool_calls": _safe_json(tool_calls) if tool_calls else None,
            "metadata": _safe_json({"provider": "openai", "model": model, "streamed": True}),
        }
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        # Clearing the chunk buffer makes a second _finalize a no-op.
        self._chunks = []
297
+
298
+
299
+ def _reassemble_openai_stream(chunks: list[Any]) -> tuple[Optional[str], str, Any, Optional[list[Any]]]:
300
+ """Reassemble streamed OpenAI chunks into (model, content, usage, tool_calls)."""
301
+ model: Optional[str] = None
302
+ content_parts: list[str] = []
303
+ usage = None
304
+ tool_calls: dict[int, dict[str, str]] = {}
305
+
306
+ for chunk in chunks:
307
+ c = chunk if isinstance(chunk, dict) else chunk.__dict__
308
+ model = model or c.get("model")
309
+ usage = c.get("usage") or usage
310
+ for choice in c.get("choices", []):
311
+ ch = choice if isinstance(choice, dict) else choice.__dict__
312
+ delta = ch.get("delta")
313
+ if delta is None:
314
+ continue
315
+ d = delta if isinstance(delta, dict) else delta.__dict__
316
+ if d.get("content"):
317
+ content_parts.append(d["content"])
318
+ if d.get("tool_calls"):
319
+ for tc in d["tool_calls"]:
320
+ tc_d = tc if isinstance(tc, dict) else tc.__dict__
321
+ idx = tc_d.get("index", 0)
322
+ if idx not in tool_calls:
323
+ func = tc_d.get("function", {})
324
+ func_d = func if isinstance(func, dict) else func.__dict__
325
+ tool_calls[idx] = {"name": func_d.get("name", ""), "arguments": func_d.get("arguments", "")}
326
+ else:
327
+ func = tc_d.get("function", {})
328
+ func_d = func if isinstance(func, dict) else func.__dict__
329
+ tool_calls[idx]["name"] = tool_calls[idx]["name"] or func_d.get("name", "")
330
+ tool_calls[idx]["arguments"] += func_d.get("arguments", "")
331
+
332
+ tc_list = list(tool_calls.values()) if tool_calls else None
333
+ return model, "".join(content_parts), usage, tc_list
334
+
335
+
336
+ # ---------------------------------------------------------------------------
337
+ # Wrapper factories
338
+ # ---------------------------------------------------------------------------
339
+
340
def _make_sync_wrapper(target: _PatchTarget) -> Any:
    """Build a wrapt-style sync wrapper that traces calls to *target*.

    Returns a ``wrapper(wrapped, instance, args, kwargs)`` callable suitable
    for ``wrapt.wrap_function_wrapper``. Errors raised by the provider are
    traced with status "error" and re-raised unchanged.
    """
    extractor = _extract_openai_data if target.provider == "openai" else _extract_anthropic_data

    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
        # If init() hasn't installed a batcher, stay completely out of the way.
        if _batcher is None:
            return wrapped(*args, **kwargs)

        start = time.time()
        try:
            response = wrapped(*args, **kwargs)
        except Exception as exc:
            latency_ms = (time.time() - start) * 1000
            _batcher.enqueue({
                "agent_name": kwargs.get("model", "unknown"),
                "prompt": _safe_json(kwargs.get("messages", [])),
                "response": str(exc),
                "trace_type": "llm_call",
                "status": "error",
                "latency_ms": latency_ms,
                "metadata": _safe_json({"provider": target.provider, "error": str(exc)}),
            })
            raise

        if _is_stream(response):
            # BUG FIX: _OpenAIStreamProxy/_reassemble_openai_stream assume the
            # OpenAI chunk schema; wrapping an Anthropic stream produced a
            # garbage trace (empty content, model=None). Non-OpenAI streams
            # are returned untouched (untraced) until a provider-specific
            # proxy exists.
            if target.provider == "openai":
                return _OpenAIStreamProxy(response, kwargs, start)
            return response

        latency_ms = (time.time() - start) * 1000
        trace = extractor(kwargs, response)
        trace["latency_ms"] = latency_ms
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        return response

    return wrapper
374
+
375
+
376
def _make_async_wrapper(target: _PatchTarget) -> Any:
    """Build a wrapt-style async wrapper that traces calls to *target*.

    Async counterpart of ``_make_sync_wrapper``: same tracing contract, with
    the provider call awaited and async streams wrapped in the async proxy.
    """
    extractor = _extract_openai_data if target.provider == "openai" else _extract_anthropic_data

    async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
        # If init() hasn't installed a batcher, stay completely out of the way.
        if _batcher is None:
            return await wrapped(*args, **kwargs)

        start = time.time()
        try:
            response = await wrapped(*args, **kwargs)
        except Exception as exc:
            latency_ms = (time.time() - start) * 1000
            _batcher.enqueue({
                "agent_name": kwargs.get("model", "unknown"),
                "prompt": _safe_json(kwargs.get("messages", [])),
                "response": str(exc),
                "trace_type": "llm_call",
                "status": "error",
                "latency_ms": latency_ms,
                "metadata": _safe_json({"provider": target.provider, "error": str(exc)}),
            })
            raise

        if _is_stream(response):
            # BUG FIX: the async proxy reassembles OpenAI-format chunks;
            # wrapping an Anthropic stream produced a garbage trace. Non-OpenAI
            # streams are returned untouched (untraced) until a
            # provider-specific proxy exists.
            if target.provider == "openai":
                return _OpenAIAsyncStreamProxy(response, kwargs, start)
            return response

        latency_ms = (time.time() - start) * 1000
        trace = extractor(kwargs, response)
        trace["latency_ms"] = latency_ms
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        return response

    return wrapper
410
+
411
+
412
+ def _is_stream(obj: Any) -> bool:
413
+ return (
414
+ isinstance(obj, types.GeneratorType)
415
+ or isinstance(obj, types.AsyncGeneratorType)
416
+ or type(obj).__name__ in ("Stream", "AsyncStream")
417
+ )
418
+
419
+
420
+ # ---------------------------------------------------------------------------
421
+ # Public patching API
422
+ # ---------------------------------------------------------------------------
423
+
424
def patch_openai() -> None:
    """Monkey-patch the OpenAI Python SDK so all calls are auto-traced.

    No-op if ``openai`` is not importable. Each target is patched
    independently: a failure on one (e.g. the SDK's module layout changed)
    is logged at DEBUG and never raises, so partial instrumentation still
    succeeds.
    """
    try:
        import openai  # noqa: F401
    except ImportError:
        logger.debug("openai not installed, skipping instrumentation")
        return

    for target in _OPENAI_TARGETS:
        try:
            # Async targets need the coroutine wrapper so `await` is preserved.
            wrapper = _make_async_wrapper(target) if target.is_async else _make_sync_wrapper(target)
            wrap_function_wrapper(target.module, f"{target.cls}.{target.method}", wrapper)
        except Exception:
            logger.debug("Failed to patch %s.%s.%s", target.module, target.cls, target.method, exc_info=True)
438
+
439
+
440
def patch_anthropic() -> None:
    """Monkey-patch the Anthropic Python SDK so all calls are auto-traced.

    No-op if ``anthropic`` is not importable. Each target is patched
    independently: a failure on one is logged at DEBUG and never raises,
    so partial instrumentation still succeeds.
    """
    try:
        import anthropic  # noqa: F401
    except ImportError:
        logger.debug("anthropic not installed, skipping instrumentation")
        return

    for target in _ANTHROPIC_TARGETS:
        try:
            # Async targets need the coroutine wrapper so `await` is preserved.
            wrapper = _make_async_wrapper(target) if target.is_async else _make_sync_wrapper(target)
            wrap_function_wrapper(target.module, f"{target.cls}.{target.method}", wrapper)
        except Exception:
            logger.debug("Failed to patch %s.%s.%s", target.module, target.cls, target.method, exc_info=True)
454
+
455
+
456
def patch_all() -> None:
    """Patch all supported providers (each is a no-op if not installed)."""
    patch_openai()
    patch_anthropic()
@@ -0,0 +1,20 @@
1
+ from opentelemetry import trace
2
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
3
+ from opentelemetry.sdk.resources import Resource
4
+ from opentelemetry.sdk.trace import TracerProvider
5
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
6
+
7
+
8
def init_otel(
    api_key: str,
    endpoint: str = "https://api.spyllm.com",
    service_name: str = "my-agent",
) -> TracerProvider:
    """Install a global OpenTelemetry tracer provider that exports to SpyLLM.

    Spans are batched via ``BatchSpanProcessor`` and shipped over OTLP/HTTP to
    ``{endpoint}/v1/traces/otlp`` with the API key in the ``X-API-Key`` header.

    Args:
        api_key: SpyLLM API key.
        endpoint: Base URL of the SpyLLM API (no trailing path).
        service_name: Value for the ``service.name`` resource attribute.

    Returns:
        The ``TracerProvider`` that was installed as the global provider.
    """
    provider = TracerProvider(resource=Resource.create({"service.name": service_name}))
    exporter = OTLPSpanExporter(
        endpoint=f"{endpoint}/v1/traces/otlp",
        headers={"X-API-Key": api_key},
    )
    provider.add_span_processor(BatchSpanProcessor(exporter))
    # Installs the provider process-wide so any tracer obtained via
    # opentelemetry.trace afterwards exports through it.
    trace.set_tracer_provider(provider)
    return provider
@@ -0,0 +1,81 @@
1
+ """Built-in model pricing table.
2
+
3
+ Costs are in USD per token (not per 1K tokens). We maintain this table so the
4
+ SDK can estimate `cost_usd` without any user configuration. Prices should be
5
+ updated periodically as providers change their rates.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Optional
11
+
12
+ # fmt: off
13
+ MODEL_COSTS: dict[str, dict[str, float]] = {
14
+ # OpenAI
15
+ "gpt-4o": {"input": 2.50 / 1_000_000, "output": 10.00 / 1_000_000},
16
+ "gpt-4o-2024-11-20": {"input": 2.50 / 1_000_000, "output": 10.00 / 1_000_000},
17
+ "gpt-4o-2024-08-06": {"input": 2.50 / 1_000_000, "output": 10.00 / 1_000_000},
18
+ "gpt-4o-2024-05-13": {"input": 5.00 / 1_000_000, "output": 15.00 / 1_000_000},
19
+ "gpt-4o-mini": {"input": 0.15 / 1_000_000, "output": 0.60 / 1_000_000},
20
+ "gpt-4o-mini-2024-07-18":{"input": 0.15 / 1_000_000, "output": 0.60 / 1_000_000},
21
+ "gpt-4-turbo": {"input": 10.00 / 1_000_000, "output": 30.00 / 1_000_000},
22
+ "gpt-4": {"input": 30.00 / 1_000_000, "output": 60.00 / 1_000_000},
23
+ "gpt-3.5-turbo": {"input": 0.50 / 1_000_000, "output": 1.50 / 1_000_000},
24
+ "o1": {"input": 15.00 / 1_000_000, "output": 60.00 / 1_000_000},
25
+ "o1-mini": {"input": 3.00 / 1_000_000, "output": 12.00 / 1_000_000},
26
+ "o1-preview": {"input": 15.00 / 1_000_000, "output": 60.00 / 1_000_000},
27
+ "o3-mini": {"input": 1.10 / 1_000_000, "output": 4.40 / 1_000_000},
28
+
29
+ # Anthropic
30
+ "claude-3-5-sonnet-20241022": {"input": 3.00 / 1_000_000, "output": 15.00 / 1_000_000},
31
+ "claude-3-5-sonnet-20240620": {"input": 3.00 / 1_000_000, "output": 15.00 / 1_000_000},
32
+ "claude-3-5-haiku-20241022": {"input": 0.80 / 1_000_000, "output": 4.00 / 1_000_000},
33
+ "claude-3-opus-20240229": {"input": 15.00/ 1_000_000, "output": 75.00 / 1_000_000},
34
+ "claude-3-sonnet-20240229": {"input": 3.00 / 1_000_000, "output": 15.00 / 1_000_000},
35
+ "claude-3-haiku-20240307": {"input": 0.25 / 1_000_000, "output": 1.25 / 1_000_000},
36
+ "claude-sonnet-4-20250514": {"input": 3.00 / 1_000_000, "output": 15.00 / 1_000_000},
37
+ "claude-haiku-4-20250514": {"input": 0.80 / 1_000_000, "output": 4.00 / 1_000_000},
38
+ }
39
+ # fmt: on
40
+
41
+ # Prefix aliases so "gpt-4o-2024-*" matches even if the exact snapshot isn't listed.
42
+ _PREFIX_MAP: dict[str, str] = {
43
+ "gpt-4o-mini": "gpt-4o-mini",
44
+ "gpt-4o": "gpt-4o",
45
+ "gpt-4-turbo": "gpt-4-turbo",
46
+ "gpt-4": "gpt-4",
47
+ "gpt-3.5-turbo": "gpt-3.5-turbo",
48
+ "o3-mini": "o3-mini",
49
+ "o1-mini": "o1-mini",
50
+ "o1-preview": "o1-preview",
51
+ "o1": "o1",
52
+ "claude-3-5-sonnet": "claude-3-5-sonnet-20241022",
53
+ "claude-3-5-haiku": "claude-3-5-haiku-20241022",
54
+ "claude-3-opus": "claude-3-opus-20240229",
55
+ "claude-3-sonnet": "claude-3-sonnet-20240229",
56
+ "claude-3-haiku": "claude-3-haiku-20240307",
57
+ "claude-sonnet-4": "claude-sonnet-4-20250514",
58
+ "claude-haiku-4": "claude-haiku-4-20250514",
59
+ }
60
+
61
+
62
def _resolve_model(model: str) -> Optional[str]:
    """Map *model* to a MODEL_COSTS key via exact then prefix match, or None."""
    if model in MODEL_COSTS:
        return model
    # _PREFIX_MAP lists more specific prefixes first (e.g. "o1-mini" before
    # "o1"), so the first match is the right one.
    return next(
        (canonical for prefix, canonical in _PREFIX_MAP.items() if model.startswith(prefix)),
        None,
    )
69
+
70
+
71
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
) -> Optional[float]:
    """Return estimated USD cost, or None if the model is unknown."""
    key = _resolve_model(model)
    if key is None:
        return None
    # Rates are USD per single token (see module docstring).
    per_token = MODEL_COSTS[key]
    total = input_tokens * per_token["input"]
    total += output_tokens * per_token["output"]
    return total