spyllm 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spyllm-0.2.0/.gitignore +25 -0
- spyllm-0.2.0/PKG-INFO +15 -0
- spyllm-0.2.0/README.md +82 -0
- spyllm-0.2.0/pyproject.toml +25 -0
- spyllm-0.2.0/spyllm/__init__.py +64 -0
- spyllm-0.2.0/spyllm/batcher.py +85 -0
- spyllm-0.2.0/spyllm/client.py +108 -0
- spyllm-0.2.0/spyllm/decorators.py +3 -0
- spyllm-0.2.0/spyllm/instrumentor.py +459 -0
- spyllm-0.2.0/spyllm/otel.py +20 -0
- spyllm-0.2.0/spyllm/pricing.py +81 -0
spyllm-0.2.0/.gitignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
.pytest_cache/
|
|
3
|
+
.venv/
|
|
4
|
+
*.pyc
|
|
5
|
+
*.pyo
|
|
6
|
+
*.pyd
|
|
7
|
+
|
|
8
|
+
node_modules/
|
|
9
|
+
dist/
|
|
10
|
+
coverage/
|
|
11
|
+
.vite/
|
|
12
|
+
|
|
13
|
+
.env
|
|
14
|
+
.env.*
|
|
15
|
+
!.env.example
|
|
16
|
+
|
|
17
|
+
.DS_Store
|
|
18
|
+
Thumbs.db
|
|
19
|
+
|
|
20
|
+
.idea/
|
|
21
|
+
.vscode/
|
|
22
|
+
|
|
23
|
+
backend/.mypy_cache/
|
|
24
|
+
backend/.ruff_cache/
|
|
25
|
+
frontend/.eslintcache
|
spyllm-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spyllm
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Two-line automatic LLM tracing. Works with OpenAI, Anthropic, and more.
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Requires-Dist: httpx>=0.24.0
|
|
7
|
+
Requires-Dist: wrapt>=1.14.0
|
|
8
|
+
Provides-Extra: anthropic
|
|
9
|
+
Requires-Dist: anthropic>=0.18.0; extra == 'anthropic'
|
|
10
|
+
Provides-Extra: openai
|
|
11
|
+
Requires-Dist: openai>=1.0.0; extra == 'openai'
|
|
12
|
+
Provides-Extra: otel
|
|
13
|
+
Requires-Dist: opentelemetry-api>=1.20.0; extra == 'otel'
|
|
14
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.20.0; extra == 'otel'
|
|
15
|
+
Requires-Dist: opentelemetry-sdk>=1.20.0; extra == 'otel'
|
spyllm-0.2.0/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# SpyLLM Python SDK
|
|
2
|
+
|
|
3
|
+
Automatic LLM tracing in two lines. Works with OpenAI, Anthropic, and more.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
import spyllm
|
|
9
|
+
|
|
10
|
+
spyllm.init(api_key="sk-...")
|
|
11
|
+
|
|
12
|
+
# That's it. Every OpenAI and Anthropic call is now automatically traced.
|
|
13
|
+
from openai import OpenAI
|
|
14
|
+
|
|
15
|
+
client = OpenAI()
|
|
16
|
+
response = client.chat.completions.create(
|
|
17
|
+
model="gpt-4o",
|
|
18
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
19
|
+
)
|
|
20
|
+
# Prompt, response, tokens, cost, and latency are captured automatically.
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Install
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install spyllm
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## What Gets Captured
|
|
30
|
+
|
|
31
|
+
Every LLM call automatically records:
|
|
32
|
+
|
|
33
|
+
- **Prompt** — full message history sent to the model
|
|
34
|
+
- **Response** — the model's output
|
|
35
|
+
- **Token count** — input + output tokens
|
|
36
|
+
- **Cost** — estimated USD cost based on model pricing
|
|
37
|
+
- **Latency** — wall-clock time for the API call
|
|
38
|
+
- **Tool calls** — if the model invoked tools/functions
|
|
39
|
+
- **Errors** — failed calls with the exception message
|
|
40
|
+
|
|
41
|
+
## Supported Providers
|
|
42
|
+
|
|
43
|
+
| Provider | Auto-instrumented |
|
|
44
|
+
|------------|-------------------|
|
|
45
|
+
| OpenAI | Yes |
|
|
46
|
+
| Anthropic | Yes |
|
|
47
|
+
|
|
48
|
+
## Advanced Usage
|
|
49
|
+
|
|
50
|
+
### Manual Tracing
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from spyllm import SpyLLMClient
|
|
54
|
+
|
|
55
|
+
client = SpyLLMClient(api_key="sk-...", base_url="https://api.spyllm.com")
|
|
56
|
+
client.trace(
|
|
57
|
+
agent_name="my-agent",
|
|
58
|
+
prompt="What is 2+2?",
|
|
59
|
+
response="4",
|
|
60
|
+
token_count=15,
|
|
61
|
+
cost_usd=0.001,
|
|
62
|
+
)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Decorator
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from spyllm import agent_trace, init
|
|
69
|
+
|
|
70
|
+
init(api_key="sk-...")
|
|
71
|
+
|
|
72
|
+
@agent_trace("my-pipeline")
|
|
73
|
+
def run_pipeline(query: str) -> str:
|
|
74
|
+
# your code here
|
|
75
|
+
return result
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Disable Auto-instrumentation
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
spyllm.init(api_key="sk-...", instrument=False)
|
|
82
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "spyllm"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Two-line automatic LLM tracing. Works with OpenAI, Anthropic, and more."
|
|
5
|
+
requires-python = ">=3.9"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"httpx>=0.24.0",
|
|
8
|
+
"wrapt>=1.14.0",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
[project.optional-dependencies]
|
|
12
|
+
openai = ["openai>=1.0.0"]
|
|
13
|
+
anthropic = ["anthropic>=0.18.0"]
|
|
14
|
+
otel = [
|
|
15
|
+
"opentelemetry-api>=1.20.0",
|
|
16
|
+
"opentelemetry-sdk>=1.20.0",
|
|
17
|
+
"opentelemetry-exporter-otlp-proto-http>=1.20.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[tool.hatch.build.targets.wheel]
|
|
21
|
+
packages = ["spyllm"]
|
|
22
|
+
|
|
23
|
+
[build-system]
|
|
24
|
+
requires = ["hatchling"]
|
|
25
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""SpyLLM Python SDK — two-line automatic LLM tracing.
|
|
2
|
+
|
|
3
|
+
Usage::
|
|
4
|
+
|
|
5
|
+
import spyllm
|
|
6
|
+
spyllm.init(api_key="sk-...")
|
|
7
|
+
|
|
8
|
+
# Every OpenAI / Anthropic call is now auto-traced.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
from .client import SpyLLMClient, agent_trace
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"init",
|
|
19
|
+
"shutdown",
|
|
20
|
+
"SpyLLMClient",
|
|
21
|
+
"agent_trace",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
_client: Optional[SpyLLMClient] = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def init(
    api_key: str,
    *,
    base_url: str = "https://api.spyllm.dev",
    instrument: bool = True,
) -> SpyLLMClient:
    """Initialise the SpyLLM SDK.

    This creates a background batcher, monkey-patches supported LLM providers,
    and returns a client instance for manual tracing if needed.

    Calling ``init()`` again replaces the previous configuration; the old
    batcher is shut down first so its pending traces are flushed and its
    background daemon thread does not leak.

    Args:
        api_key: Your SpyLLM API key.
        base_url: Override the API endpoint (useful for self-hosted).
        instrument: If True (default), auto-patch OpenAI and Anthropic.
    """
    global _client

    from .batcher import TraceBatcher
    from . import instrumentor

    # Re-initialisation: stop the previous batcher before replacing it,
    # otherwise its worker thread would keep running untracked.
    if instrumentor._batcher is not None:
        try:
            instrumentor._batcher.shutdown()
        except Exception:
            # Best-effort: a broken old batcher must not block re-init.
            pass

    batcher = TraceBatcher(api_key=api_key, base_url=base_url)
    instrumentor._batcher = batcher

    _client = SpyLLMClient(api_key=api_key, base_url=base_url)

    if instrument:
        instrumentor.patch_all()

    return _client
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def shutdown() -> None:
    """Flush pending traces and shut down the background thread."""
    from . import instrumentor

    batcher = instrumentor._batcher
    if batcher is not None:
        batcher.shutdown()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Background trace batcher.
|
|
2
|
+
|
|
3
|
+
Collects traces in a thread-safe queue and flushes them to the SpyLLM API in
|
|
4
|
+
a daemon thread so instrumented calls never block on network I/O.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import atexit
|
|
10
|
+
import logging
|
|
11
|
+
import queue
|
|
12
|
+
import threading
|
|
13
|
+
from typing import Any, Optional
|
|
14
|
+
|
|
15
|
+
import httpx
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("spyllm")
|
|
18
|
+
|
|
19
|
+
_MAX_BATCH = 50
|
|
20
|
+
_FLUSH_INTERVAL = 2.0 # seconds
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TraceBatcher:
    """Collects traces and flushes them to the API from a daemon thread.

    ``enqueue()`` never blocks; a background worker drains the queue in
    batches of up to ``_MAX_BATCH`` and POSTs them over HTTP. ``flush()`` is
    registered with ``atexit`` so queued traces are sent on interpreter exit.
    """

    def __init__(self, api_key: str, base_url: str) -> None:
        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._queue: queue.Queue[dict[str, Any]] = queue.Queue()
        self._shutdown = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()
        atexit.register(self.flush)

    def enqueue(self, trace: dict[str, Any]) -> None:
        """Queue a trace for asynchronous delivery (non-blocking)."""
        self._queue.put(trace)

    def flush(self) -> None:
        """Drain the queue and send everything. Called at exit or manually."""
        batch: list[dict[str, Any]] = []
        while True:
            try:
                batch.append(self._queue.get_nowait())
            except queue.Empty:
                break
        if batch:
            self._send(batch)

    def shutdown(self) -> None:
        """Stop the worker thread, then flush whatever is still queued."""
        self._shutdown.set()
        # Wait briefly for the worker so it cannot race flush() for the
        # remaining queue items; it is a daemon thread, so a bounded wait
        # is safe even if a send is in flight.
        self._thread.join(timeout=_FLUSH_INTERVAL * 2)
        self.flush()

    def _run(self) -> None:
        # Worker loop: block up to _FLUSH_INTERVAL for the first trace,
        # then greedily take up to _MAX_BATCH - 1 more without blocking.
        while not self._shutdown.is_set():
            batch: list[dict[str, Any]] = []
            try:
                batch.append(self._queue.get(timeout=_FLUSH_INTERVAL))
            except queue.Empty:
                continue

            while len(batch) < _MAX_BATCH:
                try:
                    batch.append(self._queue.get_nowait())
                except queue.Empty:
                    break

            self._send(batch)

    def _send(self, batch: list[dict[str, Any]]) -> None:
        """POST a batch: single traces to /v1/traces, larger to /batch.

        Network failures are swallowed (logged at DEBUG) — tracing must
        never break the host application.
        """
        if not batch:
            return
        try:
            with httpx.Client(timeout=10.0) as client:
                if len(batch) == 1:
                    client.post(
                        f"{self._base_url}/v1/traces",
                        json=batch[0],
                        headers={"X-API-Key": self._api_key},
                    )
                else:
                    client.post(
                        f"{self._base_url}/v1/traces/batch",
                        json=batch,
                        headers={"X-API-Key": self._api_key},
                    )
        except Exception:
            logger.debug("spyllm: failed to send %d traces", len(batch), exc_info=True)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SpyLLMClient:
    """Low-level client for the SpyLLM REST API.

    Most users should use ``spyllm.init()`` instead, which sets up automatic
    instrumentation. This class is useful for manual tracing or advanced use.
    """

    def __init__(self, api_key: str, base_url: str = "https://api.spyllm.com") -> None:
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        # One shared connection pool for every request made by this client.
        self._http = httpx.Client(
            base_url=self.base_url,
            headers={"X-API-Key": self.api_key},
            timeout=30.0,
        )

    def trace(
        self,
        agent_name: str,
        prompt: str,
        response: str,
        *,
        trace_type: str = "llm_call",
        status: str = "success",
        latency_ms: Optional[float] = None,
        token_count: Optional[int] = None,
        cost_usd: Optional[float] = None,
        session_id: Optional[str] = None,
        metadata: Optional[str] = None,
        tool_calls: Optional[str] = None,
    ) -> str:
        """Record a single trace and return its server-assigned id.

        Optional fields left as ``None`` are omitted from the payload
        entirely rather than being sent as nulls.
        """
        payload: dict[str, Any] = {
            "agent_name": agent_name,
            "prompt": prompt,
            "response": response,
            "trace_type": trace_type,
            "status": status,
        }
        optional = {
            "latency_ms": latency_ms,
            "token_count": token_count,
            "cost_usd": cost_usd,
            "session_id": session_id,
            "metadata": metadata,
            "tool_calls": tool_calls,
        }
        payload.update({k: v for k, v in optional.items() if v is not None})

        resp = self._http.post("/v1/traces", json=payload)
        resp.raise_for_status()
        return resp.json()["id"]

    def search(self, query: str, *, top_k: int = 20, status: Optional[str] = None) -> list[dict[str, Any]]:
        """Search recorded traces; returns the decoded JSON result list."""
        body: dict[str, Any] = {"query": query, "top_k": top_k}
        if status:
            body["status"] = status
        resp = self._http.post("/v1/search", json=body)
        resp.raise_for_status()
        return resp.json()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def agent_trace(agent_name: str, client: Optional[SpyLLMClient] = None) -> Any:
    """Decorator that automatically traces a function call."""

    def decorator(func: Any) -> Any:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            started = time.time()
            outcome: Any = None
            call_status = "success"
            try:
                outcome = func(*args, **kwargs)
                return outcome
            except Exception as exc:
                call_status = "error"
                outcome = str(exc)
                raise
            finally:
                # Report from a daemon thread so tracing never adds latency
                # to (or breaks) the decorated function itself.
                sdk_client = client or _get_default_client()
                if sdk_client:
                    threading.Thread(
                        target=sdk_client.trace,
                        kwargs={
                            "agent_name": agent_name,
                            "prompt": str(args) + str(kwargs),
                            "response": str(outcome),
                            "status": call_status,
                            "latency_ms": (time.time() - started) * 1000,
                        },
                        daemon=True,
                    ).start()

        return wrapper

    return decorator
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _get_default_client() -> Optional[SpyLLMClient]:
    """Return the module-level client configured by ``spyllm.init()``, if any."""
    from . import _client as default_client

    return default_client
|
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
"""Auto-instrumentation for OpenAI and Anthropic Python SDKs.
|
|
2
|
+
|
|
3
|
+
When `patch_openai()` or `patch_anthropic()` is called, the respective SDK's
|
|
4
|
+
`create` methods are monkey-patched via `wrapt` so every LLM call is
|
|
5
|
+
automatically traced — zero code changes required for the end user.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import time
|
|
13
|
+
import types
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from typing import Any, Optional
|
|
16
|
+
|
|
17
|
+
from wrapt import wrap_function_wrapper
|
|
18
|
+
|
|
19
|
+
from .pricing import estimate_cost
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger("spyllm")
|
|
22
|
+
|
|
23
|
+
# Will be set by `init()` in __init__.py
|
|
24
|
+
_batcher: Any = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class _PatchTarget:
    """Describes one provider SDK method to monkey-patch."""

    module: str     # dotted module path containing the class
    cls: str        # class name inside that module
    method: str     # method name to wrap (e.g. "create")
    provider: str   # "openai" or "anthropic"; selects the trace extractor
    is_async: bool  # True -> use the async wrapper factory
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# OpenAI targets
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
# Sync and async `create` for both chat completions and legacy completions.
_OPENAI_TARGETS: list[_PatchTarget] = [
    _PatchTarget("openai.resources.chat.completions", "Completions", "create", "openai", False),
    _PatchTarget("openai.resources.chat.completions", "AsyncCompletions", "create", "openai", True),
    _PatchTarget("openai.resources.completions", "Completions", "create", "openai", False),
    _PatchTarget("openai.resources.completions", "AsyncCompletions", "create", "openai", True),
]
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Anthropic targets
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
# Sync and async `create` on the Anthropic Messages API.
_ANTHROPIC_TARGETS: list[_PatchTarget] = [
    _PatchTarget("anthropic.resources.messages", "Messages", "create", "anthropic", False),
    _PatchTarget("anthropic.resources.messages", "AsyncMessages", "create", "anthropic", True),
]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ---------------------------------------------------------------------------
|
|
58
|
+
# Extraction helpers
|
|
59
|
+
# ---------------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
def _safe_json(obj: Any) -> str:
|
|
62
|
+
try:
|
|
63
|
+
return json.dumps(obj, default=str)
|
|
64
|
+
except Exception:
|
|
65
|
+
return str(obj)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _extract_openai_data(kwargs: dict[str, Any], response: Any) -> dict[str, Any]:
    """Pull trace fields from an OpenAI chat/completion response.

    Handles both chat completions (``choices[0].message``) and legacy
    completions (``choices[0].text``). Token counts default to 0 when the
    response carries no usage block.
    """
    # NOTE: the previous version built an unused `resp_dict` from
    # response.__dict__; it has been removed.
    usage = getattr(response, "usage", None)

    model = getattr(response, "model", None) or kwargs.get("model", "unknown")
    input_tokens = getattr(usage, "prompt_tokens", 0) or 0
    output_tokens = getattr(usage, "completion_tokens", 0) or 0

    # Chat calls carry `messages`; legacy completions carry `prompt`.
    messages = kwargs.get("messages", [])
    prompt_str = _safe_json(messages) if messages else kwargs.get("prompt", "")

    choices = getattr(response, "choices", [])
    response_text = ""
    tool_calls_data = None
    if choices:
        choice = choices[0]
        msg = getattr(choice, "message", None)
        if msg:
            response_text = getattr(msg, "content", "") or ""
            tc = getattr(msg, "tool_calls", None)
            if tc:
                tool_calls_data = _safe_json([t.__dict__ for t in tc] if hasattr(tc[0], "__dict__") else tc)
        elif hasattr(choice, "text"):
            response_text = getattr(choice, "text", "")

    cost = estimate_cost(model, input_tokens, output_tokens)

    return {
        "agent_name": model,
        "prompt": prompt_str if isinstance(prompt_str, str) else _safe_json(prompt_str),
        "response": response_text,
        "trace_type": "llm_call",
        "status": "success",
        "token_count": input_tokens + output_tokens,
        "cost_usd": cost,
        "tool_calls": tool_calls_data,
        "metadata": _safe_json({
            "provider": "openai",
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        }),
    }
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _extract_anthropic_data(kwargs: dict[str, Any], response: Any) -> dict[str, Any]:
    """Pull trace fields from an Anthropic messages response."""
    model = getattr(response, "model", None) or kwargs.get("model", "unknown")
    usage = getattr(response, "usage", None)
    input_tokens = getattr(usage, "input_tokens", 0) or 0
    output_tokens = getattr(usage, "output_tokens", 0) or 0

    # Anthropic passes the system prompt separately from `messages`; fold it
    # back in so the recorded prompt shows the full conversation.
    system = kwargs.get("system", "")
    prompt_parts: list[Any] = []
    if system:
        prompt_parts.append({"role": "system", "content": system})
    prompt_parts.extend(kwargs.get("messages", []))

    # Response content is a list of typed blocks: concatenate text blocks,
    # collect tool_use blocks separately.
    response_text = ""
    tool_uses: list[dict[str, Any]] = []
    for block in getattr(response, "content", []):
        kind = getattr(block, "type", "")
        if kind == "text":
            response_text += getattr(block, "text", "")
        elif kind == "tool_use":
            tool_uses.append({
                "id": getattr(block, "id", ""),
                "name": getattr(block, "name", ""),
                "input": getattr(block, "input", {}),
            })

    return {
        "agent_name": model,
        "prompt": _safe_json(prompt_parts),
        "response": response_text,
        "trace_type": "llm_call",
        "status": "success",
        "token_count": input_tokens + output_tokens,
        "cost_usd": estimate_cost(model, input_tokens, output_tokens),
        "tool_calls": _safe_json(tool_uses) if tool_uses else None,
        "metadata": _safe_json({
            "provider": "anthropic",
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
        }),
    }
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
# Streaming wrappers
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
class _OpenAIStreamProxy:
    """Wraps an OpenAI sync Stream to capture chunks, then sends the trace
    when the stream is exhausted."""

    def __init__(self, stream: Any, kwargs: dict[str, Any], start_time: float) -> None:
        # stream: the underlying SDK stream; kwargs: the original request
        # kwargs (source of prompt/model); start_time: wall-clock call start.
        self._stream = stream
        self._kwargs = kwargs
        self._start_time = start_time
        self._chunks: list[Any] = []

    def __iter__(self) -> Any:
        # Generator: records every chunk while passing it through; the trace
        # is emitted from the finally even if the caller stops iterating early.
        try:
            for chunk in self._stream:
                self._chunks.append(chunk)
                yield chunk
        finally:
            self._finalize()

    def __next__(self) -> Any:
        # Supports callers that drive the stream manually via next().
        try:
            chunk = next(self._stream)
            self._chunks.append(chunk)
            return chunk
        except StopIteration:
            self._finalize()
            raise

    def __enter__(self) -> "_OpenAIStreamProxy":
        return self

    def __exit__(self, *args: Any) -> None:
        self._finalize()

    def __getattr__(self, name: str) -> Any:
        # Delegate everything else (e.g. close()) to the wrapped SDK stream.
        return getattr(self._stream, name)

    def _finalize(self) -> None:
        """Reassemble the captured chunks into one trace and enqueue it.

        Idempotent: ``self._chunks`` is cleared at the end, so a second call
        (e.g. __exit__ after full iteration) is a no-op. Also a no-op if the
        SDK was never initialised (``_batcher`` is None).
        """
        if not self._chunks or _batcher is None:
            return
        latency_ms = (time.time() - self._start_time) * 1000
        model, content, usage, tool_calls = _reassemble_openai_stream(self._chunks)
        input_tokens = 0
        output_tokens = 0
        if usage:
            # usage may be absent on streamed responses; counts then stay 0.
            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
            output_tokens = getattr(usage, "completion_tokens", 0) or 0

        cost = estimate_cost(model or self._kwargs.get("model", "unknown"), input_tokens, output_tokens)
        messages = self._kwargs.get("messages", [])
        trace = {
            "agent_name": model or self._kwargs.get("model", "unknown"),
            "prompt": _safe_json(messages),
            "response": content or "",
            "trace_type": "llm_call",
            "status": "success",
            "latency_ms": latency_ms,
            "token_count": input_tokens + output_tokens,
            "cost_usd": cost,
            "tool_calls": _safe_json(tool_calls) if tool_calls else None,
            "metadata": _safe_json({"provider": "openai", "model": model, "streamed": True}),
        }
        # Drop None-valued fields so the API payload stays compact.
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        self._chunks = []
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class _OpenAIAsyncStreamProxy:
    """Async variant of the stream proxy."""

    def __init__(self, stream: Any, kwargs: dict[str, Any], start_time: float) -> None:
        # stream: underlying SDK async stream; kwargs: original request
        # kwargs; start_time: wall-clock start of the API call.
        self._stream = stream
        self._kwargs = kwargs
        self._start_time = start_time
        self._chunks: list[Any] = []

    async def __aiter__(self) -> Any:
        # Async generator: records chunks while passing them through; the
        # trace is emitted from the finally even if iteration stops early.
        try:
            async for chunk in self._stream:
                self._chunks.append(chunk)
                yield chunk
        finally:
            self._finalize()

    async def __anext__(self) -> Any:
        # Supports manual draining via __anext__().
        try:
            chunk = await self._stream.__anext__()
            self._chunks.append(chunk)
            return chunk
        except StopAsyncIteration:
            self._finalize()
            raise

    async def __aenter__(self) -> "_OpenAIAsyncStreamProxy":
        return self

    async def __aexit__(self, *args: Any) -> None:
        self._finalize()

    def __getattr__(self, name: str) -> Any:
        # Delegate everything else to the wrapped SDK stream.
        return getattr(self._stream, name)

    def _finalize(self) -> None:
        """Reassemble the captured chunks into one trace and enqueue it.

        Idempotent: ``self._chunks`` is cleared at the end, so repeated calls
        (e.g. __aexit__ after full iteration) are no-ops. Also a no-op if the
        SDK was never initialised (``_batcher`` is None).
        """
        if not self._chunks or _batcher is None:
            return
        latency_ms = (time.time() - self._start_time) * 1000
        model, content, usage, tool_calls = _reassemble_openai_stream(self._chunks)
        input_tokens = 0
        output_tokens = 0
        if usage:
            # usage may be absent on streamed responses; counts then stay 0.
            input_tokens = getattr(usage, "prompt_tokens", 0) or 0
            output_tokens = getattr(usage, "completion_tokens", 0) or 0

        cost = estimate_cost(model or self._kwargs.get("model", "unknown"), input_tokens, output_tokens)
        messages = self._kwargs.get("messages", [])
        trace = {
            "agent_name": model or self._kwargs.get("model", "unknown"),
            "prompt": _safe_json(messages),
            "response": content or "",
            "trace_type": "llm_call",
            "status": "success",
            "latency_ms": latency_ms,
            "token_count": input_tokens + output_tokens,
            "cost_usd": cost,
            "tool_calls": _safe_json(tool_calls) if tool_calls else None,
            "metadata": _safe_json({"provider": "openai", "model": model, "streamed": True}),
        }
        # Drop None-valued fields so the API payload stays compact.
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        self._chunks = []
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _reassemble_openai_stream(chunks: list[Any]) -> tuple[Optional[str], str, Any, Optional[list[Any]]]:
|
|
300
|
+
"""Reassemble streamed OpenAI chunks into (model, content, usage, tool_calls)."""
|
|
301
|
+
model: Optional[str] = None
|
|
302
|
+
content_parts: list[str] = []
|
|
303
|
+
usage = None
|
|
304
|
+
tool_calls: dict[int, dict[str, str]] = {}
|
|
305
|
+
|
|
306
|
+
for chunk in chunks:
|
|
307
|
+
c = chunk if isinstance(chunk, dict) else chunk.__dict__
|
|
308
|
+
model = model or c.get("model")
|
|
309
|
+
usage = c.get("usage") or usage
|
|
310
|
+
for choice in c.get("choices", []):
|
|
311
|
+
ch = choice if isinstance(choice, dict) else choice.__dict__
|
|
312
|
+
delta = ch.get("delta")
|
|
313
|
+
if delta is None:
|
|
314
|
+
continue
|
|
315
|
+
d = delta if isinstance(delta, dict) else delta.__dict__
|
|
316
|
+
if d.get("content"):
|
|
317
|
+
content_parts.append(d["content"])
|
|
318
|
+
if d.get("tool_calls"):
|
|
319
|
+
for tc in d["tool_calls"]:
|
|
320
|
+
tc_d = tc if isinstance(tc, dict) else tc.__dict__
|
|
321
|
+
idx = tc_d.get("index", 0)
|
|
322
|
+
if idx not in tool_calls:
|
|
323
|
+
func = tc_d.get("function", {})
|
|
324
|
+
func_d = func if isinstance(func, dict) else func.__dict__
|
|
325
|
+
tool_calls[idx] = {"name": func_d.get("name", ""), "arguments": func_d.get("arguments", "")}
|
|
326
|
+
else:
|
|
327
|
+
func = tc_d.get("function", {})
|
|
328
|
+
func_d = func if isinstance(func, dict) else func.__dict__
|
|
329
|
+
tool_calls[idx]["name"] = tool_calls[idx]["name"] or func_d.get("name", "")
|
|
330
|
+
tool_calls[idx]["arguments"] += func_d.get("arguments", "")
|
|
331
|
+
|
|
332
|
+
tc_list = list(tool_calls.values()) if tool_calls else None
|
|
333
|
+
return model, "".join(content_parts), usage, tc_list
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
# ---------------------------------------------------------------------------
|
|
337
|
+
# Wrapper factories
|
|
338
|
+
# ---------------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
def _make_sync_wrapper(target: _PatchTarget) -> Any:
    """Build a wrapt-compatible sync wrapper that traces `target`'s method."""
    # Choose the response parser once, at patch time.
    extractor = _extract_openai_data if target.provider == "openai" else _extract_anthropic_data

    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
        # SDK never initialised (spyllm.init() not called): act as unpatched.
        if _batcher is None:
            return wrapped(*args, **kwargs)

        start = time.time()
        try:
            response = wrapped(*args, **kwargs)
        except Exception as exc:
            # Record the failed call as an error trace, then re-raise so the
            # caller sees the original exception unchanged.
            latency_ms = (time.time() - start) * 1000
            trace = {
                "agent_name": kwargs.get("model", "unknown"),
                "prompt": _safe_json(kwargs.get("messages", [])),
                "response": str(exc),
                "trace_type": "llm_call",
                "status": "error",
                "latency_ms": latency_ms,
                "metadata": _safe_json({"provider": target.provider, "error": str(exc)}),
            }
            _batcher.enqueue(trace)
            raise

        if _is_stream(response):
            # Streaming: defer tracing until the stream is consumed.
            return _OpenAIStreamProxy(response, kwargs, start)

        latency_ms = (time.time() - start) * 1000
        trace = extractor(kwargs, response)
        trace["latency_ms"] = latency_ms
        # Drop None-valued fields so the API payload stays compact.
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        return response

    return wrapper
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _make_async_wrapper(target: _PatchTarget) -> Any:
    """Build a wrapt-compatible async wrapper that traces `target`'s method."""
    # Choose the response parser once, at patch time.
    extractor = _extract_openai_data if target.provider == "openai" else _extract_anthropic_data

    async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> Any:
        # SDK never initialised (spyllm.init() not called): act as unpatched.
        if _batcher is None:
            return await wrapped(*args, **kwargs)

        start = time.time()
        try:
            response = await wrapped(*args, **kwargs)
        except Exception as exc:
            # Record the failed call as an error trace, then re-raise so the
            # caller sees the original exception unchanged.
            latency_ms = (time.time() - start) * 1000
            trace = {
                "agent_name": kwargs.get("model", "unknown"),
                "prompt": _safe_json(kwargs.get("messages", [])),
                "response": str(exc),
                "trace_type": "llm_call",
                "status": "error",
                "latency_ms": latency_ms,
                "metadata": _safe_json({"provider": target.provider, "error": str(exc)}),
            }
            _batcher.enqueue(trace)
            raise

        if _is_stream(response):
            # Streaming: defer tracing until the stream is consumed.
            return _OpenAIAsyncStreamProxy(response, kwargs, start)

        latency_ms = (time.time() - start) * 1000
        trace = extractor(kwargs, response)
        trace["latency_ms"] = latency_ms
        # Drop None-valued fields so the API payload stays compact.
        _batcher.enqueue({k: v for k, v in trace.items() if v is not None})
        return response

    return wrapper
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _is_stream(obj: Any) -> bool:
|
|
413
|
+
return (
|
|
414
|
+
isinstance(obj, types.GeneratorType)
|
|
415
|
+
or isinstance(obj, types.AsyncGeneratorType)
|
|
416
|
+
or type(obj).__name__ in ("Stream", "AsyncStream")
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# ---------------------------------------------------------------------------
|
|
421
|
+
# Public patching API
|
|
422
|
+
# ---------------------------------------------------------------------------
|
|
423
|
+
|
|
424
|
+
def patch_openai() -> None:
    """Monkey-patch the OpenAI Python SDK so all calls are auto-traced.

    No-op when ``openai`` is not installed. A failure to patch one target
    (e.g. an SDK layout change) is logged at DEBUG and does not stop the
    remaining targets from being patched.
    """
    try:
        import openai  # noqa: F401
    except ImportError:
        logger.debug("openai not installed, skipping instrumentation")
        return

    for target in _OPENAI_TARGETS:
        factory = _make_async_wrapper if target.is_async else _make_sync_wrapper
        try:
            wrap_function_wrapper(target.module, f"{target.cls}.{target.method}", factory(target))
        except Exception:
            logger.debug("Failed to patch %s.%s.%s", target.module, target.cls, target.method, exc_info=True)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def patch_anthropic() -> None:
    """Monkey-patch the Anthropic Python SDK so all calls are auto-traced."""
    try:
        import anthropic  # noqa: F401
    except ImportError:
        # Best-effort: tracing is a no-op when the SDK isn't available.
        logger.debug("anthropic not installed, skipping instrumentation")
        return

    for target in _ANTHROPIC_TARGETS:
        try:
            # Sync and async entry points need different wrapper machinery.
            factory = _make_async_wrapper if target.is_async else _make_sync_wrapper
            wrap_function_wrapper(
                target.module,
                f"{target.cls}.{target.method}",
                factory(target),
            )
        except Exception:
            # Never break the host app because one patch point moved.
            logger.debug(
                "Failed to patch %s.%s.%s",
                target.module,
                target.cls,
                target.method,
                exc_info=True,
            )
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
def patch_all() -> None:
    """Patch all supported providers."""
    for patcher in (patch_openai, patch_anthropic):
        patcher()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from opentelemetry import trace
|
|
2
|
+
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
3
|
+
from opentelemetry.sdk.resources import Resource
|
|
4
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
5
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def init_otel(
    api_key: str,
    endpoint: str = "https://api.spyllm.com",
    service_name: str = "my-agent",
):
    """Install a global OpenTelemetry tracer provider that exports to spyllm.

    Spans are batched and shipped over OTLP/HTTP to
    ``{endpoint}/v1/traces/otlp``, authenticated with the ``X-API-Key``
    header. The new provider is registered globally via
    ``trace.set_tracer_provider`` and also returned so callers can shut
    it down explicitly.
    """
    resource = Resource.create({"service.name": service_name})
    tracer_provider = TracerProvider(resource=resource)

    span_exporter = OTLPSpanExporter(
        endpoint=f"{endpoint}/v1/traces/otlp",
        headers={"X-API-Key": api_key},
    )
    tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter))

    trace.set_tracer_provider(tracer_provider)
    return tracer_provider
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Built-in model pricing table.
|
|
2
|
+
|
|
3
|
+
Costs are in USD per token (not per 1K tokens). We maintain this table so the
|
|
4
|
+
SDK can estimate `cost_usd` without any user configuration. Prices should be
|
|
5
|
+
updated periodically as providers change their rates.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
def _per_million(input_usd: float, output_usd: float) -> dict[str, float]:
    """Convert USD-per-million-token prices into USD-per-token rates."""
    return {"input": input_usd / 1_000_000, "output": output_usd / 1_000_000}


# Prices are quoted per million tokens (the providers' published rates)
# and stored per token, so `tokens * rate` gives USD directly.
MODEL_COSTS: dict[str, dict[str, float]] = {
    # OpenAI
    "gpt-4o": _per_million(2.50, 10.00),
    "gpt-4o-2024-11-20": _per_million(2.50, 10.00),
    "gpt-4o-2024-08-06": _per_million(2.50, 10.00),
    "gpt-4o-2024-05-13": _per_million(5.00, 15.00),
    "gpt-4o-mini": _per_million(0.15, 0.60),
    "gpt-4o-mini-2024-07-18": _per_million(0.15, 0.60),
    "gpt-4-turbo": _per_million(10.00, 30.00),
    "gpt-4": _per_million(30.00, 60.00),
    "gpt-3.5-turbo": _per_million(0.50, 1.50),
    "o1": _per_million(15.00, 60.00),
    "o1-mini": _per_million(3.00, 12.00),
    "o1-preview": _per_million(15.00, 60.00),
    "o3-mini": _per_million(1.10, 4.40),
    # Anthropic
    "claude-3-5-sonnet-20241022": _per_million(3.00, 15.00),
    "claude-3-5-sonnet-20240620": _per_million(3.00, 15.00),
    "claude-3-5-haiku-20241022": _per_million(0.80, 4.00),
    "claude-3-opus-20240229": _per_million(15.00, 75.00),
    "claude-3-sonnet-20240229": _per_million(3.00, 15.00),
    "claude-3-haiku-20240307": _per_million(0.25, 1.25),
    "claude-sonnet-4-20250514": _per_million(3.00, 15.00),
    "claude-haiku-4-20250514": _per_million(0.80, 4.00),
}
|
|
40
|
+
|
|
41
|
+
# Prefix aliases so "gpt-4o-2024-*" matches even if the exact snapshot isn't listed.
|
|
42
|
+
_PREFIX_MAP: dict[str, str] = {
|
|
43
|
+
"gpt-4o-mini": "gpt-4o-mini",
|
|
44
|
+
"gpt-4o": "gpt-4o",
|
|
45
|
+
"gpt-4-turbo": "gpt-4-turbo",
|
|
46
|
+
"gpt-4": "gpt-4",
|
|
47
|
+
"gpt-3.5-turbo": "gpt-3.5-turbo",
|
|
48
|
+
"o3-mini": "o3-mini",
|
|
49
|
+
"o1-mini": "o1-mini",
|
|
50
|
+
"o1-preview": "o1-preview",
|
|
51
|
+
"o1": "o1",
|
|
52
|
+
"claude-3-5-sonnet": "claude-3-5-sonnet-20241022",
|
|
53
|
+
"claude-3-5-haiku": "claude-3-5-haiku-20241022",
|
|
54
|
+
"claude-3-opus": "claude-3-opus-20240229",
|
|
55
|
+
"claude-3-sonnet": "claude-3-sonnet-20240229",
|
|
56
|
+
"claude-3-haiku": "claude-3-haiku-20240307",
|
|
57
|
+
"claude-sonnet-4": "claude-sonnet-4-20250514",
|
|
58
|
+
"claude-haiku-4": "claude-haiku-4-20250514",
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _resolve_model(model: str) -> Optional[str]:
    """Map *model* to a MODEL_COSTS key, or None if the model is unknown.

    An exact match wins; otherwise the first entry of _PREFIX_MAP (in
    insertion order) whose prefix *model* starts with supplies the
    canonical pricing key.
    """
    if model in MODEL_COSTS:
        return model
    return next(
        (canonical for prefix, canonical in _PREFIX_MAP.items() if model.startswith(prefix)),
        None,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def estimate_cost(
    model: str,
    input_tokens: int,
    output_tokens: int,
) -> Optional[float]:
    """Return estimated USD cost, or None if the model is unknown."""
    canonical = _resolve_model(model)
    if canonical is None:
        return None
    rates = MODEL_COSTS[canonical]
    return rates["input"] * input_tokens + rates["output"] * output_tokens
|