superpenguin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superpenguin/__init__.py +79 -0
- superpenguin/_client.py +142 -0
- superpenguin/_context.py +7 -0
- superpenguin/_litellm.py +278 -0
- superpenguin/_pricing.py +77 -0
- superpenguin/_trace.py +135 -0
- superpenguin/_wrap.py +697 -0
- superpenguin-0.1.0.dist-info/METADATA +289 -0
- superpenguin-0.1.0.dist-info/RECORD +11 -0
- superpenguin-0.1.0.dist-info/WHEEL +4 -0
- superpenguin-0.1.0.dist-info/licenses/LICENSE +21 -0
superpenguin/__init__.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""SuperPenguin Python SDK — AI cost management, attribution, and spend tracking."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from superpenguin._client import SuperPenguinClient
|
|
9
|
+
from superpenguin._litellm import patch_litellm
|
|
10
|
+
from superpenguin._trace import trace
|
|
11
|
+
from superpenguin._wrap import wrap
|
|
12
|
+
|
|
13
|
+
__all__ = ["init", "wrap", "trace", "patch_litellm", "get_client", "flush"]
|
|
14
|
+
__version__ = "0.1.0"
|
|
15
|
+
|
|
16
|
+
_client: SuperPenguinClient | None = None
|
|
17
|
+
|
|
18
|
+
_DEFAULT_BASE_URL = "https://api.carrotlabs.ai"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def init(
    *,
    api_key: str | None = None,
    base_url: str | None = None,
    flush_interval: float = 5.0,
    batch_size: int = 50,
) -> None:
    """Initialize the SuperPenguin SDK.

    Must be called before ``wrap()`` or ``@trace`` capture any data.
    Alternatively, set the ``SP_API_KEY`` environment variable for
    auto-initialization on first use.

    Calling this again rebinds the module-level client to a new
    ``SuperPenguinClient``; the previous client is not shut down here.

    Args:
        api_key: Your SuperPenguin API key (``sp_...``). Falls back to
            the ``SP_API_KEY`` env var when omitted or empty.
        base_url: Override the API endpoint. Falls back to the
            ``SP_BASE_URL`` env var, then ``https://api.carrotlabs.ai``.
        flush_interval: Seconds between background flushes (default 5).
        batch_size: Max events per batch POST (default 50).

    Raises:
        ValueError: If no non-empty API key is passed and ``SP_API_KEY``
            is unset or empty.
    """
    global _client

    # An empty string counts as "not provided", matching get_client(),
    # which only auto-initializes when SP_API_KEY is truthy.
    api_key = api_key or os.environ.get("SP_API_KEY")
    if not api_key:
        raise ValueError(
            "api_key is required — pass it to sp.init() "
            "or set the SP_API_KEY environment variable"
        )

    # Same rule for the endpoint: an empty SP_BASE_URL must not yield "".
    base_url = base_url or os.environ.get("SP_BASE_URL") or _DEFAULT_BASE_URL

    _client = SuperPenguinClient(
        api_key=api_key,
        base_url=base_url,
        flush_interval=flush_interval,
        batch_size=batch_size,
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_client() -> SuperPenguinClient:
    """Return the module-level client, creating it from ``SP_API_KEY`` if needed.

    Raises:
        RuntimeError: If no client exists yet and ``SP_API_KEY`` is unset
            or empty.
    """
    global _client
    if _client is not None:
        return _client

    env_key = os.environ.get("SP_API_KEY")
    if not env_key:
        raise RuntimeError(
            "sp.init() has not been called and SP_API_KEY is not set"
        )

    # init() rebinds the module-level _client.
    init(api_key=env_key)
    return _client
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def flush() -> None:
    """Send any queued events now; does nothing when no client exists yet."""
    client = _client
    if client is None:
        return
    client.flush()
|
superpenguin/_client.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Background event submitter — batches and POSTs cost events to the API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import atexit
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import queue
|
|
9
|
+
import threading
|
|
10
|
+
import time
|
|
11
|
+
from typing import Any
|
|
12
|
+
from urllib.error import URLError
|
|
13
|
+
from urllib.request import Request, urlopen
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("superpenguin")

# Queue marker used to wake a worker that is blocked on the queue.
# It is filtered out before anything is sent.
_SENTINEL = object()


class SuperPenguinClient:
    """Batched HTTP client that queues cost events and POSTs them to the
    server from a background daemon thread.

    ``flush()`` stops the worker, drains remaining items, sends them
    synchronously, and starts a new worker. ``shutdown()`` does the same
    without restarting and is registered with ``atexit``.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.carrotlabs.ai",
        flush_interval: float = 5.0,
        batch_size: int = 50,
        max_queue: int = 1000,
    ) -> None:
        """Create the client and start its background worker.

        Args:
            api_key: Sent as ``Authorization: Bearer <api_key>`` on each POST.
            base_url: API origin; a trailing ``/`` is stripped.
            flush_interval: Longest time, in seconds, the worker collects
                events before sending a batch.
            batch_size: Maximum number of events per POST.
            max_queue: Queue capacity; ``submit()`` drops events when full.

        Raises:
            ValueError: If ``batch_size`` is below 1 or ``flush_interval``
                is not positive.
        """
        # Either setting would make the worker's collect loop exit
        # immediately every time, spinning without reading the queue.
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if flush_interval <= 0:
            raise ValueError("flush_interval must be greater than 0")

        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._queue: queue.Queue[dict[str, Any] | object] = queue.Queue(
            maxsize=max_queue
        )
        self._flush_interval = flush_interval
        self._batch_size = batch_size
        self._shutdown = threading.Event()
        self._lifecycle_lock = threading.Lock()
        self._thread = self._start_worker()
        atexit.register(self.shutdown)

    def submit(self, event: dict[str, Any]) -> None:
        """Enqueue a cost event for background submission. Non-blocking.

        The event is dropped (with a warning) when the queue is full.
        """
        try:
            self._queue.put_nowait(event)
        except queue.Full:
            logger.warning("superpenguin: event queue full, dropping event")

    def flush(self) -> None:
        """Stop worker, drain queue, send everything, restart worker."""
        with self._lifecycle_lock:
            self._stop_worker()
            self._drain_and_send()
            self._thread = self._start_worker()

    def shutdown(self) -> None:
        """Final flush on interpreter exit. Does not restart the worker."""
        with self._lifecycle_lock:
            self._stop_worker()
            self._drain_and_send()

    # -- internals ------------------------------------------------------------

    def _start_worker(self) -> threading.Thread:
        """Start a worker thread bound to its own stop event."""
        # A fresh event per worker: if a previous worker outlived the join
        # timeout in _stop_worker(), its (already set) event stays set, so it
        # exits after its current send instead of being revived.
        stop = threading.Event()
        self._shutdown = stop
        t = threading.Thread(
            target=self._run, args=(stop,), daemon=True, name="sp-flush"
        )
        t.start()
        return t

    def _stop_worker(self) -> None:
        """Signal the current worker to stop and wait up to 10 s for it."""
        self._shutdown.set()
        try:
            # Wakes a worker blocked in queue.get(); if the queue is full the
            # worker still notices the stop flag on its next short timeout.
            self._queue.put_nowait(_SENTINEL)
        except queue.Full:
            pass
        self._thread.join(timeout=10.0)

    def _drain_and_send(self) -> None:
        """Empty the queue and send its events in ``batch_size`` chunks."""
        pending: list[dict[str, Any]] = []
        while True:
            try:
                item = self._queue.get_nowait()
            except queue.Empty:
                break
            if item is not _SENTINEL:
                pending.append(item)  # type: ignore[arg-type]

        # Honor the per-POST limit here too, rather than posting the whole
        # backlog (up to the queue capacity) in a single request.
        step = max(1, self._batch_size)
        for start in range(0, len(pending), step):
            self._send(pending[start:start + step])

    def _run(self, stop: threading.Event | None = None) -> None:
        """Worker loop: collect events until the batch is full or the flush
        interval elapses, send them, and repeat until *stop* is set.
        """
        if stop is None:
            stop = self._shutdown
        while not stop.is_set():
            batch: list[dict[str, Any]] = []
            deadline = time.monotonic() + self._flush_interval
            while time.monotonic() < deadline and len(batch) < self._batch_size:
                if stop.is_set():
                    break
                # Short waits keep the loop responsive to the stop flag.
                timeout = min(0.5, max(0.05, deadline - time.monotonic()))
                try:
                    item = self._queue.get(timeout=timeout)
                except queue.Empty:
                    continue
                if item is _SENTINEL:
                    break
                batch.append(item)  # type: ignore[arg-type]
            if batch:
                self._send(batch)

    @staticmethod
    def _serializable_only(events: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Return the events that ``json.dumps`` accepts, logging the rest."""
        usable: list[dict[str, Any]] = []
        for event in events:
            try:
                json.dumps(event)
            except (TypeError, ValueError):
                logger.warning(
                    "superpenguin: dropping event that is not JSON-serializable",
                    exc_info=True,
                )
            else:
                usable.append(event)
        return usable

    def _send(self, events: list[dict[str, Any]], *, _retries: int = 2) -> None:
        """POST *events* to the ingest endpoint. Never raises.

        Network errors (``URLError``/``OSError``) are retried up to
        *_retries* times with a short, growing sleep; any other error is
        logged and the batch is given up on.
        """
        url = f"{self._base_url}/api/sdk/ingest"
        try:
            body = json.dumps({"events": events}).encode()
        except (TypeError, ValueError):
            # One bad event must not raise out of here: in the worker thread
            # that would end the thread and stop all further delivery.
            events = self._serializable_only(events)
            if not events:
                return
            body = json.dumps({"events": events}).encode()

        for attempt in range(_retries + 1):
            req = Request(url, data=body, method="POST")
            req.add_header("Content-Type", "application/json")
            req.add_header("Authorization", f"Bearer {self._api_key}")
            try:
                with urlopen(req, timeout=10) as resp:
                    resp.read()
                logger.debug("superpenguin: sent %d event(s)", len(events))
                return
            except (URLError, OSError) as exc:
                if attempt < _retries:
                    time.sleep(0.3 * (attempt + 1))
                    continue
                logger.warning(
                    "superpenguin: failed to send %d event(s) after %d attempts: %s",
                    len(events),
                    _retries + 1,
                    exc,
                )
            except Exception:
                logger.warning(
                    "superpenguin: unexpected error sending events", exc_info=True
                )
                return
|
superpenguin/_context.py
ADDED
superpenguin/_litellm.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""patch_litellm() — automatic cost tracking for litellm.completion / acompletion."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from superpenguin._pricing import calculate_cost_micros
|
|
11
|
+
from superpenguin._wrap import (
|
|
12
|
+
_AsyncStreamProxy,
|
|
13
|
+
_OpenAIAccumulator,
|
|
14
|
+
_SyncStreamProxy,
|
|
15
|
+
_normalize_openai,
|
|
16
|
+
_extract_call_metadata,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("superpenguin")
|
|
20
|
+
|
|
21
|
+
_patched = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def patch_litellm(
    *,
    name: str | None = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
) -> None:
    """Monkey-patch ``litellm.completion`` and ``litellm.acompletion``.

    Both functions are replaced by tracking wrappers. Only the first
    successful call patches; later calls return immediately and their
    arguments are ignored.

    Args:
        name: Override the default event name (``"chat.completions"``).
        metadata: Default metadata for every litellm call.
        tags: Tags attached to every event.

    Falsy arguments are left out of the ``extra`` mapping handed to the
    wrappers.

    Raises:
        ImportError: If ``litellm`` cannot be imported.

    Usage::

        import superpenguin as sp
        import litellm

        sp.init(api_key="sp_...")
        sp.patch_litellm()

        response = litellm.completion(
            model="openai/gpt-4o",
            messages=[{"role": "user", "content": "Hello"}],
        )
    """
    global _patched
    if not _patched:
        try:
            import litellm  # noqa: F811
        except ImportError as exc:
            raise ImportError(
                "litellm is not installed. Install it with: pip install litellm"
            ) from exc

        candidates = {"name": name, "metadata": metadata, "tags": tags}
        # One shared mapping for both wrappers, holding only the truthy options.
        extra: dict[str, Any] = {
            key: value for key, value in candidates.items() if value
        }

        for attr, is_async in (("completion", False), ("acompletion", True)):
            _do_patch(litellm, attr, is_async=is_async, extra=extra)
        _patched = True
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _do_patch(
    module: Any, attr: str, *, is_async: bool, extra: dict[str, Any],
) -> None:
    """Replace ``module.<attr>`` with a tracking wrapper around its current value.

    The wrapper is a coroutine function delegating to ``_track_async`` when
    *is_async* is true, otherwise a plain function delegating to
    ``_track_sync``. ``functools.wraps`` copies the original's metadata.
    """
    original = getattr(module, attr)

    async def _tracked_async(*args: Any, **kwargs: Any) -> Any:
        return await _track_async(original, extra, args, kwargs)

    def _tracked_sync(*args: Any, **kwargs: Any) -> Any:
        return _track_sync(original, extra, args, kwargs)

    chosen = _tracked_async if is_async else _tracked_sync
    setattr(module, attr, functools.wraps(original)(chosen))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
# Sync / async tracking
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _track_sync(
    fn: Any,
    extra: dict[str, Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> Any:
    """Call *fn* synchronously and submit one tracking event for the call.

    For ``stream=True`` calls, ``stream_options["include_usage"]`` is forced
    on (in copies, so the caller's dicts are untouched) and the result is
    returned wrapped in ``_SyncStreamProxy``; the event is submitted when the
    proxy invokes its completion callback. If *fn* raises, an event with the
    exception's ``status_code`` (default 500) is submitted and the exception
    is re-raised.
    """
    started_at = time.time()
    is_stream = kwargs.get("stream", False)
    call_meta = _capture_litellm_input(args, kwargs)

    def record(output: dict[str, Any], *, status_code: int, streaming: bool) -> None:
        _submit_event(
            extra, call_meta, output, started_at, time.time(),
            status_code=status_code, streaming=streaming,
        )

    call_kwargs = kwargs
    if is_stream:
        stream_opts = dict(kwargs.get("stream_options") or {}, include_usage=True)
        call_kwargs = dict(kwargs)
        call_kwargs["stream_options"] = stream_opts

    try:
        result = fn(*args, **call_kwargs)
    except Exception as exc:
        record({}, status_code=getattr(exc, "status_code", 500), streaming=False)
        raise

    if not is_stream:
        record(_normalize_openai(result), status_code=200, streaming=False)
        return result

    acc = _OpenAIAccumulator()

    def on_done() -> None:
        record(acc.result(), status_code=200, streaming=True)

    return _SyncStreamProxy(result, acc, on_done)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
async def _track_async(
    fn: Any,
    extra: dict[str, Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> Any:
    """Await *fn* and submit one tracking event for the call.

    For ``stream=True`` calls, ``stream_options["include_usage"]`` is forced
    on (in copies, so the caller's dicts are untouched) and the result is
    returned wrapped in ``_AsyncStreamProxy``; the event is submitted when
    the proxy invokes its completion callback. If *fn* raises, an event with
    the exception's ``status_code`` (default 500) is submitted and the
    exception is re-raised.
    """
    started_at = time.time()
    is_stream = kwargs.get("stream", False)
    call_meta = _capture_litellm_input(args, kwargs)

    def record(output: dict[str, Any], *, status_code: int, streaming: bool) -> None:
        _submit_event(
            extra, call_meta, output, started_at, time.time(),
            status_code=status_code, streaming=streaming,
        )

    call_kwargs = kwargs
    if is_stream:
        stream_opts = dict(kwargs.get("stream_options") or {}, include_usage=True)
        call_kwargs = dict(kwargs)
        call_kwargs["stream_options"] = stream_opts

    try:
        result = await fn(*args, **call_kwargs)
    except Exception as exc:
        record({}, status_code=getattr(exc, "status_code", 500), streaming=False)
        raise

    if not is_stream:
        record(_normalize_openai(result), status_code=200, streaming=False)
        return result

    acc = _OpenAIAccumulator()

    def on_done() -> None:
        record(acc.result(), status_code=200, streaming=True)

    return _AsyncStreamProxy(result, acc, on_done)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# ---------------------------------------------------------------------------
|
|
194
|
+
# litellm-specific input capture
|
|
195
|
+
# ---------------------------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
# Names of the leading positional parameters that are captured, in order.
_POS_PARAMS = ("model", "messages")


def _capture_litellm_input(
    args: tuple[Any, ...], kwargs: dict[str, Any],
) -> dict[str, Any]:
    """Collect ``model``/``messages`` and call metadata from a litellm call.

    A keyword argument wins over the positional value in the same slot.
    When ``kwargs["metadata"]`` is a dict (or missing/falsy, which is treated
    as ``{}``), it is passed through ``_extract_call_metadata`` and the
    result is merged into the returned mapping.
    """
    by_position = dict(zip(_POS_PARAMS, args))

    captured: dict[str, Any] = {}
    for key in _POS_PARAMS:
        if key in kwargs:
            captured[key] = kwargs[key]
        elif key in by_position:
            captured[key] = by_position[key]

    raw_meta = kwargs.get("metadata") or {}
    if not isinstance(raw_meta, dict):
        return captured

    # _extract_call_metadata is fed the metadata nested under
    # extra_body.metadata.
    captured.update(_extract_call_metadata({"extra_body": {"metadata": raw_meta}}))
    return captured
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# ---------------------------------------------------------------------------
|
|
219
|
+
# Event submission
|
|
220
|
+
# ---------------------------------------------------------------------------
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _build_litellm_event(
    extra: dict[str, Any],
    call_meta: dict[str, Any],
    output_data: dict[str, Any],
    started_at: float,
    ended_at: float,
    *,
    status_code: int,
    streaming: bool,
) -> dict[str, Any]:
    """Assemble the event payload for one litellm call (no I/O)."""
    # Tolerate a missing/None usage block and None token counts.
    usage = output_data.get("usage") or {}
    input_tokens = usage.get("input_tokens") or 0
    output_tokens = usage.get("output_tokens") or 0
    cached_tokens = usage.get("cached_tokens") or 0
    model = output_data.get("model") or call_meta.get("model") or ""

    cost_micros = calculate_cost_micros(
        model, input_tokens, output_tokens, cached_tokens,
    )

    # Defaults from patch_litellm(metadata=...), overridden by per-call values.
    # NOTE(review): `extra` may also carry "name" and "tags" (stored by
    # patch_litellm); they are not copied into the event — confirm intended.
    merged_meta = dict(extra.get("metadata") or {})
    merged_meta.update(
        {k: v for k, v in call_meta.items() if k not in ("model", "messages")}
    )

    event: dict[str, Any] = {
        "provider": "litellm",
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cached_tokens": cached_tokens,
        "cost_usd_micros": cost_micros,
        "latency_ms": round((ended_at - started_at) * 1000),
        "status_code": status_code,
        "streaming": streaming,
        "has_tools": output_data.get("has_tools", False),
        "has_vision": False,
    }

    for key in (
        "customer_id", "feature", "team", "environment",
        "prompt_key", "prompt_version",
    ):
        val = merged_meta.get(key)
        if val is not None:
            # prompt_version is always sent as a string.
            event[key] = str(val) if key == "prompt_version" else val

    if merged_meta.get("custom_tags"):
        event["custom_tags"] = merged_meta["custom_tags"]

    return event


def _submit_event(
    extra: dict[str, Any],
    call_meta: dict[str, Any],
    output_data: dict[str, Any],
    started_at: float,
    ended_at: float,
    *,
    status_code: int,
    streaming: bool,
) -> None:
    """Build the event for one litellm call and queue it on the global client.

    Never raises: this runs inside the user's ``completion()`` call (including
    its ``except`` path), so any failure while building or queueing the event
    is logged at debug level instead of replacing the caller's result or
    exception.

    Args:
        extra: Options stored by ``patch_litellm`` (only ``metadata`` is read).
        call_meta: Values captured from the call's arguments.
        output_data: Normalized response data (``usage``, ``model``,
            ``has_tools``); ``{}`` for failed calls.
        started_at: ``time.time()`` when the call began.
        ended_at: ``time.time()`` when the call (or stream) finished.
        status_code: Status recorded on the event.
        streaming: Whether the event describes a streamed response.
    """
    try:
        from superpenguin import get_client

        event = _build_litellm_event(
            extra, call_meta, output_data, started_at, ended_at,
            status_code=status_code, streaming=streaming,
        )
        get_client().submit(event)
    except Exception:
        logger.debug("superpenguin: failed to submit litellm event", exc_info=True)
|
superpenguin/_pricing.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Built-in cost estimation for known LLM models.
|
|
2
|
+
|
|
3
|
+
Prices are in USD per 1M tokens. Cost is returned in USD micros
|
|
4
|
+
(1 USD = 1,000,000 micros) for lossless integer arithmetic.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
# Per-model rates in USD per 1M tokens, keyed by normalized model name.
# "cache_read" is optional; calculate_cost_micros falls back to half the
# input rate when it is absent.
_MODEL_PRICING: dict[str, dict[str, float]] = {
    # OpenAI
    "gpt-5.4": dict(input=2.5, output=15.0, cache_read=0.25),
    "gpt-5.4-mini": dict(input=0.75, output=4.5, cache_read=0.075),
    "gpt-5.4-nano": dict(input=0.2, output=1.25, cache_read=0.02),
    "gpt-5": dict(input=1.25, output=10.0, cache_read=0.125),
    "gpt-5-mini": dict(input=0.25, output=2.0, cache_read=0.025),
    "gpt-5-nano": dict(input=0.05, output=0.4, cache_read=0.005),
    "gpt-4o": dict(input=2.5, output=10.0),
    "gpt-4o-mini": dict(input=0.15, output=0.6),
    "gpt-4.1": dict(input=2.0, output=8.0),
    "gpt-4.1-mini": dict(input=0.4, output=1.6),
    "gpt-4.1-nano": dict(input=0.1, output=0.4),
    "o3": dict(input=2.0, output=8.0),
    "o4-mini": dict(input=1.1, output=4.4),
    # Anthropic
    "claude-opus-4": dict(input=15.0, output=75.0, cache_read=1.5),
    "claude-opus-4-6": dict(input=5.0, output=25.0, cache_read=0.5),
    "claude-sonnet-4": dict(input=3.0, output=15.0, cache_read=0.3),
    "claude-sonnet-4-5": dict(input=3.0, output=15.0, cache_read=0.3),
    "claude-sonnet-4-6": dict(input=3.0, output=15.0, cache_read=0.3),
    "claude-haiku-4-5": dict(input=1.0, output=5.0, cache_read=0.1),
    # Google Gemini
    "gemini-2.5-pro": dict(input=1.25, output=10.0, cache_read=0.3125),
    "gemini-2.5-flash": dict(input=0.15, output=0.6, cache_read=0.0375),
    "gemini-2.0-pro": dict(input=1.25, output=10.0, cache_read=0.3125),
    "gemini-2.0-flash": dict(input=0.1, output=0.4, cache_read=0.025),
    "gemini-2.0-flash-lite": dict(input=0.075, output=0.3),
    # xAI / Grok
    "grok-3": dict(input=3.0, output=15.0),
    "grok-3-mini": dict(input=0.3, output=0.5),
    "grok-3-fast": dict(input=5.0, output=25.0),
}
|
|
44
|
+
|
|
45
|
+
# Trailing release date on a model name: "-YYYY-MM-DD" or "-YYYYMMDD".
_DATE_SUFFIX = re.compile(r"-\d{4}-\d{2}-\d{2}$|-\d{8}$")


def _normalize_model(raw: str) -> str:
    """Reduce a model identifier to the form used as a pricing-table key.

    Drops everything up to the last ``/`` (e.g. ``"openai/gpt-4o"`` →
    ``"gpt-4o"``) and then removes a trailing date suffix.
    """
    bare = raw.rsplit("/", 1)[-1] if "/" in raw else raw
    return _DATE_SUFFIX.sub("", bare)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def calculate_cost_micros(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> int:
    """Estimate a request's cost in USD micros (1 USD = 1,000,000 micros).

    Cached tokens are subtracted from the input count (never below zero) and
    billed at the model's ``cache_read`` rate, or at half its input rate when
    no ``cache_read`` rate is listed.

    Returns 0 if the model is unknown.
    """
    rates = _MODEL_PRICING.get(_normalize_model(model))
    if rates is None:
        return 0

    if "cache_read" in rates:
        cache_rate = rates["cache_read"]
    else:
        cache_rate = rates["input"] * 0.5

    billable_input = max(0, input_tokens - cached_tokens)

    # Rates are per 1M tokens, so each term below is in USD.
    input_usd = (billable_input / 1_000_000) * rates["input"]
    output_usd = (output_tokens / 1_000_000) * rates["output"]
    cache_usd = (cached_tokens / 1_000_000) * cache_rate

    total_usd = input_usd + output_usd + cache_usd
    return round(total_usd * 1_000_000)
|