superpenguin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
+ """SuperPenguin Python SDK — AI cost management, attribution, and spend tracking."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import Any
7
+
8
+ from superpenguin._client import SuperPenguinClient
9
+ from superpenguin._litellm import patch_litellm
10
+ from superpenguin._trace import trace
11
+ from superpenguin._wrap import wrap
12
+
13
# Names re-exported as the package's public API.
__all__ = ["init", "wrap", "trace", "patch_litellm", "get_client", "flush"]
__version__ = "0.1.0"

# Process-wide client instance; stays ``None`` until ``init()`` assigns it.
_client: SuperPenguinClient | None = None

# Endpoint used when neither ``base_url`` nor ``SP_BASE_URL`` is supplied.
_DEFAULT_BASE_URL = "https://api.carrotlabs.ai"
19
+
20
+
21
def init(
    *,
    api_key: str | None = None,
    base_url: str | None = None,
    flush_interval: float = 5.0,
    batch_size: int = 50,
) -> None:
    """Initialize the SuperPenguin SDK.

    Must be called before ``wrap()`` or ``@trace`` capture any data.
    Alternatively, set the ``SP_API_KEY`` environment variable for
    auto-initialization on first use.

    Args:
        api_key: Your SuperPenguin API key (``sp_...``). Falls back to the
            ``SP_API_KEY`` env var when omitted or empty.
        base_url: Override the API endpoint. Falls back to the
            ``SP_BASE_URL`` env var, then ``https://api.carrotlabs.ai``.
            Empty values are skipped at each step.
        flush_interval: Seconds between background flushes (default 5).
        batch_size: Max events per batch POST (default 50).

    Raises:
        ValueError: If no non-empty API key is available from the argument
            or the environment.
    """
    global _client

    # An empty string counts as "not provided", matching get_client(), which
    # only auto-initializes when SP_API_KEY is non-empty. Previously an empty
    # key slipped through and every request went out unauthenticated.
    resolved_key = api_key or os.environ.get("SP_API_KEY")
    if not resolved_key:
        raise ValueError(
            "api_key is required — pass it to sp.init() "
            "or set the SP_API_KEY environment variable"
        )

    # An empty SP_BASE_URL would otherwise produce a relative, unusable URL.
    resolved_url = base_url or os.environ.get("SP_BASE_URL") or _DEFAULT_BASE_URL

    _client = SuperPenguinClient(
        api_key=resolved_key,
        base_url=resolved_url,
        flush_interval=flush_interval,
        batch_size=batch_size,
    )
60
+
61
+
62
def get_client() -> SuperPenguinClient:
    """Return the process-wide client, creating it from ``SP_API_KEY`` if needed.

    Raises:
        RuntimeError: If no client exists yet and ``SP_API_KEY`` is unset
            or empty.
    """
    global _client

    # Fast path: a client has already been configured.
    if _client is not None:
        return _client

    env_key = os.environ.get("SP_API_KEY")
    if not env_key:
        raise RuntimeError(
            "sp.init() has not been called and SP_API_KEY is not set"
        )

    # init() assigns the module-level ``_client`` as a side effect.
    init(api_key=env_key)
    return _client
74
+
75
+
76
def flush() -> None:
    """Send all queued events now; a no-op when no client has been created."""
    if _client is None:
        return
    _client.flush()
@@ -0,0 +1,142 @@
1
+ """Background event submitter — batches and POSTs cost events to the API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import atexit
6
+ import json
7
+ import logging
8
+ import queue
9
+ import threading
10
+ import time
11
+ from typing import Any
12
+ from urllib.error import URLError
13
+ from urllib.request import Request, urlopen
14
+
15
logger = logging.getLogger("superpenguin")

# Marker object pushed onto the queue by ``_stop_worker`` so a worker blocked
# in ``Queue.get`` wakes up right away. It is filtered out before sending.
_SENTINEL = object()


class SuperPenguinClient:
    """Batched HTTP client that queues cost events and flushes them
    to the server in a background daemon thread.

    ``flush()`` stops the worker, drains remaining items, sends them
    synchronously, and restarts the worker so nothing is lost.
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.carrotlabs.ai",
        flush_interval: float = 5.0,
        batch_size: int = 50,
        max_queue: int = 1000,
    ) -> None:
        """Create the client, start the worker thread, and register an exit hook.

        Args:
            api_key: Bearer token sent with every ingest request.
            base_url: API root; a trailing ``/`` is stripped.
            flush_interval: Seconds the worker waits while filling a batch.
                Values below 0.05 are raised to 0.05.
            batch_size: Max events per POST. Values below 1 are raised to 1.
            max_queue: Capacity of the in-memory queue; events submitted
                while it is full are dropped.
        """
        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._queue: queue.Queue[dict[str, Any] | object] = queue.Queue(
            maxsize=max_queue
        )
        # A non-positive interval or batch size makes the inner loop of
        # ``_run`` exit immediately, so the worker would spin at full CPU
        # without ever blocking on the queue. Clamp to the smallest values
        # the loop can work with (0.05 s is its minimum poll timeout).
        self._flush_interval = max(0.05, flush_interval)
        self._batch_size = max(1, batch_size)
        self._shutdown = threading.Event()
        self._lifecycle_lock = threading.Lock()
        self._thread = self._start_worker()
        atexit.register(self.shutdown)

    def submit(self, event: dict[str, Any]) -> None:
        """Enqueue a cost event for background submission. Non-blocking.

        When the queue is full the event is dropped and a warning is logged.
        """
        try:
            self._queue.put_nowait(event)
        except queue.Full:
            logger.warning("superpenguin: event queue full, dropping event")

    def flush(self) -> None:
        """Stop worker, drain queue, send everything, restart worker."""
        with self._lifecycle_lock:
            self._stop_worker()
            self._drain_and_send()
            self._thread = self._start_worker()

    def shutdown(self) -> None:
        """Final flush on interpreter exit. Does not restart the worker."""
        with self._lifecycle_lock:
            self._stop_worker()
            self._drain_and_send()

    # -- internals ------------------------------------------------------------

    def _start_worker(self) -> threading.Thread:
        """Clear the stop flag and launch a new daemon thread running ``_run``."""
        self._shutdown.clear()
        t = threading.Thread(target=self._run, daemon=True, name="sp-flush")
        t.start()
        return t

    def _stop_worker(self) -> None:
        """Signal the worker to stop and wait up to 10 seconds for it to exit."""
        self._shutdown.set()
        try:
            # Wake a worker that is blocked in ``Queue.get``.
            self._queue.put_nowait(_SENTINEL)
        except queue.Full:
            # No room for the marker; the worker still sees the stop flag on
            # its next poll (at most 0.5 s later).
            pass
        self._thread.join(timeout=10.0)

    def _drain_and_send(self) -> None:
        """Empty the queue and send its events in ``batch_size`` chunks.

        Chunking keeps synchronous flushes within the same per-request cap
        the background worker honours, instead of posting the whole queue
        (up to ``max_queue`` events) in a single request.
        """
        pending: list[dict[str, Any]] = []
        while True:
            try:
                item = self._queue.get_nowait()
            except queue.Empty:
                break
            if item is _SENTINEL:
                continue
            pending.append(item)  # type: ignore[arg-type]

        step = max(1, self._batch_size)
        for start in range(0, len(pending), step):
            self._send(pending[start:start + step])

    def _run(self) -> None:
        """Worker loop: gather a batch, send it, repeat until told to stop.

        A batch closes when it reaches ``batch_size`` events, when
        ``flush_interval`` seconds have elapsed, or when the stop flag or
        the sentinel is seen.
        """
        while not self._shutdown.is_set():
            batch: list[dict[str, Any]] = []
            deadline = time.monotonic() + self._flush_interval
            while time.monotonic() < deadline and len(batch) < self._batch_size:
                if self._shutdown.is_set():
                    break
                # Poll in short slices so the stop flag is noticed promptly.
                timeout = min(0.5, max(0.05, deadline - time.monotonic()))
                try:
                    item = self._queue.get(timeout=timeout)
                    if item is _SENTINEL:
                        break
                    batch.append(item)  # type: ignore[arg-type]
                except queue.Empty:
                    continue
            if batch:
                self._send(batch)

    def _send(self, events: list[dict[str, Any]], *, _retries: int = 2) -> None:
        """POST ``events`` to the ingest endpoint; never raises.

        Network errors are retried up to ``_retries`` times with a short,
        growing delay. Any other failure is logged and the batch is dropped.
        """
        url = f"{self._base_url}/api/sdk/ingest"
        try:
            body = json.dumps({"events": events}).encode()
        except (TypeError, ValueError):
            # An unserializable value in one event must not escape: raised
            # inside ``_run`` it would terminate the worker thread and stall
            # all later events.
            logger.warning(
                "superpenguin: dropping %d event(s) that could not be serialized",
                len(events),
                exc_info=True,
            )
            return

        for attempt in range(_retries + 1):
            req = Request(url, data=body, method="POST")
            req.add_header("Content-Type", "application/json")
            req.add_header("Authorization", f"Bearer {self._api_key}")
            try:
                with urlopen(req, timeout=10) as resp:
                    resp.read()
                logger.debug("superpenguin: sent %d event(s)", len(events))
                return
            except (URLError, OSError) as exc:
                if attempt < _retries:
                    time.sleep(0.3 * (attempt + 1))
                    continue
                logger.warning(
                    "superpenguin: failed to send %d event(s) after %d attempts: %s",
                    len(events),
                    _retries + 1,
                    exc,
                )
            except Exception:
                logger.warning(
                    "superpenguin: unexpected error sending events", exc_info=True
                )
                return
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ import contextvars
4
+
5
# Context-local slot holding a trace id string; reads ``None`` in any context
# where no value has been set.
# NOTE(review): presumably written by the ``trace`` helper and read when
# events are built — neither side is visible here; confirm against callers.
current_trace_id: contextvars.ContextVar[str | None] = contextvars.ContextVar(
    "sp_trace_id", default=None
)
@@ -0,0 +1,278 @@
1
+ """patch_litellm() — automatic cost tracking for litellm.completion / acompletion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import logging
7
+ import time
8
+ from typing import Any
9
+
10
+ from superpenguin._pricing import calculate_cost_micros
11
+ from superpenguin._wrap import (
12
+ _AsyncStreamProxy,
13
+ _OpenAIAccumulator,
14
+ _SyncStreamProxy,
15
+ _normalize_openai,
16
+ _extract_call_metadata,
17
+ )
18
+
19
+ logger = logging.getLogger("superpenguin")
20
+
21
# Set once the litellm entry points have been wrapped, so repeated calls to
# ``patch_litellm()`` do not stack wrappers on top of each other.
_patched = False


def patch_litellm(
    *,
    name: str | None = None,
    metadata: dict[str, Any] | None = None,
    tags: list[str] | None = None,
) -> None:
    """Wrap ``litellm.completion`` and ``litellm.acompletion`` with cost tracking.

    Once applied, calls through either function are recorded automatically.
    Only the first call has any effect; later calls return immediately and
    their arguments are ignored.

    Args:
        name: Override the default event name (``"chat.completions"``).
        metadata: Default metadata for every litellm call.
        tags: Tags attached to every event.

    Raises:
        ImportError: If ``litellm`` cannot be imported.

    Usage::

        import superpenguin as sp
        import litellm

        sp.init(api_key="sp_...")
        sp.patch_litellm()

        response = litellm.completion(
            model="openai/gpt-4o",
            messages=[{"role": "user", "content": "Hello"}],
        )
    """
    global _patched
    if _patched:
        return

    try:
        import litellm  # noqa: F811
    except ImportError as exc:
        raise ImportError(
            "litellm is not installed. Install it with: pip install litellm"
        ) from exc

    # Keep only the options the caller actually supplied (truthy values).
    supplied = {"name": name, "metadata": metadata, "tags": tags}
    extra: dict[str, Any] = {key: value for key, value in supplied.items() if value}

    for attr, is_async in (("completion", False), ("acompletion", True)):
        _do_patch(litellm, attr, is_async=is_async, extra=extra)
    _patched = True
75
+
76
+
77
+ def _do_patch(
78
+ module: Any, attr: str, *, is_async: bool, extra: dict[str, Any],
79
+ ) -> None:
80
+ original = getattr(module, attr)
81
+
82
+ if is_async:
83
+
84
+ @functools.wraps(original)
85
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
86
+ return await _track_async(original, extra, args, kwargs)
87
+
88
+ setattr(module, attr, async_wrapper)
89
+ else:
90
+
91
+ @functools.wraps(original)
92
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
93
+ return _track_sync(original, extra, args, kwargs)
94
+
95
+ setattr(module, attr, sync_wrapper)
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Sync / async tracking
100
+ # ---------------------------------------------------------------------------
101
+
102
+
103
def _track_sync(
    fn: Any,
    extra: dict[str, Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> Any:
    """Call ``fn`` synchronously and submit one cost event for the call.

    Returns the result of ``fn`` unchanged for ordinary calls. For
    ``stream=True`` calls it returns a ``_SyncStreamProxy`` around the
    result, and the event is submitted from the proxy's completion callback.
    Exceptions from ``fn`` are recorded and re-raised.
    """
    t_start = time.time()
    wants_stream = kwargs.get("stream", False)
    call_meta = _capture_litellm_input(args, kwargs)

    if wants_stream:
        # Work on copies so the caller's dicts are left untouched, and force
        # ``include_usage`` on in the stream options.
        stream_opts = dict(kwargs.get("stream_options") or {})
        stream_opts["include_usage"] = True
        kwargs = {**kwargs, "stream_options": stream_opts}

    try:
        result = fn(*args, **kwargs)
    except Exception as exc:
        _submit_event(
            extra, call_meta, {}, t_start, time.time(),
            status_code=getattr(exc, "status_code", 500),
            streaming=False,
        )
        raise

    if not wants_stream:
        _submit_event(
            extra, call_meta, _normalize_openai(result),
            t_start, time.time(), status_code=200, streaming=False,
        )
        return result

    acc = _OpenAIAccumulator()

    def report_stream_end() -> None:
        # Evaluated when the proxy fires the callback, so the accumulated
        # result and end time are read at that moment.
        _submit_event(
            extra, call_meta, acc.result(),
            t_start, time.time(), status_code=200, streaming=True,
        )

    return _SyncStreamProxy(result, acc, report_stream_end)
146
+
147
+
148
async def _track_async(
    fn: Any,
    extra: dict[str, Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
) -> Any:
    """Await ``fn`` and submit one cost event for the call.

    Returns the awaited result unchanged for ordinary calls. For
    ``stream=True`` calls it returns an ``_AsyncStreamProxy`` around the
    result, and the event is submitted from the proxy's completion callback.
    Exceptions from ``fn`` are recorded and re-raised.
    """
    t_start = time.time()
    wants_stream = kwargs.get("stream", False)
    call_meta = _capture_litellm_input(args, kwargs)

    if wants_stream:
        # Work on copies so the caller's dicts are left untouched, and force
        # ``include_usage`` on in the stream options.
        stream_opts = dict(kwargs.get("stream_options") or {})
        stream_opts["include_usage"] = True
        kwargs = {**kwargs, "stream_options": stream_opts}

    try:
        result = await fn(*args, **kwargs)
    except Exception as exc:
        _submit_event(
            extra, call_meta, {}, t_start, time.time(),
            status_code=getattr(exc, "status_code", 500),
            streaming=False,
        )
        raise

    if not wants_stream:
        _submit_event(
            extra, call_meta, _normalize_openai(result),
            t_start, time.time(), status_code=200, streaming=False,
        )
        return result

    acc = _OpenAIAccumulator()

    def report_stream_end() -> None:
        # Evaluated when the proxy fires the callback, so the accumulated
        # result and end time are read at that moment.
        _submit_event(
            extra, call_meta, acc.result(),
            t_start, time.time(), status_code=200, streaming=True,
        )

    return _AsyncStreamProxy(result, acc, report_stream_end)
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # litellm-specific input capture
195
+ # ---------------------------------------------------------------------------
196
+
197
+ _POS_PARAMS = ("model", "messages")
198
+
199
+
200
+ def _capture_litellm_input(
201
+ args: tuple[Any, ...], kwargs: dict[str, Any],
202
+ ) -> dict[str, Any]:
203
+ captured: dict[str, Any] = {}
204
+ for i, param_name in enumerate(_POS_PARAMS):
205
+ if param_name in kwargs:
206
+ captured[param_name] = kwargs[param_name]
207
+ elif i < len(args):
208
+ captured[param_name] = args[i]
209
+
210
+ meta_raw = kwargs.get("metadata") or {}
211
+ if isinstance(meta_raw, dict):
212
+ call_meta = _extract_call_metadata({"extra_body": {"metadata": meta_raw}})
213
+ captured.update(call_meta)
214
+
215
+ return captured
216
+
217
+
218
+ # ---------------------------------------------------------------------------
219
+ # Event submission
220
+ # ---------------------------------------------------------------------------
221
+
222
+
223
def _submit_event(
    extra: dict[str, Any],
    call_meta: dict[str, Any],
    output_data: dict[str, Any],
    started_at: float,
    ended_at: float,
    *,
    status_code: int,
    streaming: bool,
) -> None:
    """Build one cost event for a litellm call and queue it on the global client.

    This is best-effort telemetry and never raises. It is also called from
    the ``except`` path of the tracking wrappers, where an error raised here
    would replace the caller's original litellm exception.

    Args:
        extra: Defaults captured by ``patch_litellm()``; only its
            ``"metadata"`` entry is read here.
        call_meta: Per-call fields from ``_capture_litellm_input``. Entries
            other than ``model``/``messages`` override the default metadata.
        output_data: Normalized response; ``usage``, ``model`` and
            ``has_tools`` are read. Empty on the error path.
        started_at: Call start time in seconds.
        ended_at: Call end time in seconds.
        status_code: Status recorded on the event.
        streaming: Whether the call was a streaming call.
    """
    # NOTE(review): ``extra["name"]`` and ``extra["tags"]`` are collected by
    # patch_litellm() but are not forwarded into the event here — confirm
    # whether that is intended.
    try:
        from superpenguin import get_client

        # ``or`` fallbacks cover keys that are present but ``None``, which
        # ``dict.get(key, default)`` would pass straight through.
        usage = output_data.get("usage") or {}
        input_tokens = usage.get("input_tokens") or 0
        output_tokens = usage.get("output_tokens") or 0
        cached_tokens = usage.get("cached_tokens") or 0
        model = output_data.get("model") or call_meta.get("model") or ""

        cost_micros = calculate_cost_micros(
            model, input_tokens, output_tokens, cached_tokens,
        )

        merged_meta = dict(extra.get("metadata") or {})
        merged_meta.update(
            {k: v for k, v in call_meta.items() if k not in ("model", "messages")}
        )

        event: dict[str, Any] = {
            "provider": "litellm",
            "model": model,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cached_tokens": cached_tokens,
            "cost_usd_micros": cost_micros,
            "latency_ms": round((ended_at - started_at) * 1000),
            "status_code": status_code,
            "streaming": streaming,
            "has_tools": output_data.get("has_tools", False),
            "has_vision": False,
        }

        for key in (
            "customer_id", "feature", "team", "environment",
            "prompt_key", "prompt_version",
        ):
            val = merged_meta.get(key)
            if val is not None:
                # prompt_version is always sent as a string.
                event[key] = str(val) if key == "prompt_version" else val

        if merged_meta.get("custom_tags"):
            event["custom_tags"] = merged_meta["custom_tags"]

        get_client().submit(event)
    except Exception:
        logger.debug("superpenguin: failed to submit litellm event", exc_info=True)
@@ -0,0 +1,77 @@
1
+ """Built-in cost estimation for known LLM models.
2
+
3
+ Prices are in USD per 1M tokens. Cost is returned in USD micros
4
+ (1 USD = 1,000,000 micros) for lossless integer arithmetic.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+
11
# Price table keyed by normalized model name (no provider prefix, no date
# suffix). Values are USD per 1M tokens. "cache_read" is optional; when it is
# absent, calculate_cost_micros() bills cached tokens at half the input rate.
_MODEL_PRICING: dict[str, dict[str, float]] = {
    # OpenAI
    "gpt-5.4": {"input": 2.5, "output": 15.0, "cache_read": 0.25},
    "gpt-5.4-mini": {"input": 0.75, "output": 4.5, "cache_read": 0.075},
    "gpt-5.4-nano": {"input": 0.2, "output": 1.25, "cache_read": 0.02},
    "gpt-5": {"input": 1.25, "output": 10.0, "cache_read": 0.125},
    "gpt-5-mini": {"input": 0.25, "output": 2.0, "cache_read": 0.025},
    "gpt-5-nano": {"input": 0.05, "output": 0.4, "cache_read": 0.005},
    "gpt-4o": {"input": 2.5, "output": 10.0},
    "gpt-4o-mini": {"input": 0.15, "output": 0.6},
    "gpt-4.1": {"input": 2.0, "output": 8.0},
    "gpt-4.1-mini": {"input": 0.4, "output": 1.6},
    "gpt-4.1-nano": {"input": 0.1, "output": 0.4},
    "o3": {"input": 2.0, "output": 8.0},
    "o4-mini": {"input": 1.1, "output": 4.4},
    # Anthropic
    "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.5},
    "claude-opus-4-6": {"input": 5.0, "output": 25.0, "cache_read": 0.5},
    "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
    "claude-sonnet-4-5": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
    "claude-sonnet-4-6": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
    "claude-haiku-4-5": {"input": 1.0, "output": 5.0, "cache_read": 0.1},
    # Google Gemini
    "gemini-2.5-pro": {"input": 1.25, "output": 10.0, "cache_read": 0.3125},
    "gemini-2.5-flash": {"input": 0.15, "output": 0.6, "cache_read": 0.0375},
    "gemini-2.0-pro": {"input": 1.25, "output": 10.0, "cache_read": 0.3125},
    "gemini-2.0-flash": {"input": 0.1, "output": 0.4, "cache_read": 0.025},
    "gemini-2.0-flash-lite": {"input": 0.075, "output": 0.3},
    # xAI / Grok
    "grok-3": {"input": 3.0, "output": 15.0},
    "grok-3-mini": {"input": 0.3, "output": 0.5},
    "grok-3-fast": {"input": 5.0, "output": 25.0},
}
44
+
45
+ _DATE_SUFFIX = re.compile(r"-\d{4}-\d{2}-\d{2}$|-\d{8}$")
46
+
47
+
48
+ def _normalize_model(raw: str) -> str:
49
+ # Strip provider prefix (e.g. "openai/gpt-4o" → "gpt-4o")
50
+ if "/" in raw:
51
+ raw = raw.rsplit("/", 1)[-1]
52
+ return _DATE_SUFFIX.sub("", raw)
53
+
54
+
55
def calculate_cost_micros(
    model: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> int:
    """Calculate request cost in USD micros (1 USD = 1,000,000 micros).

    Cached tokens are billed at the model's ``cache_read`` rate, or at half
    the input rate when the table has no ``cache_read`` entry. The remaining
    input tokens (``input_tokens - cached_tokens``, floored at 0) are billed
    at the input rate.

    Args:
        model: Model identifier; a provider prefix and date suffix are
            stripped before the price lookup.
        input_tokens: Prompt tokens reported for the request.
        output_tokens: Completion tokens reported for the request.
        cached_tokens: Prompt tokens served from cache.

    Returns:
        The cost rounded to whole micros. Returns 0 if the model is unknown,
        empty, or not a string.
    """
    # The model name comes from request/response payloads and may be None.
    # Anything that is not a non-empty string cannot be priced, and would
    # otherwise raise TypeError inside the normalization step.
    if not isinstance(model, str) or not model:
        return 0

    pricing = _MODEL_PRICING.get(_normalize_model(model))
    if pricing is None:
        return 0

    # Usage fields can be reported as None; count those as zero tokens.
    input_tokens = input_tokens or 0
    output_tokens = output_tokens or 0
    cached_tokens = cached_tokens or 0

    uncached_input = max(0, input_tokens - cached_tokens)
    input_cost = (uncached_input / 1_000_000) * pricing["input"]
    output_cost = (output_tokens / 1_000_000) * pricing["output"]
    cache_cost = (cached_tokens / 1_000_000) * pricing.get(
        "cache_read", pricing["input"] * 0.5
    )

    return round((input_cost + output_cost + cache_cost) * 1_000_000)