tokenwise-sdk 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tokenwise/__init__.py ADDED
@@ -0,0 +1,55 @@
1
+ """Tokenwise SDK — metadata-only usage tracking for Anthropic and OpenAI.
2
+
3
+ Swap one import line to start capturing per-call token/latency metadata:
4
+
5
+ from tokenwise import Anthropic # instead of: from anthropic import Anthropic
6
+ client = Anthropic(api_key="sk-ant-...")
7
+
8
+ The wrappers expose the identical interface to the official SDKs and forward
9
+ every call untouched. After each response, metadata only (model, token counts,
10
+ latency) is shipped to Tokenwise on a background thread — never any prompt or
11
+ response content. If Tokenwise is unreachable the SDK fails silently and your
12
+ AI calls are unaffected.
13
+
14
+ Configuration (env or constructor kwargs ``tokenwise_key`` / ``tokenwise_url``):
15
+ TOKENWISE_API_KEY, TOKENWISE_API_URL, TOKENWISE_DISABLED
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import TYPE_CHECKING
21
+
22
+ from tokenwise._version import __version__
23
+
24
+ # Provider wrappers are resolved lazily so that `anthropic` and `openai` are
25
+ # OPTIONAL dependencies: importing tokenwise never requires both to be present.
26
+ _LAZY = {
27
+ "Anthropic": ("tokenwise.anthropic", "Anthropic"),
28
+ "AsyncAnthropic": ("tokenwise.anthropic", "AsyncAnthropic"),
29
+ "OpenAI": ("tokenwise.openai", "OpenAI"),
30
+ "AsyncOpenAI": ("tokenwise.openai", "AsyncOpenAI"),
31
+ }
32
+
33
+ __all__ = ["Anthropic", "AsyncAnthropic", "OpenAI", "AsyncOpenAI", "__version__"]
34
+
35
+ if TYPE_CHECKING: # for type checkers / IDEs only
36
+ from tokenwise.anthropic import Anthropic, AsyncAnthropic
37
+ from tokenwise.openai import AsyncOpenAI, OpenAI
38
+
39
+
40
def __getattr__(name: str):
    """Resolve a lazily exported provider wrapper on first access (PEP 562).

    Looks ``name`` up in ``_LAZY``, imports the wrapper module it points at and
    returns the requested attribute. The resolved object is stored in the
    module globals so later accesses no longer go through this hook.

    Raises:
        AttributeError: ``name`` is not one of the lazy exports.
        ImportError: the wrapper module could not be imported. The install
            hint is produced only when the module that failed to import is
            the provider package itself; any other import failure is
            re-raised unchanged so its real cause stays visible.
    """
    target = _LAZY.get(name)
    if target is None:
        raise AttributeError(f"module 'tokenwise' has no attribute {name!r}")
    module_name, attr = target
    import importlib

    try:
        module = importlib.import_module(module_name)
    except ImportError as exc:
        provider = "anthropic" if "anthropic" in module_name else "openai"
        missing = getattr(exc, "name", None) or ""
        if missing.split(".")[0] != provider:
            # Something other than the provider SDK failed to import; blaming
            # the provider package here would send the user down a wrong path.
            raise
        raise ImportError(
            f"Using tokenwise.{name} requires the '{provider}' package. "
            f"Install it with: pip install tokenwise-sdk[{provider}]"
        ) from exc
    value = getattr(module, attr)
    globals()[name] = value  # cache: module __getattr__ only runs on a miss
    return value
tokenwise/_capture.py ADDED
@@ -0,0 +1,90 @@
1
+ """Metadata extraction helpers shared by the provider wrappers.
2
+
3
+ Everything here reads ONLY token-usage and model fields off a response object.
4
+ No function in this module reads message content, choices text, deltas' text,
5
+ system prompts, or tool definitions.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from datetime import datetime, timezone
12
+ from typing import Any
13
+
14
+ from tokenwise.client import TokenwiseClient
15
+ from tokenwise.event import UsageEvent
16
+
17
+ logger = logging.getLogger("tokenwise")
18
+
19
+
20
def now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
23
+
24
+
25
def _int(obj: Any, name: str, default: int = 0) -> int:
    """Read attribute ``name`` from ``obj`` and coerce it to ``int``.

    Returns ``default`` when the attribute is missing, is ``None``, or cannot
    be converted. Non-finite floats are covered too: ``int(float("inf"))``
    raises ``OverflowError`` rather than ``ValueError``.
    """
    value = getattr(obj, name, None)
    if value is None:
        return default
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default
31
+
32
+
33
def anthropic_usage_fields(model: str, usage: Any) -> dict:
    """Build the metadata dict for an Anthropic ``Usage``/``MessageDeltaUsage``.

    Each counter is read with ``_int``, so a missing or ``None`` attribute
    (or ``usage`` itself being ``None``) yields 0 for that counter.
    """
    fields: dict = {"model": model}
    for counter in (
        "input_tokens",
        "output_tokens",
        "cache_read_input_tokens",
        "cache_creation_input_tokens",
    ):
        fields[counter] = _int(usage, counter)
    return fields
42
+
43
+
44
def openai_usage_fields(model: str, usage: Any) -> dict:
    """Build the metadata dict for an OpenAI ``CompletionUsage`` object.

    Cached prompt tokens are read from ``usage.prompt_tokens_details`` when
    that attribute is present; otherwise they are reported as 0.
    """
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    if prompt_details is None:
        cache_hits = 0
    else:
        cache_hits = _int(prompt_details, "cached_tokens")
    return dict(
        model=model,
        input_tokens=_int(usage, "prompt_tokens"),
        output_tokens=_int(usage, "completion_tokens"),
        cache_read_input_tokens=cache_hits,
        # OpenAI has no cache-creation concept; always 0.
        cache_creation_input_tokens=0,
    )
58
+
59
+
60
def make_event(
    provider: str,
    endpoint: str,
    fields: dict,
    latency_ms: int,
    streamed: bool,
) -> UsageEvent:
    """Assemble a ``UsageEvent`` from an extracted ``fields`` dict.

    Counters missing from ``fields`` default to 0 and a missing or falsy model
    becomes ``"unknown"``. The timestamp is taken when this function runs.
    """

    def count(key: str) -> int:
        return int(fields.get(key, 0))

    model_name = str(fields.get("model") or "unknown")
    return UsageEvent(
        provider=provider,
        endpoint=endpoint,
        model=model_name,
        input_tokens=count("input_tokens"),
        output_tokens=count("output_tokens"),
        cache_read_input_tokens=count("cache_read_input_tokens"),
        cache_creation_input_tokens=count("cache_creation_input_tokens"),
        latency_ms=latency_ms,
        timestamp=now_iso(),
        streamed=streamed,
    )
79
+
80
+
81
def safe_capture(client: TokenwiseClient, event_factory) -> None:
    """Build an event and hand it to ``client``, never letting an error escape.

    ``event_factory`` is a zero-argument callable returning the event. It is
    invoked inside the ``try`` so that a failure while extracting metadata is
    logged at debug level here instead of reaching the code that made the call.
    """
    try:
        event = event_factory()
        client.capture(event)
    except Exception:
        logger.debug("tokenwise: capture skipped due to error", exc_info=True)
tokenwise/_version.py ADDED
@@ -0,0 +1,3 @@
1
+ """Single source of truth for the package version."""
2
+
3
+ __version__ = "0.1.1"
tokenwise/anthropic.py ADDED
@@ -0,0 +1,226 @@
1
+ """Drop-in wrappers for the official ``anthropic`` SDK.
2
+
3
+ ``from tokenwise import Anthropic`` exposes the exact same interface as
4
+ ``anthropic.Anthropic``; only ``messages.create`` is instrumented. Every other
5
+ attribute and method is delegated to the real client untouched, so any
6
+ parameter the official SDK adds passes straight through.
7
+
8
+ Streaming usage is captured transparently from the event stream
9
+ (``message_start`` carries input/cache tokens; ``message_delta`` carries the
10
+ final ``output_tokens``) — the caller's request is not modified.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import time
16
+ from typing import Any
17
+
18
+ from tokenwise import _capture as cap
19
+ from tokenwise.client import get_client
20
+ from tokenwise.config import resolve_config
21
+
22
+ _PROVIDER = "anthropic"
23
+ _ENDPOINT = "messages"
24
+
25
+
26
def _new_real(async_: bool, args: tuple, kwargs: dict):
    """Instantiate the official Anthropic client that the wrapper delegates to.

    ``anthropic`` is imported here, not at module level, so it stays an
    optional dependency. This is therefore the place where a missing install
    actually surfaces, and where the install hint has to be produced.

    Raises:
        ModuleNotFoundError: the ``anthropic`` package is not installed.
    """
    try:
        import anthropic  # imported lazily so `anthropic` is an optional dependency
    except ModuleNotFoundError as exc:
        if exc.name != "anthropic":
            raise  # one of anthropic's own dependencies is missing; keep the real cause
        wrapper = "AsyncAnthropic" if async_ else "Anthropic"
        raise ModuleNotFoundError(
            f"Using tokenwise.{wrapper} requires the 'anthropic' package. "
            "Install it with: pip install tokenwise-sdk[anthropic]",
            name="anthropic",
        ) from exc

    cls = anthropic.AsyncAnthropic if async_ else anthropic.Anthropic
    return cls(*args, **kwargs)
31
+
32
+
33
+ # ── streaming proxies ──────────────────────────────────────────────────────────
34
+
35
class _StreamAccumulator:
    """Collects usage fields off Anthropic stream events. Reads no content."""

    def __init__(self) -> None:
        self.model = "unknown"
        self.fields = {
            "model": "unknown", "input_tokens": 0, "output_tokens": 0,
            "cache_read_input_tokens": 0, "cache_creation_input_tokens": 0,
        }

    def observe(self, event: Any) -> None:
        """Fold one stream event into ``self.fields``.

        Only ``event.message.model``, ``event.message.usage`` and
        ``event.usage`` are inspected; events carrying none of them are
        ignored.
        """
        # message_start: event.message has model + initial usage (input/cache).
        message = getattr(event, "message", None)
        if message is not None:
            model = getattr(message, "model", None)
            if model:
                # Record the model even when the message carries no usage
                # block, so the event is not reported as "unknown".
                self.model = model
                self.fields["model"] = model
            usage = getattr(message, "usage", None)
            if usage is not None:
                merged = cap.anthropic_usage_fields(self.fields["model"], usage)
                # output_tokens in message_start is partial; keep our running value.
                merged["output_tokens"] = self.fields["output_tokens"]
                self.fields = merged
        # message_delta: event.usage carries the final cumulative output_tokens.
        usage = getattr(event, "usage", None)
        if usage is not None and hasattr(usage, "output_tokens"):
            self.fields["output_tokens"] = cap._int(usage, "output_tokens")
60
+
61
+
62
class _SyncStreamProxy:
    """Pass-through wrapper around the stream returned by a sync ``create``.

    Every event is shown to a ``_StreamAccumulator`` and then handed to the
    caller unchanged. One usage event is captured when the stream ends: on
    exhaustion, on an error while reading, or when the ``with`` block exits.
    ``_done`` guarantees it is captured at most once.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Reached only for names not found on the proxy itself. Raise
        # AttributeError (not KeyError) on a half-built instance so hasattr()
        # and copy/pickle probing behave normally.
        stream = self.__dict__.get("_stream")
        if stream is None:
            raise AttributeError(name)
        return getattr(stream, name)

    def __iter__(self):
        try:
            for event in self._stream:
                self._acc.observe(event)
                yield event
        finally:
            self._finish()

    def __next__(self):
        # next(proxy) looks __next__ up on the type and never consults
        # __getattr__, so it has to be forwarded explicitly.
        try:
            event = next(self._stream)
        except BaseException:  # includes StopIteration: the stream is over either way
            self._finish()
            raise
        self._acc.observe(event)
        return event

    def __enter__(self):
        self._stream.__enter__()
        return self

    def __exit__(self, *exc: Any):
        try:
            return self._stream.__exit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage once; later calls are no-ops."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields, latency, True),
        )
100
+
101
+
102
class _AsyncStreamProxy:
    """Pass-through wrapper around the stream returned by an async ``create``.

    Every event is shown to a ``_StreamAccumulator`` and then handed to the
    caller unchanged. One usage event is captured when the stream ends: on
    exhaustion, on an error while reading, or when the ``async with`` block
    exits. ``_done`` guarantees it is captured at most once.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Reached only for names not found on the proxy itself. Raise
        # AttributeError (not KeyError) on a half-built instance so hasattr()
        # and copy/pickle probing behave normally.
        stream = self.__dict__.get("_stream")
        if stream is None:
            raise AttributeError(name)
        return getattr(stream, name)

    async def __aiter__(self):
        try:
            async for event in self._stream:
                self._acc.observe(event)
                yield event
        finally:
            self._finish()

    async def __anext__(self):
        # __anext__ is looked up on the type and never through __getattr__,
        # so stepping the proxy manually needs an explicit forwarder.
        try:
            event = await self._stream.__anext__()
        except BaseException:  # includes StopAsyncIteration and cancellation
            self._finish()
            raise
        self._acc.observe(event)
        return event

    async def __aenter__(self):
        await self._stream.__aenter__()
        return self

    async def __aexit__(self, *exc: Any):
        try:
            return await self._stream.__aexit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage once; later calls are no-ops."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields, latency, True),
        )
140
+
141
+
142
+ # ── messages resource proxy ─────────────────────────────────────────────────────
143
+
144
class _Messages:
    """Proxy for ``client.messages`` that instruments ``create`` only.

    Every other attribute is delegated to the real resource. For an async
    client ``create`` returns the coroutine produced by ``_acreate``.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._tw = tw
        self._async = async_

    def __getattr__(self, name: str) -> Any:
        # Same guard as _BaseAnthropic: a half-built instance must raise
        # AttributeError, not KeyError, so hasattr()/copy keep working.
        real = self.__dict__.get("_real")
        if real is None:
            raise AttributeError(name)
        return getattr(real, name)

    def create(self, *args: Any, **kwargs: Any):
        """Forward to the real ``create`` and capture usage metadata.

        With ``stream=True`` the result is wrapped in a stream proxy, which
        captures usage once the stream ends; otherwise usage is captured from
        the returned message right away.
        """
        if self._async:
            return self._acreate(*args, **kwargs)
        t0 = time.perf_counter()
        result = self._real.create(*args, **kwargs)
        if kwargs.get("stream"):
            return _SyncStreamProxy(result, self._tw, t0)
        self._record(result, t0)
        return result

    async def _acreate(self, *args: Any, **kwargs: Any):
        """Async counterpart of :meth:`create`."""
        t0 = time.perf_counter()
        result = await self._real.create(*args, **kwargs)
        if kwargs.get("stream"):
            return _AsyncStreamProxy(result, self._tw, t0)
        self._record(result, t0)
        return result

    def _record(self, result: Any, t0: float) -> None:
        """Capture one non-streamed usage event for ``result``."""
        latency = int((time.perf_counter() - t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(
                _PROVIDER, _ENDPOINT,
                cap.anthropic_usage_fields(getattr(result, "model", "unknown"),
                                           getattr(result, "usage", None)),
                latency, False,
            ),
        )
188
+
189
+
190
+ # ── top-level client wrappers ───────────────────────────────────────────────────
191
+
192
class _BaseAnthropic:
    """Shared implementation of the sync and async Anthropic wrappers.

    Builds the real client, obtains the shared Tokenwise shipper and exposes
    an instrumented ``messages`` resource. Any other attribute is looked up on
    the real client.
    """

    _ASYNC = False

    def __init__(
        self,
        *args: Any,
        tokenwise_key: str | None = None,
        tokenwise_url: str | None = None,
        **kwargs: Any,
    ) -> None:
        # The tokenwise_* keywords are consumed here; everything else is
        # passed to the official client untouched.
        is_async = self._ASYNC
        self._client = real = _new_real(is_async, args, kwargs)
        self._tw = shipper = get_client(resolve_config(tokenwise_key, tokenwise_url))
        self._messages = _Messages(real.messages, shipper, is_async)

    @property
    def messages(self) -> _Messages:
        return self._messages

    def __getattr__(self, name: str) -> Any:
        # Only called when normal lookup fails; before __init__ has stored
        # the real client there is nothing to delegate to.
        client = vars(self).get("_client")
        if client is not None:
            return getattr(client, name)
        raise AttributeError(name)
215
+
216
+
217
class Anthropic(_BaseAnthropic):
    """Drop-in replacement for ``anthropic.Anthropic``."""

    _ASYNC = False

    # `with` looks __enter__/__exit__ up on the type and never falls back to
    # __getattr__, so the context-manager protocol is forwarded explicitly.
    def __enter__(self):
        self._client.__enter__()
        return self

    def __exit__(self, exc_type, exc, tb):
        return self._client.__exit__(exc_type, exc, tb)
221
+
222
+
223
class AsyncAnthropic(_BaseAnthropic):
    """Drop-in replacement for ``anthropic.AsyncAnthropic``."""

    _ASYNC = True

    # `async with` looks __aenter__/__aexit__ up on the type and never falls
    # back to __getattr__, so the protocol is forwarded explicitly.
    async def __aenter__(self):
        await self._client.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        return await self._client.__aexit__(exc_type, exc, tb)
tokenwise/client.py ADDED
@@ -0,0 +1,167 @@
1
+ """Internal client that buffers usage events and ships them in the background.
2
+
3
+ Guarantees (all non-negotiable, see README):
4
+ * ``capture()`` never blocks the caller — it appends to an in-memory
5
+ ``deque`` under a short lock and returns immediately.
6
+ * If the Tokenwise API is slow or down, the caller's AI calls are unaffected;
7
+ sending happens only on a background daemon thread.
8
+ * The buffer is bounded at ``max_buffer`` events. ``deque(maxlen=...)`` drops
9
+ the OLDEST event silently when full — capture never raises, never waits.
10
+ * Failed sends are retried on the next flush; events are put back at the
11
+ front of the buffer (and may age out if the buffer keeps overflowing).
12
+
13
+ A process-wide registry returns one shared client per (api_key, api_url) so
14
+ that wrapping several SDK clients with the same key reuses one worker thread.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import atexit
20
+ import logging
21
+ import threading
22
+ from collections import deque
23
+ from typing import TYPE_CHECKING
24
+
25
+ from tokenwise.config import Config
26
+
27
+ if TYPE_CHECKING:
28
+ from tokenwise.event import UsageEvent
29
+
30
+ logger = logging.getLogger("tokenwise")
31
+
32
+
33
class TokenwiseClient:
    """Background, fail-silent shipper of :class:`UsageEvent` batches.

    Events are appended to a bounded in-memory ``deque`` and POSTed in batches
    by a daemon worker thread that is started on first use. A batch whose send
    fails is put back at the front of the buffer and retried on the next cycle.
    """

    def __init__(self, config: Config) -> None:
        self._config = config
        self._buffer: deque[UsageEvent] = deque(maxlen=config.max_buffer)
        self._lock = threading.Lock()      # guards _buffer and worker start-up
        self._wake = threading.Event()     # set to make the worker flush now
        self._stop = threading.Event()     # set once, at interpreter exit
        self._thread: threading.Thread | None = None
        self._http = None  # lazily created httpx.Client on the worker thread
        self._started = False

    # ── public ────────────────────────────────────────────────────────────────

    def capture(self, event: UsageEvent) -> None:
        """Enqueue an event. Never blocks, never raises."""
        if not self._config.enabled:
            return
        try:
            self._ensure_started()
            with self._lock:
                self._buffer.append(event)  # drops oldest if full (maxlen)
                if len(self._buffer) >= self._config.batch_size:
                    self._wake.set()
        except Exception:  # capture must never surface an error to the caller
            logger.debug("tokenwise: capture failed (ignored)", exc_info=True)

    def flush(self, timeout: float = 2.0) -> None:
        """Best-effort synchronous flush (used by tests and atexit).

        Wakes the worker and polls until the buffer is empty or ``timeout``
        seconds of real time have elapsed. An empty buffer means every event
        has been handed to the worker, not that the last POST has completed.
        """
        if not self._config.enabled:
            return
        import time  # local import: only this method needs the clock

        self._ensure_started()
        self._wake.set()
        # Measure against the monotonic clock so lock waits and scheduling
        # delays count towards the timeout instead of silently extending it.
        deadline = time.monotonic() + timeout
        while True:
            with self._lock:
                if not self._buffer:
                    return
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return
            time.sleep(min(0.05, remaining))

    # ── lifecycle ───────────────────────────────────────────────────────────────

    def _ensure_started(self) -> None:
        """Start the worker thread on first use and register the exit hook."""
        if self._started:
            return
        with self._lock:
            if self._started:
                return
            self._thread = threading.Thread(
                target=self._run, name="tokenwise-worker", daemon=True
            )
            self._thread.start()
            self._started = True
            atexit.register(self._shutdown)

    def _shutdown(self) -> None:
        """Ask the worker to stop and wait a bounded time for its final drain."""
        self._stop.set()
        self._wake.set()
        if self._thread is not None:
            self._thread.join(timeout=self._config.http_timeout + 1.0)

    # ── worker ──────────────────────────────────────────────────────────────────

    def _run(self) -> None:
        """Worker loop: flush on wake-up or every ``flush_interval`` seconds."""
        try:
            import httpx

            self._http = httpx.Client(timeout=self._config.http_timeout)
        except Exception:
            logger.debug("tokenwise: HTTP client unavailable; disabling sender",
                         exc_info=True)
            return

        while not self._stop.is_set():
            self._wake.wait(self._config.flush_interval)
            self._wake.clear()
            self._drain_and_send()

        # Final drain on shutdown.
        self._drain_and_send()
        try:
            self._http.close()
        except Exception:
            pass

    def _drain_and_send(self) -> None:
        """Send buffered events batch by batch; stop at the first failed send."""
        while True:
            with self._lock:
                if not self._buffer:
                    return
                take = min(self._config.batch_size, len(self._buffer))
                batch = [self._buffer.popleft() for _ in range(take)]
            # The POST happens outside the lock so capture() is never blocked.
            if self._send(batch):
                continue
            with self._lock:
                self._requeue(batch)
            return  # back off until next flush cycle

    def _requeue(self, batch: list[UsageEvent]) -> None:
        """Put a failed batch back at the front. The caller holds ``_lock``.

        ``extendleft`` on a full bounded deque evicts from the right, i.e. it
        would throw away the newest events captured while the send was in
        flight. Only as many of the batch's newest events as there is free
        room for are restored, so it is always the oldest events that age out.
        """
        limit = self._buffer.maxlen
        if limit is not None:
            room = limit - len(self._buffer)
            if room <= 0:
                return
            batch = batch[-room:]
        self._buffer.extendleft(reversed(batch))

    def _send(self, batch: list[UsageEvent]) -> bool:
        """POST a batch. Returns True on 2xx, False otherwise. Never raises."""
        if self._http is None:
            return False
        try:
            resp = self._http.post(
                f"{self._config.api_url}/api/ingest/events",
                json={"events": [e.to_dict() for e in batch]},
                headers={"Authorization": f"Bearer {self._config.api_key}"},
            )
            return 200 <= resp.status_code < 300
        except Exception:
            logger.debug("tokenwise: send failed (will retry)", exc_info=True)
            return False
151
+
152
+
153
+ # ── process-wide registry ─────────────────────────────────────────────────────
154
+
155
+ _registry: dict[tuple[str | None, str], TokenwiseClient] = {}
156
+ _registry_lock = threading.Lock()
157
+
158
+
159
def get_client(config: Config) -> TokenwiseClient:
    """Return the process-wide client for ``(config.api_key, config.api_url)``.

    The first call for a given pair creates the client and stores it in
    ``_registry``; later calls with the same pair get that same instance, even
    if the other ``config`` fields differ.
    """
    registry_key = (config.api_key, config.api_url)
    with _registry_lock:
        existing = _registry.get(registry_key)
        if existing is not None:
            return existing
        created = TokenwiseClient(config)
        _registry[registry_key] = created
        return created
tokenwise/config.py ADDED
@@ -0,0 +1,66 @@
1
+ """Configuration resolution for the Tokenwise SDK.
2
+
3
+ Precedence for every setting: explicit constructor argument > environment
4
+ variable > built-in default.
5
+
6
+ Environment variables:
7
+ TOKENWISE_API_KEY Tokenwise ingest key (``tw_...``). Required to send
8
+ events; if absent the SDK runs in disabled mode and
9
+ the wrapped AI calls work exactly as normal.
10
+ TOKENWISE_API_URL Base URL of the Tokenwise API
11
+ (default ``https://tokenwise-production-aa59.up.railway.app``).
12
+ TOKENWISE_DISABLED If set to a truthy value ("1", "true", "yes", "on"),
13
+ a global kill switch that disables all capture.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ from dataclasses import dataclass
20
+
21
+ DEFAULT_API_URL = "https://tokenwise-production-aa59.up.railway.app"
22
+
23
+ # Tuning constants — conservative defaults that keep the SDK invisible to the
24
+ # host application. None of these ever block the caller's thread.
25
+ DEFAULT_MAX_BUFFER = 1_000 # events retained when the API is unreachable
26
+ DEFAULT_BATCH_SIZE = 50 # events per POST
27
+ DEFAULT_FLUSH_INTERVAL = 1.0 # seconds between background flush attempts
28
+ DEFAULT_HTTP_TIMEOUT = 5.0 # seconds; background only, never blocks caller
29
+
30
+ _TRUTHY = {"1", "true", "yes", "on"}
31
+
32
+
33
def _env_truthy(name: str) -> bool:
    """Report whether environment variable ``name`` holds a truthy value.

    The value is stripped and lower-cased before being compared against
    ``_TRUTHY``; an unset variable counts as false.
    """
    raw = os.environ.get(name, "")
    normalized = raw.strip().lower()
    return normalized in _TRUTHY
35
+
36
+
37
@dataclass(frozen=True)
class Config:
    """Immutable, fully resolved settings for one Tokenwise client."""

    api_key: str | None  # ingest key; None when no key was supplied
    api_url: str         # base URL of the Tokenwise API
    disabled: bool       # global kill switch
    max_buffer: int = DEFAULT_MAX_BUFFER
    batch_size: int = DEFAULT_BATCH_SIZE
    flush_interval: float = DEFAULT_FLUSH_INTERVAL
    http_timeout: float = DEFAULT_HTTP_TIMEOUT

    @property
    def enabled(self) -> bool:
        """Whether capture should run: not disabled and an API key is set."""
        if self.disabled:
            return False
        return bool(self.api_key)
53
+
54
+
55
def _first_nonblank(*candidates: str | None) -> str | None:
    """Return the first candidate that is non-empty after stripping, else None."""
    for candidate in candidates:
        cleaned = (candidate or "").strip()
        if cleaned:
            return cleaned
    return None


def resolve_config(
    api_key: str | None = None,
    api_url: str | None = None,
) -> Config:
    """Build a :class:`Config` from explicit args, env vars, then defaults.

    Values are stripped of surrounding whitespace first. A key or URL copied
    with a trailing newline or space would otherwise be used verbatim, giving
    an invalid ``Authorization`` header or URL and silently losing every
    event. A value that is blank after stripping is treated as not provided.
    """
    key = _first_nonblank(api_key, os.environ.get("TOKENWISE_API_KEY"))
    url = (
        _first_nonblank(api_url, os.environ.get("TOKENWISE_API_URL"))
        or DEFAULT_API_URL
    )
    return Config(
        api_key=key,
        api_url=url.rstrip("/"),
        disabled=_env_truthy("TOKENWISE_DISABLED"),
    )
tokenwise/event.py ADDED
@@ -0,0 +1,31 @@
1
+ """The usage event — the SDK's privacy boundary.
2
+
3
+ This dataclass is the ONLY thing the SDK ever transmits. It has fields for
4
+ token counts and timing metadata and **deliberately no field anywhere for
5
+ prompt text, response text, system prompts, tool definitions, or any other
6
+ user content**. Capture code constructs one of these from a response's usage
7
+ block; there is structurally nowhere to put content even by accident.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import asdict, dataclass
13
+
14
+
15
@dataclass(frozen=True)
class UsageEvent:
    """Immutable, metadata-only description of one AI API call."""

    provider: str  # "anthropic" | "openai"
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: int
    cache_creation_input_tokens: int
    latency_ms: int
    timestamp: str  # ISO-8601 UTC, e.g. "2026-05-30T15:42:01Z"
    endpoint: str  # "messages" | "chat.completions"
    streamed: bool  # latency_ms is total stream duration when True

    def to_dict(self) -> dict:
        """Return the event as a plain dict keyed by field name."""
        payload: dict = asdict(self)
        return payload
tokenwise/openai.py ADDED
@@ -0,0 +1,243 @@
1
+ """Drop-in wrappers for the official ``openai`` SDK.
2
+
3
+ ``from tokenwise import OpenAI`` exposes the exact same interface as
4
+ ``openai.OpenAI``; only ``chat.completions.create`` is instrumented. Everything
5
+ else delegates to the real client untouched.
6
+
7
+ Streaming nuance: OpenAI only returns ``usage`` on a streamed response when the
8
+ request carries ``stream_options={"include_usage": True}``. Per product
9
+ decision we inject that option **only when the caller did not supply their own
10
+ ``stream_options``**, and we parse the trailing usage-only chunk defensively
11
+ (its ``choices`` list is empty — we never index into it).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import time
17
+ from typing import Any
18
+
19
+ from tokenwise import _capture as cap
20
+ from tokenwise.client import get_client
21
+ from tokenwise.config import resolve_config
22
+
23
+ _PROVIDER = "openai"
24
+ _ENDPOINT = "chat.completions"
25
+
26
+
27
def _new_real(async_: bool, args: tuple, kwargs: dict):
    """Instantiate the official OpenAI client that the wrapper delegates to.

    ``openai`` is imported here, not at module level, so it stays an optional
    dependency. This is therefore the place where a missing install actually
    surfaces, and where the install hint has to be produced.

    Raises:
        ModuleNotFoundError: the ``openai`` package is not installed.
    """
    try:
        import openai  # lazy import → openai is an optional dependency
    except ModuleNotFoundError as exc:
        if exc.name != "openai":
            raise  # one of openai's own dependencies is missing; keep the real cause
        wrapper = "AsyncOpenAI" if async_ else "OpenAI"
        raise ModuleNotFoundError(
            f"Using tokenwise.{wrapper} requires the 'openai' package. "
            "Install it with: pip install tokenwise-sdk[openai]",
            name="openai",
        ) from exc

    cls = openai.AsyncOpenAI if async_ else openai.OpenAI
    return cls(*args, **kwargs)
32
+
33
+
34
def _maybe_inject_usage(kwargs: dict) -> None:
    """Request usage reporting on a streamed call, unless the caller chose.

    Mutates ``kwargs`` in place: when ``stream`` is truthy and
    ``stream_options`` is missing or ``None``, it is set to
    ``{"include_usage": True}``. Caller-supplied options are never modified.
    """
    if not kwargs.get("stream"):
        return
    if kwargs.get("stream_options") is not None:
        return
    kwargs["stream_options"] = {"include_usage": True}
38
+
39
+
40
class _StreamAccumulator:
    """Remembers the model and usage seen on chat-completion chunks.

    Only ``chunk.model`` and ``chunk.usage`` are read; no content is touched.
    """

    def __init__(self) -> None:
        self.model = "unknown"
        self.usage = None

    def observe(self, chunk: Any) -> None:
        """Record the chunk's model (if truthy) and usage (if not ``None``)."""
        seen_model = getattr(chunk, "model", None)
        if seen_model:
            self.model = seen_model
        seen_usage = getattr(chunk, "usage", None)  # None on all but the final chunk
        if seen_usage is not None:
            self.usage = seen_usage

    def fields(self) -> dict:
        """Return the metadata dict; all counters are 0 if no usage was seen."""
        if self.usage is not None:
            return cap.openai_usage_fields(self.model, self.usage)
        return {
            "model": self.model,
            "input_tokens": 0,
            "output_tokens": 0,
            "cache_read_input_tokens": 0,
            "cache_creation_input_tokens": 0,
        }
60
+
61
+
62
class _SyncStreamProxy:
    """Pass-through wrapper around the stream returned by a sync ``create``.

    Every chunk is shown to a ``_StreamAccumulator`` and then handed to the
    caller unchanged. One usage event is captured when the stream ends: on
    exhaustion, on an error while reading, or when the ``with`` block exits.
    ``_done`` guarantees it is captured at most once.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Reached only for names not found on the proxy itself. Raise
        # AttributeError (not KeyError) on a half-built instance so hasattr()
        # and copy/pickle probing behave normally.
        stream = self.__dict__.get("_stream")
        if stream is None:
            raise AttributeError(name)
        return getattr(stream, name)

    def __iter__(self):
        try:
            for chunk in self._stream:
                self._acc.observe(chunk)
                yield chunk
        finally:
            self._finish()

    def __next__(self):
        # next(proxy) looks __next__ up on the type and never consults
        # __getattr__, so it has to be forwarded explicitly.
        try:
            chunk = next(self._stream)
        except BaseException:  # includes StopIteration: the stream is over either way
            self._finish()
            raise
        self._acc.observe(chunk)
        return chunk

    def __enter__(self):
        self._stream.__enter__()
        return self

    def __exit__(self, *exc: Any):
        try:
            return self._stream.__exit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage once; later calls are no-ops."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields(), latency, True),
        )
100
+
101
+
102
class _AsyncStreamProxy:
    """Pass-through wrapper around the stream returned by an async ``create``.

    Every chunk is shown to a ``_StreamAccumulator`` and then handed to the
    caller unchanged. One usage event is captured when the stream ends: on
    exhaustion, on an error while reading, or when the ``async with`` block
    exits. ``_done`` guarantees it is captured at most once.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Reached only for names not found on the proxy itself. Raise
        # AttributeError (not KeyError) on a half-built instance so hasattr()
        # and copy/pickle probing behave normally.
        stream = self.__dict__.get("_stream")
        if stream is None:
            raise AttributeError(name)
        return getattr(stream, name)

    async def __aiter__(self):
        try:
            async for chunk in self._stream:
                self._acc.observe(chunk)
                yield chunk
        finally:
            self._finish()

    async def __anext__(self):
        # __anext__ is looked up on the type and never through __getattr__,
        # so stepping the proxy manually needs an explicit forwarder.
        try:
            chunk = await self._stream.__anext__()
        except BaseException:  # includes StopAsyncIteration and cancellation
            self._finish()
            raise
        self._acc.observe(chunk)
        return chunk

    async def __aenter__(self):
        await self._stream.__aenter__()
        return self

    async def __aexit__(self, *exc: Any):
        try:
            return await self._stream.__aexit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage once; later calls are no-ops."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields(), latency, True),
        )
140
+
141
+
142
+ # ── resource proxies: client.chat.completions.create ────────────────────────────
143
+
144
class _Completions:
    """Proxy for ``client.chat.completions`` that instruments ``create`` only.

    Every other attribute is delegated to the real resource. For an async
    client ``create`` returns the coroutine produced by ``_acreate``.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._tw = tw
        self._async = async_

    def __getattr__(self, name: str) -> Any:
        # Same guard as _BaseOpenAI: a half-built instance must raise
        # AttributeError, not KeyError, so hasattr()/copy keep working.
        real = self.__dict__.get("_real")
        if real is None:
            raise AttributeError(name)
        return getattr(real, name)

    def create(self, *args: Any, **kwargs: Any):
        """Forward to the real ``create`` and capture usage metadata.

        For a streamed call, usage reporting is requested via
        ``_maybe_inject_usage`` and the result is wrapped in a stream proxy
        that captures usage once the stream ends. Otherwise usage is captured
        from the returned completion right away.
        """
        if self._async:
            return self._acreate(*args, **kwargs)
        streaming = bool(kwargs.get("stream"))
        if streaming:
            _maybe_inject_usage(kwargs)
        t0 = time.perf_counter()
        result = self._real.create(*args, **kwargs)
        if streaming:
            return _SyncStreamProxy(result, self._tw, t0)
        self._record(result, t0)
        return result

    async def _acreate(self, *args: Any, **kwargs: Any):
        """Async counterpart of :meth:`create`."""
        streaming = bool(kwargs.get("stream"))
        if streaming:
            _maybe_inject_usage(kwargs)
        t0 = time.perf_counter()
        result = await self._real.create(*args, **kwargs)
        if streaming:
            return _AsyncStreamProxy(result, self._tw, t0)
        self._record(result, t0)
        return result

    def _record(self, result: Any, t0: float) -> None:
        """Capture one non-streamed usage event for ``result``."""
        latency = int((time.perf_counter() - t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(
                _PROVIDER, _ENDPOINT,
                cap.openai_usage_fields(getattr(result, "model", "unknown"),
                                        getattr(result, "usage", None)),
                latency, False,
            ),
        )
194
+
195
+
196
class _Chat:
    """Proxy for ``client.chat`` exposing an instrumented ``completions``.

    Every other attribute is delegated to the real ``chat`` resource.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._completions = _Completions(real.completions, tw, async_)

    @property
    def completions(self) -> _Completions:
        return self._completions

    def __getattr__(self, name: str) -> Any:
        # Same guard as _BaseOpenAI: a half-built instance must raise
        # AttributeError, not KeyError, so hasattr()/copy keep working.
        real = self.__dict__.get("_real")
        if real is None:
            raise AttributeError(name)
        return getattr(real, name)
207
+
208
+
209
class _BaseOpenAI:
    """Shared implementation of the sync and async OpenAI wrappers.

    Builds the real client, obtains the shared Tokenwise shipper and exposes
    an instrumented ``chat`` resource. Any other attribute is looked up on the
    real client.
    """

    _ASYNC = False

    def __init__(
        self,
        *args: Any,
        tokenwise_key: str | None = None,
        tokenwise_url: str | None = None,
        **kwargs: Any,
    ) -> None:
        # The tokenwise_* keywords are consumed here; everything else is
        # passed to the official client untouched.
        is_async = self._ASYNC
        self._client = real = _new_real(is_async, args, kwargs)
        self._tw = shipper = get_client(resolve_config(tokenwise_key, tokenwise_url))
        self._chat = _Chat(real.chat, shipper, is_async)

    @property
    def chat(self) -> _Chat:
        return self._chat

    def __getattr__(self, name: str) -> Any:
        # Only called when normal lookup fails; before __init__ has stored
        # the real client there is nothing to delegate to.
        client = vars(self).get("_client")
        if client is not None:
            return getattr(client, name)
        raise AttributeError(name)
232
+
233
+
234
class OpenAI(_BaseOpenAI):
    """Drop-in replacement for ``openai.OpenAI``."""

    _ASYNC = False

    # `with` looks __enter__/__exit__ up on the type and never falls back to
    # __getattr__, so the context-manager protocol is forwarded explicitly.
    def __enter__(self):
        self._client.__enter__()
        return self

    def __exit__(self, exc_type, exc, tb):
        return self._client.__exit__(exc_type, exc, tb)
238
+
239
+
240
class AsyncOpenAI(_BaseOpenAI):
    """Drop-in replacement for ``openai.AsyncOpenAI``."""

    _ASYNC = True

    # `async with` looks __aenter__/__aexit__ up on the type and never falls
    # back to __getattr__, so the protocol is forwarded explicitly.
    async def __aenter__(self):
        await self._client.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        return await self._client.__aexit__(exc_type, exc, tb)
@@ -0,0 +1,149 @@
1
+ Metadata-Version: 2.4
2
+ Name: tokenwise-sdk
3
+ Version: 0.1.1
4
+ Summary: Metadata-only usage tracking for Anthropic and OpenAI — swap one import line.
5
+ Project-URL: Homepage, https://tokenwise.io
6
+ Project-URL: Documentation, https://docs.tokenwise.io
7
+ Author: Tokenwise
8
+ License: MIT
9
+ Keywords: anthropic,cost,llm,observability,openai,tokens,usage
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Python: >=3.9
18
+ Requires-Dist: httpx>=0.23
19
+ Provides-Extra: anthropic
20
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
21
+ Provides-Extra: dev
22
+ Requires-Dist: anthropic>=0.40; extra == 'dev'
23
+ Requires-Dist: openai>=1.40; extra == 'dev'
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Provides-Extra: openai
26
+ Requires-Dist: openai>=1.40; extra == 'openai'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # Tokenwise Python SDK
30
+
31
+ Metadata-only usage tracking for Anthropic and OpenAI. Swap **one import line**
32
+ and every API call's token counts and latency flow to your Tokenwise dashboard
33
+ — with **zero access to your prompts or responses**.
34
+
35
+ ```diff
36
+ - from anthropic import Anthropic
37
+ + from tokenwise import Anthropic
38
+ ```
39
+
40
+ Your code is otherwise unchanged: the wrapper exposes the identical interface,
41
+ forwards every call to the official SDK, and returns its response untouched.
42
+
43
+ ## Why it's safe
44
+
45
+ - **Metadata only.** The SDK reads exactly: `model`, `input_tokens`,
46
+ `output_tokens`, `cache_read_input_tokens`, `cache_creation_input_tokens`,
47
+ `latency_ms`, `timestamp`, `endpoint`. It never reads or transmits prompt
48
+ text, response text, system prompts, or tool definitions. (Contrast with
49
+ proxy-based tools, which see all your traffic.)
50
+ - **Non-blocking.** Events are queued and sent on a background daemon thread.
51
+ If Tokenwise is slow or down, your AI calls complete normally.
52
+ - **Fail-silent + bounded.** Up to 1,000 events buffer when offline; the oldest
53
+ drop silently if the buffer fills. Capture never raises, never waits.
54
+
55
+ ## Install
56
+
57
+ ```bash
58
+ pip install "tokenwise-sdk[anthropic]" # if you use Anthropic
59
+ pip install "tokenwise-sdk[openai]" # if you use OpenAI
60
+ pip install "tokenwise-sdk[anthropic,openai]"
61
+ ```
62
+
63
+ `anthropic` and `openai` are optional extras — install only what you use.
64
+
65
+ ## Configure
66
+
67
+ Set your Tokenwise key (from the dashboard, looks like `tw_...`):
68
+
69
+ ```bash
70
+ export TOKENWISE_API_KEY=tw_your_key
71
+ # optional:
72
+ export TOKENWISE_API_URL=https://tokenwise-production-aa59.up.railway.app # default
73
+ export TOKENWISE_DISABLED=true # emergency kill switch
74
+ ```
75
+
76
+ Precedence for every setting: constructor argument > environment variable >
77
+ default. If no key is configured the SDK runs disabled and your AI calls behave
78
+ exactly as they would with the official SDK.
79
+
80
+ ## Usage
81
+
82
+ ```python
83
+ # Pattern 1 — key from environment
84
+ import os
85
+ os.environ["TOKENWISE_API_KEY"] = "tw_abc123"
86
+ from tokenwise import Anthropic
87
+ client = Anthropic(api_key="sk-ant-...")
88
+ msg = client.messages.create(
89
+ model="claude-sonnet-4-6",
90
+ max_tokens=256,
91
+ messages=[{"role": "user", "content": "Hello"}],
92
+ )
93
+
94
+ # Pattern 2 — key passed explicitly
95
+ from tokenwise import Anthropic
96
+ client = Anthropic(api_key="sk-ant-...", tokenwise_key="tw_abc123")
97
+
98
+ # Pattern 3 — OpenAI
99
+ from tokenwise import OpenAI
100
+ client = OpenAI(api_key="sk-...", tokenwise_key="tw_abc123")
101
+ client.chat.completions.create(
102
+ model="gpt-5.4",
103
+ messages=[{"role": "user", "content": "Hello"}],
104
+ )
105
+ ```
106
+
107
+ Streaming and async work the same way:
108
+
109
+ ```python
110
+ # Streaming (sync) — usage captured on stream completion
111
+ with client.messages.create(..., stream=True) as stream:
112
+ for event in stream:
113
+ ...
114
+
115
+ # Async
116
+ from tokenwise import AsyncAnthropic
117
+ client = AsyncAnthropic(api_key="sk-ant-...", tokenwise_key="tw_abc123")
118
+ msg = await client.messages.create(...)
119
+ ```
120
+
121
+ ## What's instrumented (v1)
122
+
123
+ | Provider | Method | Streaming |
124
+ |----------|--------|-----------|
125
+ | Anthropic | `messages.create` | ✅ usage read from the event stream (request unchanged) |
126
+ | OpenAI | `chat.completions.create` | ✅ see note below |
127
+
128
+ Other methods pass through and work, but aren't yet recorded. (OpenAI Responses
129
+ API and legacy completions are planned.)
130
+
131
+ ### Note on OpenAI streaming
132
+
133
+ OpenAI only returns token usage on a streamed response when the request includes
134
+ `stream_options={"include_usage": True}`. When you stream **without** supplying
135
+ your own `stream_options`, Tokenwise injects it for you so usage can be captured.
136
+ This adds one final usage-only chunk (with an empty `choices` list) to the
137
+ stream. If you already pass `stream_options`, Tokenwise respects yours and does
138
+ not modify the request (in that case usage is captured only if you enabled it).
139
+
140
+ ### Latency semantics
141
+
142
+ For non-streaming calls, `latency_ms` is the wall-clock time of the call. For
143
+ streaming calls it is the **total stream duration** (until the last chunk is
144
+ consumed), which includes time your code spends between chunks — events from
145
+ streaming calls carry `streamed: true` so this is distinguishable.
146
+
147
+ ## License
148
+
149
+ MIT
@@ -0,0 +1,11 @@
1
+ tokenwise/__init__.py,sha256=6mqEA1o9s3Tx8vautqX1d7psARfzjzt10CxAFqqbU9I,2234
2
+ tokenwise/_capture.py,sha256=GFYUqeKSjl2fIQgvVPR7OinO3xkpvTDlGK0w0uI3e38,3073
3
+ tokenwise/_version.py,sha256=1fcON3TaH_saTfQzv-QWlGmS61oADXGsc-A7KNbVS4Y,80
4
+ tokenwise/anthropic.py,sha256=Grjf5RhB8wKnWWYMP-FIKAkLGJ2sxA_PjvJyJyskIS8,7811
5
+ tokenwise/client.py,sha256=Hi4PLZFzSFP4p0pWZMaFljgYC1295Xp7GANU1kkF0m4,6719
6
+ tokenwise/config.py,sha256=m-tXSxpvZC_SQrGVdYG4VHXIxQ99S7QtbsrWYTydtM4,2397
7
+ tokenwise/event.py,sha256=82PSQFb63qqBd8h8cTNBj2bl8v0tSUaqgNgulW0v8wA,1111
8
+ tokenwise/openai.py,sha256=L_D7G9fdyl140gSqjpY8FBpaTHJt9wkt3BXKyAcxv60,7725
9
+ tokenwise_sdk-0.1.1.dist-info/METADATA,sha256=_m8sVPdOrSxT0PhlR8dzUQdWMW18F8kuEBLvLJiLJto,5248
10
+ tokenwise_sdk-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
11
+ tokenwise_sdk-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any