tokenwise-sdk 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenwise/__init__.py +55 -0
- tokenwise/_capture.py +90 -0
- tokenwise/_version.py +3 -0
- tokenwise/anthropic.py +226 -0
- tokenwise/client.py +167 -0
- tokenwise/config.py +66 -0
- tokenwise/event.py +31 -0
- tokenwise/openai.py +243 -0
- tokenwise_sdk-0.1.1.dist-info/METADATA +149 -0
- tokenwise_sdk-0.1.1.dist-info/RECORD +11 -0
- tokenwise_sdk-0.1.1.dist-info/WHEEL +4 -0
tokenwise/__init__.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Tokenwise SDK — metadata-only usage tracking for Anthropic and OpenAI.
|
|
2
|
+
|
|
3
|
+
Swap one import line to start capturing per-call token/latency metadata:
|
|
4
|
+
|
|
5
|
+
from tokenwise import Anthropic # instead of: from anthropic import Anthropic
|
|
6
|
+
client = Anthropic(api_key="sk-ant-...")
|
|
7
|
+
|
|
8
|
+
The wrappers expose the identical interface to the official SDKs and forward
|
|
9
|
+
every call untouched. After each response, metadata only (model, token counts,
|
|
10
|
+
latency) is shipped to Tokenwise on a background thread — never any prompt or
|
|
11
|
+
response content. If Tokenwise is unreachable the SDK fails silently and your
|
|
12
|
+
AI calls are unaffected.
|
|
13
|
+
|
|
14
|
+
Configuration (env or constructor kwargs ``tokenwise_key`` / ``tokenwise_url``):
|
|
15
|
+
TOKENWISE_API_KEY, TOKENWISE_API_URL, TOKENWISE_DISABLED
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
from tokenwise._version import __version__
|
|
23
|
+
|
|
24
|
+
# Provider wrappers are resolved lazily so that `anthropic` and `openai` are
|
|
25
|
+
# OPTIONAL dependencies: importing tokenwise itself requires neither of them.
|
|
26
|
+
_LAZY = {
|
|
27
|
+
"Anthropic": ("tokenwise.anthropic", "Anthropic"),
|
|
28
|
+
"AsyncAnthropic": ("tokenwise.anthropic", "AsyncAnthropic"),
|
|
29
|
+
"OpenAI": ("tokenwise.openai", "OpenAI"),
|
|
30
|
+
"AsyncOpenAI": ("tokenwise.openai", "AsyncOpenAI"),
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
__all__ = ["Anthropic", "AsyncAnthropic", "OpenAI", "AsyncOpenAI", "__version__"]
|
|
34
|
+
|
|
35
|
+
if TYPE_CHECKING: # for type checkers / IDEs only
|
|
36
|
+
from tokenwise.anthropic import Anthropic, AsyncAnthropic
|
|
37
|
+
from tokenwise.openai import AsyncOpenAI, OpenAI
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def __getattr__(name: str):
    """Resolve a lazily exported wrapper class on first attribute access.

    Module-level ``__getattr__`` hook: looks ``name`` up in ``_LAZY``, imports
    the wrapper module, and returns the requested attribute. The resolved
    object is cached in the module globals so later accesses bypass this hook.

    Raises:
        AttributeError: ``name`` is not one of the lazy exports.
        ImportError: the wrapper module could not be imported. Only a failure
            that names the provider package is rewritten into the install
            hint; any other import failure is re-raised unchanged so that its
            real cause is not mislabelled.
    """
    target = _LAZY.get(name)
    if target is None:
        raise AttributeError(f"module 'tokenwise' has no attribute {name!r}")
    module_name, attr = target
    import importlib

    try:
        module = importlib.import_module(module_name)
    except ImportError as exc:
        provider = "anthropic" if "anthropic" in module_name else "openai"
        # ``exc.name`` is the module that failed to import (may be a submodule).
        missing = (getattr(exc, "name", None) or "").partition(".")[0]
        if missing != provider:
            raise
        raise ImportError(
            f"Using tokenwise.{name} requires the '{provider}' package. "
            f"Install it with: pip install tokenwise-sdk[{provider}]"
        ) from exc

    value = getattr(module, attr)
    globals()[name] = value  # cache: subsequent lookups never reach this hook
    return value
|
tokenwise/_capture.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Metadata extraction helpers shared by the provider wrappers.
|
|
2
|
+
|
|
3
|
+
Everything here reads ONLY token-usage and model fields off a response object.
|
|
4
|
+
No function in this module reads message content, choices text, deltas' text,
|
|
5
|
+
system prompts, or tool definitions.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from tokenwise.client import TokenwiseClient
|
|
15
|
+
from tokenwise.event import UsageEvent
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("tokenwise")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def now_iso() -> str:
    """Return the current UTC time, to whole seconds, as ISO-8601 ending in ``Z``."""
    utc_now = datetime.now(timezone.utc)
    # An aware UTC datetime renders its offset as "+00:00"; swap that for "Z".
    return utc_now.isoformat(timespec="seconds").replace("+00:00", "Z")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _int(obj: Any, name: str, default: int = 0) -> int:
    """Read attribute ``name`` off ``obj`` and coerce it to ``int``.

    Returns ``default`` when the attribute is missing, is ``None``, or cannot
    be converted (non-numeric text, NaN, or an infinite float).
    """
    value = getattr(obj, name, None)
    if value is None:
        return default
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — treat like any other bad value.
        return default
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def anthropic_usage_fields(model: str, usage: Any) -> dict:
    """Build the metadata dict from an Anthropic ``Usage``/``MessageDeltaUsage``.

    Only the four token counters are read off ``usage``; each is coerced with
    ``_int``, so a missing or ``None`` counter becomes 0.
    """
    counter_names = (
        "input_tokens",
        "output_tokens",
        "cache_read_input_tokens",
        "cache_creation_input_tokens",
    )
    extracted = {"model": model}
    for counter in counter_names:
        extracted[counter] = _int(usage, counter)
    return extracted
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def openai_usage_fields(model: str, usage: Any) -> dict:
    """Build the metadata dict from an OpenAI ``CompletionUsage`` object.

    Cached prompt tokens are read from ``usage.prompt_tokens_details`` when
    that sub-object is present; otherwise the cache-read count is 0.
    """
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    cache_hits = 0 if prompt_details is None else _int(prompt_details, "cached_tokens")
    extracted = {"model": model}
    extracted["input_tokens"] = _int(usage, "prompt_tokens")
    extracted["output_tokens"] = _int(usage, "completion_tokens")
    extracted["cache_read_input_tokens"] = cache_hits
    # OpenAI has no cache-creation concept; always 0.
    extracted["cache_creation_input_tokens"] = 0
    return extracted
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def make_event(
    provider: str,
    endpoint: str,
    fields: dict,
    latency_ms: int,
    streamed: bool,
) -> UsageEvent:
    """Assemble a ``UsageEvent`` from extracted usage ``fields``.

    A missing or empty model becomes ``"unknown"``, absent counters default to
    0, and the event is stamped with the current UTC time.
    """

    def counter(key: str) -> int:
        # Absent keys count as zero; present values are coerced with int().
        return int(fields.get(key, 0))

    return UsageEvent(
        provider=provider,
        endpoint=endpoint,
        model=str(fields.get("model") or "unknown"),
        input_tokens=counter("input_tokens"),
        output_tokens=counter("output_tokens"),
        cache_read_input_tokens=counter("cache_read_input_tokens"),
        cache_creation_input_tokens=counter("cache_creation_input_tokens"),
        latency_ms=latency_ms,
        timestamp=now_iso(),
        streamed=streamed,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def safe_capture(client: TokenwiseClient, event_factory) -> None:
    """Create an event via ``event_factory`` and hand it to ``client.capture``.

    ``event_factory`` takes no arguments and returns a UsageEvent. It is
    invoked inside the ``try`` so that a failure while extracting metadata is
    logged at debug level and never propagates to the caller.
    """
    try:
        enqueue = client.capture
        enqueue(event_factory())
    except Exception:
        logger.debug("tokenwise: capture skipped due to error", exc_info=True)
|
tokenwise/_version.py
ADDED
tokenwise/anthropic.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Drop-in wrappers for the official ``anthropic`` SDK.
|
|
2
|
+
|
|
3
|
+
``from tokenwise import Anthropic`` exposes the exact same interface as
|
|
4
|
+
``anthropic.Anthropic``; only ``messages.create`` is instrumented. Every other
|
|
5
|
+
attribute and method is delegated to the real client untouched, so any
|
|
6
|
+
parameter the official SDK adds passes straight through.
|
|
7
|
+
|
|
8
|
+
Streaming usage is captured transparently from the event stream
|
|
9
|
+
(``message_start`` carries input/cache tokens; ``message_delta`` carries the
|
|
10
|
+
final ``output_tokens``) — the caller's request is not modified.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from tokenwise import _capture as cap
|
|
19
|
+
from tokenwise.client import get_client
|
|
20
|
+
from tokenwise.config import resolve_config
|
|
21
|
+
|
|
22
|
+
_PROVIDER = "anthropic"
|
|
23
|
+
_ENDPOINT = "messages"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _new_real(async_: bool, args: tuple, kwargs: dict):
    """Instantiate the official Anthropic client (async variant if ``async_``).

    ``anthropic`` is imported here, not at module import time, so it stays an
    optional dependency. Because of that, this is the point where a missing
    SDK actually surfaces, so the install hint is attached here.

    Raises:
        ModuleNotFoundError: the ``anthropic`` package is not installed.
    """
    try:
        import anthropic  # imported lazily so `anthropic` is an optional dependency
    except ModuleNotFoundError as exc:
        if exc.name != "anthropic":
            raise  # the SDK is installed; one of its own imports is missing
        raise ModuleNotFoundError(
            "The tokenwise Anthropic wrappers require the 'anthropic' package. "
            "Install it with: pip install tokenwise-sdk[anthropic]",
            name="anthropic",
        ) from exc

    cls = anthropic.AsyncAnthropic if async_ else anthropic.Anthropic
    return cls(*args, **kwargs)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── streaming proxies ──────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
class _StreamAccumulator:
    """Collects usage fields off Anthropic stream events. Reads no content."""

    def __init__(self) -> None:
        self.model = "unknown"
        self.fields = {
            "model": "unknown", "input_tokens": 0, "output_tokens": 0,
            "cache_read_input_tokens": 0, "cache_creation_input_tokens": 0,
        }

    def observe(self, event: Any) -> None:
        """Fold one stream event into the running usage fields.

        Events that carry neither a ``message`` nor a ``usage`` attribute are
        ignored.
        """
        # message_start: event.message has model + initial usage (input/cache).
        message = getattr(event, "message", None)
        if message is not None:
            model = getattr(message, "model", None)
            if model:
                # Record the model even if this event carries no usage block,
                # and keep the ``model`` attribute in step with ``fields``.
                self.model = model
                self.fields["model"] = model
            usage = getattr(message, "usage", None)
            if usage is not None:
                merged = cap.anthropic_usage_fields(self.fields["model"], usage)
                # output_tokens in message_start is partial; keep our running value.
                merged["output_tokens"] = self.fields["output_tokens"]
                self.fields = merged
        # message_delta: event.usage carries the final cumulative output_tokens.
        usage = getattr(event, "usage", None)
        if usage is not None and getattr(usage, "output_tokens", None) is not None:
            # A None count is skipped so it cannot reset the running value to 0.
            self.fields["output_tokens"] = cap._int(usage, "output_tokens")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class _SyncStreamProxy:
    """Wraps a synchronous Anthropic event stream and records usage at its end.

    Events are passed through unchanged while a ``_StreamAccumulator`` observes
    them. One usage event is captured the first time the stream finishes,
    fails, or its context manager exits. Any attribute not defined here is
    delegated to the wrapped stream.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Read via __dict__ so a missing ``_stream`` raises KeyError, not recursion.
        return getattr(self.__dict__["_stream"], name)

    def __iter__(self):
        try:
            for event in self._stream:
                self._acc.observe(event)
                yield event
        finally:
            self._finish()

    def __next__(self):
        # ``next(proxy)`` looks ``__next__`` up on the type and never consults
        # ``__getattr__``, so it must be defined here explicitly.
        try:
            event = next(self._stream)
        except BaseException:
            # Exhaustion (StopIteration) or a failure: the stream is over.
            self._finish()
            raise
        self._acc.observe(event)
        return event

    def __enter__(self):
        self._stream.__enter__()
        return self

    def __exit__(self, *exc: Any):
        try:
            return self._stream.__exit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage exactly once."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields, latency, True),
        )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class _AsyncStreamProxy:
    """Wraps an asynchronous Anthropic event stream and records usage at its end.

    Events are passed through unchanged while a ``_StreamAccumulator`` observes
    them. One usage event is captured the first time the stream finishes,
    fails, or its async context manager exits. Any attribute not defined here
    is delegated to the wrapped stream.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Read via __dict__ so a missing ``_stream`` raises KeyError, not recursion.
        return getattr(self.__dict__["_stream"], name)

    async def __aiter__(self):
        try:
            async for event in self._stream:
                self._acc.observe(event)
                yield event
        finally:
            self._finish()

    async def __anext__(self):
        # Special methods are looked up on the type, bypassing ``__getattr__``,
        # so manual stepping needs an explicit ``__anext__`` here.
        try:
            event = await self._stream.__anext__()
        except BaseException:
            # Exhaustion (StopAsyncIteration), cancellation, or a failure.
            self._finish()
            raise
        self._acc.observe(event)
        return event

    async def __aenter__(self):
        await self._stream.__aenter__()
        return self

    async def __aexit__(self, *exc: Any):
        try:
            return await self._stream.__aexit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage exactly once."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields, latency, True),
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ── messages resource proxy ─────────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
class _Messages:
    """Proxy for the client's ``messages`` resource; instruments ``create`` only.

    Every other attribute is fetched from the wrapped resource unchanged.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._tw = tw
        self._async = async_

    def __getattr__(self, name: str) -> Any:
        wrapped = self.__dict__["_real"]
        return getattr(wrapped, name)

    def create(self, *args: Any, **kwargs: Any):
        """Forward to the real ``create``; returns a coroutine in async mode."""
        if self._async:
            return self._acreate(*args, **kwargs)
        started = time.perf_counter()
        response = self._real.create(*args, **kwargs)
        if kwargs.get("stream"):
            # Streaming: usage is captured by the proxy once the stream ends.
            return _SyncStreamProxy(response, self._tw, started)
        self._tw_record(response, started)
        return response

    async def _acreate(self, *args: Any, **kwargs: Any):
        started = time.perf_counter()
        response = await self._real.create(*args, **kwargs)
        if kwargs.get("stream"):
            return _AsyncStreamProxy(response, self._tw, started)
        self._tw_record(response, started)
        return response

    def _tw_record(self, response: Any, started: float) -> None:
        # Shared non-streaming path: time the call, then capture its usage.
        elapsed_ms = int((time.perf_counter() - started) * 1000)

        def build_event():
            usage_fields = cap.anthropic_usage_fields(
                getattr(response, "model", "unknown"),
                getattr(response, "usage", None),
            )
            return cap.make_event(_PROVIDER, _ENDPOINT, usage_fields, elapsed_ms, False)

        cap.safe_capture(self._tw, build_event)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ── top-level client wrappers ───────────────────────────────────────────────────
|
|
191
|
+
|
|
192
|
+
class _BaseAnthropic:
    """Shared implementation of the sync and async Anthropic wrappers.

    Builds the real SDK client from the caller's arguments, obtains the shared
    Tokenwise client for the resolved configuration, and exposes an
    instrumented ``messages`` resource. Anything else is delegated to the real
    client.
    """

    _ASYNC = False

    def __init__(
        self,
        *args: Any,
        tokenwise_key: str | None = None,
        tokenwise_url: str | None = None,
        **kwargs: Any,
    ) -> None:
        # The tokenwise_* keywords are consumed here; the rest goes to the SDK.
        self._client = _new_real(self._ASYNC, args, kwargs)
        config = resolve_config(tokenwise_key, tokenwise_url)
        self._tw = get_client(config)
        self._messages = _Messages(self._client.messages, self._tw, self._ASYNC)

    @property
    def messages(self) -> _Messages:
        return self._messages

    def __getattr__(self, name: str) -> Any:
        # Only reached for names not found normally. Going through __dict__
        # avoids recursing when ``_client`` has not been assigned yet.
        inner = self.__dict__.get("_client")
        if inner is not None:
            return getattr(inner, name)
        raise AttributeError(name)


class Anthropic(_BaseAnthropic):
    """Synchronous wrapper; stands in for ``anthropic.Anthropic``."""

    _ASYNC = False


class AsyncAnthropic(_BaseAnthropic):
    """Asynchronous wrapper; stands in for ``anthropic.AsyncAnthropic``."""

    _ASYNC = True
|
tokenwise/client.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Internal client that buffers usage events and ships them in the background.
|
|
2
|
+
|
|
3
|
+
Guarantees (all non-negotiable, see README):
|
|
4
|
+
* ``capture()`` never blocks the caller — it appends to an in-memory
|
|
5
|
+
``deque`` under a short lock and returns immediately.
|
|
6
|
+
* If the Tokenwise API is slow or down, the caller's AI calls are unaffected;
|
|
7
|
+
sending happens only on a background daemon thread.
|
|
8
|
+
* The buffer is bounded at ``max_buffer`` events. ``deque(maxlen=...)`` drops
|
|
9
|
+
the OLDEST event silently when full — capture never raises, never waits.
|
|
10
|
+
* Failed sends are retried on the next flush; events are put back at the
|
|
11
|
+
front of the buffer (and may age out if the buffer keeps overflowing).
|
|
12
|
+
|
|
13
|
+
A process-wide registry returns one shared client per (api_key, api_url) so
|
|
14
|
+
that wrapping several SDK clients with the same key reuses one worker thread.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import atexit
|
|
20
|
+
import logging
|
|
21
|
+
import threading
|
|
22
|
+
from collections import deque
|
|
23
|
+
from typing import TYPE_CHECKING
|
|
24
|
+
|
|
25
|
+
from tokenwise.config import Config
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from tokenwise.event import UsageEvent
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger("tokenwise")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TokenwiseClient:
    """Background, fail-silent shipper of :class:`UsageEvent` batches.

    ``capture`` appends to a bounded in-memory deque; a daemon worker thread,
    started lazily on first use, POSTs the buffered events in batches. A batch
    that fails transiently is put back for the next flush cycle; a batch the
    server rejects permanently is dropped so it cannot block newer events.
    """

    # 4xx statuses that describe a transient condition and are worth retrying.
    _RETRYABLE_4XX = frozenset({408, 425, 429})

    def __init__(self, config: Config) -> None:
        self._config = config
        self._buffer: deque[UsageEvent] = deque(maxlen=config.max_buffer)
        self._lock = threading.Lock()
        self._wake = threading.Event()
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None
        self._http = None  # lazily created httpx.Client on the worker thread
        self._started = False
        # Events popped from the buffer whose send has not resolved yet.
        self._inflight = 0

    # ── public ────────────────────────────────────────────────────────────────

    def capture(self, event: UsageEvent) -> None:
        """Enqueue an event. Never blocks, never raises."""
        if not self._config.enabled:
            return
        try:
            self._ensure_started()
            with self._lock:
                self._buffer.append(event)  # drops oldest if full (maxlen)
                if len(self._buffer) >= self._config.batch_size:
                    self._wake.set()
        except Exception:  # capture must never surface an error to the caller
            logger.debug("tokenwise: capture failed (ignored)", exc_info=True)

    def flush(self, timeout: float = 2.0) -> None:
        """Best-effort synchronous flush (used by tests and atexit).

        Wakes the worker and polls until the buffer is empty and no batch is
        still being sent, or until ``timeout`` seconds have elapsed.
        """
        import time  # local import: this module has no top-level ``time``

        if not self._config.enabled:
            return
        self._ensure_started()
        self._wake.set()
        deadline = time.monotonic() + timeout
        while True:
            with self._lock:
                # An empty buffer alone is not enough: a batch may be in flight.
                if not self._buffer and not self._inflight:
                    return
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                return
            time.sleep(min(0.05, remaining))

    # ── lifecycle ───────────────────────────────────────────────────────────────

    def _ensure_started(self) -> None:
        """Start the worker thread once and register the atexit shutdown."""
        if self._started:
            return
        with self._lock:
            if self._started:
                return
            self._thread = threading.Thread(
                target=self._run, name="tokenwise-worker", daemon=True
            )
            self._thread.start()
            self._started = True
            atexit.register(self._shutdown)

    def _shutdown(self) -> None:
        """Ask the worker to stop and wait briefly for its final drain."""
        self._stop.set()
        self._wake.set()
        if self._thread is not None:
            self._thread.join(timeout=self._config.http_timeout + 1.0)

    # ── worker ──────────────────────────────────────────────────────────────────

    def _run(self) -> None:
        """Worker loop: create the HTTP client, then drain on wake or interval."""
        try:
            import httpx

            self._http = httpx.Client(timeout=self._config.http_timeout)
        except Exception:
            logger.debug("tokenwise: HTTP client unavailable; disabling sender",
                         exc_info=True)
            return

        while not self._stop.is_set():
            self._wake.wait(self._config.flush_interval)
            self._wake.clear()
            self._drain_and_send()

        # Final drain on shutdown.
        self._drain_and_send()
        try:
            self._http.close()
        except Exception:
            pass

    def _drain_and_send(self) -> None:
        """Send buffered events batch by batch until empty or a send fails."""
        # Guard against a non-positive batch size, which would otherwise make
        # this loop post empty batches forever.
        batch_limit = max(1, self._config.batch_size)
        while True:
            with self._lock:
                if not self._buffer:
                    return
                take = min(batch_limit, len(self._buffer))
                batch = [self._buffer.popleft() for _ in range(take)]
                self._inflight = take
            finished = self._send(batch)
            with self._lock:
                if not finished:
                    self._requeue_locked(batch)
                self._inflight = 0
            if not finished:
                return  # back off until next flush cycle

    def _requeue_locked(self, batch: list[UsageEvent]) -> None:
        """Put a failed batch back at the front. Caller must hold ``_lock``.

        ``extendleft`` on a full bounded deque evicts from the right, i.e. the
        newest events. To keep the drop-oldest policy, only as many of the
        batch's most recent events as currently fit are restored.
        """
        maxlen = self._buffer.maxlen
        if maxlen is not None:
            room = maxlen - len(self._buffer)
            if room <= 0:
                return
            batch = batch[-room:]
        self._buffer.extendleft(reversed(batch))  # reversed: preserves order

    def _send(self, batch: list[UsageEvent]) -> bool:
        """POST a batch. Never raises.

        Returns True when the batch is finished with: accepted (2xx), or
        rejected with a non-retryable 4xx, in which case it is dropped because
        resending the same payload cannot succeed. Returns False when the
        batch should be retried on the next flush.
        """
        if self._http is None:
            return False
        try:
            resp = self._http.post(
                f"{self._config.api_url}/api/ingest/events",
                json={"events": [e.to_dict() for e in batch]},
                headers={"Authorization": f"Bearer {self._config.api_key}"},
            )
            status = resp.status_code
            if 200 <= status < 300:
                return True
            if 400 <= status < 500 and status not in self._RETRYABLE_4XX:
                logger.debug(
                    "tokenwise: batch rejected with HTTP %s; dropping %d event(s)",
                    status, len(batch),
                )
                return True
            return False
        except Exception:
            logger.debug("tokenwise: send failed (will retry)", exc_info=True)
            return False
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# ── process-wide registry ─────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
_registry: dict[tuple[str | None, str], TokenwiseClient] = {}
|
|
156
|
+
_registry_lock = threading.Lock()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def get_client(config: Config) -> TokenwiseClient:
    """Look up, or create and register, the shared client for ``config``.

    Clients are keyed by ``(api_key, api_url)``; the registry lock is held for
    the whole lookup-or-create step.
    """
    registry_key = (config.api_key, config.api_url)
    with _registry_lock:
        shared = _registry.get(registry_key)
        if shared is not None:
            return shared
        shared = TokenwiseClient(config)
        _registry[registry_key] = shared
        return shared
|
tokenwise/config.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Configuration resolution for the Tokenwise SDK.
|
|
2
|
+
|
|
3
|
+
Precedence for every setting: explicit constructor argument > environment
|
|
4
|
+
variable > built-in default.
|
|
5
|
+
|
|
6
|
+
Environment variables:
|
|
7
|
+
TOKENWISE_API_KEY Tokenwise ingest key (``tw_...``). Required to send
|
|
8
|
+
events; if absent the SDK runs in disabled mode and
|
|
9
|
+
the wrapped AI calls work exactly as normal.
|
|
10
|
+
TOKENWISE_API_URL Base URL of the Tokenwise API
|
|
11
|
+
(default ``https://tokenwise-production-aa59.up.railway.app``).
|
|
12
|
+
TOKENWISE_DISABLED If set to a truthy value ("1", "true", "yes", "on"),
|
|
13
|
+
a global kill switch that disables all capture.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
|
|
21
|
+
DEFAULT_API_URL = "https://tokenwise-production-aa59.up.railway.app"
|
|
22
|
+
|
|
23
|
+
# Tuning constants — conservative defaults that keep the SDK invisible to the
|
|
24
|
+
# host application. None of these ever block the caller's thread.
|
|
25
|
+
DEFAULT_MAX_BUFFER = 1_000 # events retained when the API is unreachable
|
|
26
|
+
DEFAULT_BATCH_SIZE = 50 # events per POST
|
|
27
|
+
DEFAULT_FLUSH_INTERVAL = 1.0 # seconds between background flush attempts
|
|
28
|
+
DEFAULT_HTTP_TIMEOUT = 5.0 # seconds; background only, never blocks caller
|
|
29
|
+
|
|
30
|
+
_TRUTHY = {"1", "true", "yes", "on"}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _env_truthy(name: str) -> bool:
    """Report whether environment variable ``name`` holds a truthy flag value.

    The value is trimmed and lower-cased before being checked against
    ``_TRUTHY``; an unset variable counts as false.
    """
    raw_value = os.environ.get(name, "")
    normalized = raw_value.strip().lower()
    return normalized in _TRUTHY
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class Config:
    """Immutable, fully resolved SDK settings."""

    api_key: str | None
    api_url: str
    disabled: bool
    max_buffer: int = DEFAULT_MAX_BUFFER
    batch_size: int = DEFAULT_BATCH_SIZE
    flush_interval: float = DEFAULT_FLUSH_INTERVAL
    http_timeout: float = DEFAULT_HTTP_TIMEOUT

    @property
    def enabled(self) -> bool:
        """Whether capture should run: not disabled and an API key is set."""
        if self.disabled:
            return False
        return bool(self.api_key)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def resolve_config(
    api_key: str | None = None,
    api_url: str | None = None,
) -> Config:
    """Resolve settings: explicit argument, then environment, then default.

    An empty key resolves to ``None``; trailing slashes are stripped from the
    URL. The kill switch is read from ``TOKENWISE_DISABLED`` only.
    """
    environment = os.environ
    resolved_key = api_key or environment.get("TOKENWISE_API_KEY") or None
    resolved_url = api_url or environment.get("TOKENWISE_API_URL") or DEFAULT_API_URL
    kill_switch = _env_truthy("TOKENWISE_DISABLED")
    return Config(
        api_key=resolved_key,
        api_url=resolved_url.rstrip("/"),
        disabled=kill_switch,
    )
|
tokenwise/event.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""The usage event — the SDK's privacy boundary.
|
|
2
|
+
|
|
3
|
+
This dataclass is the ONLY thing the SDK ever transmits. It has fields for
|
|
4
|
+
token counts and timing metadata and **deliberately no field anywhere for
|
|
5
|
+
prompt text, response text, system prompts, tool definitions, or any other
|
|
6
|
+
user content**. Capture code constructs one of these from a response's usage
|
|
7
|
+
block; there is structurally nowhere to put content even by accident.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import asdict, dataclass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class UsageEvent:
    """Immutable, metadata-only description of one AI API call.

    Holds identifiers, token counts and timing only; there is no field for
    prompt or response content.
    """

    provider: str  # "anthropic" | "openai"
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: int
    cache_creation_input_tokens: int
    latency_ms: int
    timestamp: str  # ISO-8601 UTC, e.g. "2026-05-30T15:42:01Z"
    endpoint: str  # "messages" | "chat.completions"
    streamed: bool  # latency_ms is total stream duration when True

    def to_dict(self) -> dict:
        """Return the event as a plain dict keyed by field name."""
        payload = asdict(self)
        return payload
|
tokenwise/openai.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"""Drop-in wrappers for the official ``openai`` SDK.
|
|
2
|
+
|
|
3
|
+
``from tokenwise import OpenAI`` exposes the exact same interface as
|
|
4
|
+
``openai.OpenAI``; only ``chat.completions.create`` is instrumented. Everything
|
|
5
|
+
else delegates to the real client untouched.
|
|
6
|
+
|
|
7
|
+
Streaming nuance: OpenAI only returns ``usage`` on a streamed response when the
|
|
8
|
+
request carries ``stream_options={"include_usage": True}``. Per product
|
|
9
|
+
decision we inject that option **only when the caller did not supply their own
|
|
10
|
+
``stream_options``**, and we parse the trailing usage-only chunk defensively
|
|
11
|
+
(its ``choices`` list is empty — we never index into it).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import time
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
from tokenwise import _capture as cap
|
|
20
|
+
from tokenwise.client import get_client
|
|
21
|
+
from tokenwise.config import resolve_config
|
|
22
|
+
|
|
23
|
+
_PROVIDER = "openai"
|
|
24
|
+
_ENDPOINT = "chat.completions"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _new_real(async_: bool, args: tuple, kwargs: dict):
    """Instantiate the official OpenAI client (async variant if ``async_``).

    ``openai`` is imported here, not at module import time, so it stays an
    optional dependency. Because of that, this is the point where a missing
    SDK actually surfaces, so the install hint is attached here.

    Raises:
        ModuleNotFoundError: the ``openai`` package is not installed.
    """
    try:
        import openai  # lazy import → openai is an optional dependency
    except ModuleNotFoundError as exc:
        if exc.name != "openai":
            raise  # the SDK is installed; one of its own imports is missing
        raise ModuleNotFoundError(
            "The tokenwise OpenAI wrappers require the 'openai' package. "
            "Install it with: pip install tokenwise-sdk[openai]",
            name="openai",
        ) from exc

    cls = openai.AsyncOpenAI if async_ else openai.OpenAI
    return cls(*args, **kwargs)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _maybe_inject_usage(kwargs: dict) -> None:
    """Request usage on a streamed call unless the caller set ``stream_options``.

    Mutates ``kwargs`` in place. Nothing is changed for non-streaming calls or
    when ``stream_options`` is already present with a non-None value.
    """
    if not kwargs.get("stream"):
        return
    if kwargs.get("stream_options") is not None:
        return  # respect whatever the caller chose
    kwargs["stream_options"] = {"include_usage": True}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class _StreamAccumulator:
    """Tracks the model name and latest usage object seen on stream chunks.

    Only ``chunk.model`` and ``chunk.usage`` are read; no content is touched.
    """

    def __init__(self) -> None:
        self.model = "unknown"
        self.usage = None

    def observe(self, chunk: Any) -> None:
        """Remember the chunk's model (if non-empty) and usage (if present)."""
        self.model = getattr(chunk, "model", None) or self.model
        latest_usage = getattr(chunk, "usage", None)  # None on all but the final chunk
        if latest_usage is not None:
            self.usage = latest_usage

    def fields(self) -> dict:
        """Return the metadata dict; all counters are 0 if no usage was seen."""
        if self.usage is not None:
            return cap.openai_usage_fields(self.model, self.usage)
        return {"model": self.model, "input_tokens": 0, "output_tokens": 0,
                "cache_read_input_tokens": 0, "cache_creation_input_tokens": 0}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class _SyncStreamProxy:
    """Wraps a synchronous OpenAI chunk stream and records usage at its end.

    Chunks are passed through unchanged while a ``_StreamAccumulator`` observes
    them. One usage event is captured the first time the stream finishes,
    fails, or its context manager exits. Any attribute not defined here is
    delegated to the wrapped stream.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Read via __dict__ so a missing ``_stream`` raises KeyError, not recursion.
        return getattr(self.__dict__["_stream"], name)

    def __iter__(self):
        try:
            for chunk in self._stream:
                self._acc.observe(chunk)
                yield chunk
        finally:
            self._finish()

    def __next__(self):
        # ``next(proxy)`` looks ``__next__`` up on the type and never consults
        # ``__getattr__``, so it must be defined here explicitly.
        try:
            chunk = next(self._stream)
        except BaseException:
            # Exhaustion (StopIteration) or a failure: the stream is over.
            self._finish()
            raise
        self._acc.observe(chunk)
        return chunk

    def __enter__(self):
        self._stream.__enter__()
        return self

    def __exit__(self, *exc: Any):
        try:
            return self._stream.__exit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage exactly once."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields(), latency, True),
        )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class _AsyncStreamProxy:
    """Wraps an asynchronous OpenAI chunk stream and records usage at its end.

    Chunks are passed through unchanged while a ``_StreamAccumulator`` observes
    them. One usage event is captured the first time the stream finishes,
    fails, or its async context manager exits. Any attribute not defined here
    is delegated to the wrapped stream.
    """

    def __init__(self, stream: Any, tw, t0: float) -> None:
        self._stream = stream
        self._tw = tw
        self._t0 = t0  # perf_counter() reading taken before the request was sent
        self._acc = _StreamAccumulator()
        self._done = False

    def __getattr__(self, name: str) -> Any:
        # Read via __dict__ so a missing ``_stream`` raises KeyError, not recursion.
        return getattr(self.__dict__["_stream"], name)

    async def __aiter__(self):
        try:
            async for chunk in self._stream:
                self._acc.observe(chunk)
                yield chunk
        finally:
            self._finish()

    async def __anext__(self):
        # Special methods are looked up on the type, bypassing ``__getattr__``,
        # so manual stepping needs an explicit ``__anext__`` here.
        try:
            chunk = await self._stream.__anext__()
        except BaseException:
            # Exhaustion (StopAsyncIteration), cancellation, or a failure.
            self._finish()
            raise
        self._acc.observe(chunk)
        return chunk

    async def __aenter__(self):
        await self._stream.__aenter__()
        return self

    async def __aexit__(self, *exc: Any):
        try:
            return await self._stream.__aexit__(*exc)
        finally:
            self._finish()

    def _finish(self) -> None:
        """Capture the accumulated usage exactly once."""
        if self._done:
            return
        self._done = True
        latency = int((time.perf_counter() - self._t0) * 1000)
        cap.safe_capture(
            self._tw,
            lambda: cap.make_event(_PROVIDER, _ENDPOINT, self._acc.fields(), latency, True),
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ── resource proxies: client.chat.completions.create ────────────────────────────
|
|
143
|
+
|
|
144
|
+
class _Completions:
    """Proxy for ``chat.completions``; instruments ``create`` only.

    Every other attribute is fetched from the wrapped resource unchanged.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._tw = tw
        self._async = async_

    def __getattr__(self, name: str) -> Any:
        wrapped = self.__dict__["_real"]
        return getattr(wrapped, name)

    def create(self, *args: Any, **kwargs: Any):
        """Forward to the real ``create``; returns a coroutine in async mode."""
        if self._async:
            return self._acreate(*args, **kwargs)
        wants_stream = bool(kwargs.get("stream"))
        if wants_stream:
            _maybe_inject_usage(kwargs)
        started = time.perf_counter()
        response = self._real.create(*args, **kwargs)
        if wants_stream:
            # Streaming: usage is captured by the proxy once the stream ends.
            return _SyncStreamProxy(response, self._tw, started)
        self._tw_record(response, started)
        return response

    async def _acreate(self, *args: Any, **kwargs: Any):
        wants_stream = bool(kwargs.get("stream"))
        if wants_stream:
            _maybe_inject_usage(kwargs)
        started = time.perf_counter()
        response = await self._real.create(*args, **kwargs)
        if wants_stream:
            return _AsyncStreamProxy(response, self._tw, started)
        self._tw_record(response, started)
        return response

    def _tw_record(self, response: Any, started: float) -> None:
        # Shared non-streaming path: time the call, then capture its usage.
        elapsed_ms = int((time.perf_counter() - started) * 1000)

        def build_event():
            usage_fields = cap.openai_usage_fields(
                getattr(response, "model", "unknown"),
                getattr(response, "usage", None),
            )
            return cap.make_event(_PROVIDER, _ENDPOINT, usage_fields, elapsed_ms, False)

        cap.safe_capture(self._tw, build_event)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class _Chat:
    """Proxy for the client's ``chat`` resource.

    Exposes an instrumented ``completions`` and forwards every other
    attribute to the real ``chat`` resource.

    Args:
        real: The real ``chat`` resource; its ``completions`` gets wrapped.
        tw: Capture client passed through to the completions proxy.
        async_: True when the wrapped client is the async variant.
    """

    def __init__(self, real: Any, tw, async_: bool) -> None:
        self._real = real
        self._completions = _Completions(real.completions, tw, async_)

    @property
    def completions(self) -> _Completions:
        """The instrumented ``chat.completions`` proxy."""
        return self._completions

    def __getattr__(self, name: str) -> Any:
        # Only reached when normal lookup fails. A missing ``_real`` (instance
        # created without ``__init__``) must surface as AttributeError so that
        # ``hasattr`` and ``getattr(..., default)`` keep working.
        try:
            real = self.__dict__["_real"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(real, name)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class _BaseOpenAI:
    """Shared implementation behind the ``OpenAI`` / ``AsyncOpenAI`` wrappers.

    Builds the real client through ``_new_real``, resolves the Tokenwise
    configuration, and exposes an instrumented ``chat``. Anything else is
    looked up on the real client.

    Args:
        *args: Positional arguments passed on to ``_new_real``.
        tokenwise_key: Optional Tokenwise key given to ``resolve_config``.
        tokenwise_url: Optional Tokenwise URL given to ``resolve_config``.
        **kwargs: Keyword arguments passed on to ``_new_real``.
    """

    # Subclasses set this to pick the sync or async real client.
    _ASYNC = False

    def __init__(
        self,
        *args: Any,
        tokenwise_key: str | None = None,
        tokenwise_url: str | None = None,
        **kwargs: Any,
    ) -> None:
        real_client = _new_real(self._ASYNC, args, kwargs)
        self._client = real_client
        tracker = get_client(resolve_config(tokenwise_key, tokenwise_url))
        self._tw = tracker
        self._chat = _Chat(real_client.chat, tracker, self._ASYNC)

    @property
    def chat(self) -> _Chat:
        """The instrumented ``chat`` proxy."""
        return self._chat

    def __getattr__(self, name: str) -> Any:
        # Only reached when normal lookup fails; go through the instance dict
        # so an absent ``_client`` cannot recurse back into this method.
        wrapped = vars(self).get("_client")
        if wrapped is not None:
            return getattr(wrapped, name)
        raise AttributeError(name)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class OpenAI(_BaseOpenAI):
    """Drop-in replacement for ``openai.OpenAI``.

    Synchronous variant: construction and attribute forwarding come from
    ``_BaseOpenAI``; this class only selects the sync real client.
    """

    # Sync flavour of the wrapper.
    _ASYNC = False
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class AsyncOpenAI(_BaseOpenAI):
    """Drop-in replacement for ``openai.AsyncOpenAI``.

    Asynchronous variant: construction and attribute forwarding come from
    ``_BaseOpenAI``; this class only selects the async real client.
    """

    # Async flavour of the wrapper.
    _ASYNC = True
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokenwise-sdk
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Metadata-only usage tracking for Anthropic and OpenAI — swap one import line.
|
|
5
|
+
Project-URL: Homepage, https://tokenwise.io
|
|
6
|
+
Project-URL: Documentation, https://docs.tokenwise.io
|
|
7
|
+
Author: Tokenwise
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: anthropic,cost,llm,observability,openai,tokens,usage
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Requires-Dist: httpx>=0.23
|
|
19
|
+
Provides-Extra: anthropic
|
|
20
|
+
Requires-Dist: anthropic>=0.40; extra == 'anthropic'
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: anthropic>=0.40; extra == 'dev'
|
|
23
|
+
Requires-Dist: openai>=1.40; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
25
|
+
Provides-Extra: openai
|
|
26
|
+
Requires-Dist: openai>=1.40; extra == 'openai'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# Tokenwise Python SDK
|
|
30
|
+
|
|
31
|
+
Metadata-only usage tracking for Anthropic and OpenAI. Swap **one import line**
|
|
32
|
+
and every instrumented API call's token counts and latency flow to your Tokenwise dashboard
|
|
33
|
+
— with **zero access to your prompts or responses**.
|
|
34
|
+
|
|
35
|
+
```diff
|
|
36
|
+
- from anthropic import Anthropic
|
|
37
|
+
+ from tokenwise import Anthropic
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Your code is otherwise unchanged: the wrapper exposes the identical interface,
|
|
41
|
+
forwards every call to the official SDK, and returns its response untouched.
|
|
42
|
+
|
|
43
|
+
## Why it's safe
|
|
44
|
+
|
|
45
|
+
- **Metadata only.** The SDK reads exactly: `model`, `input_tokens`,
|
|
46
|
+
`output_tokens`, `cache_read_input_tokens`, `cache_creation_input_tokens`,
|
|
47
|
+
`latency_ms`, `timestamp`, `endpoint`. It never reads or transmits prompt
|
|
48
|
+
text, response text, system prompts, or tool definitions. (Contrast with
|
|
49
|
+
proxy-based tools, which see all your traffic.)
|
|
50
|
+
- **Non-blocking.** Events are queued and sent on a background daemon thread.
|
|
51
|
+
If Tokenwise is slow or down, your AI calls complete normally.
|
|
52
|
+
- **Fail-silent + bounded.** Up to 1,000 events buffer when offline; the oldest
|
|
53
|
+
drop silently if the buffer fills. Capture never raises, never waits.
|
|
54
|
+
|
|
55
|
+
## Install
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install tokenwise-sdk[anthropic] # if you use Anthropic
|
|
59
|
+
pip install tokenwise-sdk[openai] # if you use OpenAI
|
|
60
|
+
pip install tokenwise-sdk[anthropic,openai]
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
`anthropic` and `openai` are optional extras — install only what you use.
|
|
64
|
+
|
|
65
|
+
## Configure
|
|
66
|
+
|
|
67
|
+
Set your Tokenwise key (from the dashboard, looks like `tw_...`):
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
export TOKENWISE_API_KEY=tw_your_key
|
|
71
|
+
# optional:
|
|
72
|
+
export TOKENWISE_API_URL=https://tokenwise-production-aa59.up.railway.app # default
|
|
73
|
+
export TOKENWISE_DISABLED=true # emergency kill switch
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Precedence for every setting: constructor argument > environment variable >
|
|
77
|
+
default. If no key is configured the SDK runs disabled and your AI calls behave
|
|
78
|
+
exactly as they do with the official SDK.
|
|
79
|
+
|
|
80
|
+
## Usage
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
# Pattern 1 — key from environment
|
|
84
|
+
import os
|
|
85
|
+
os.environ["TOKENWISE_API_KEY"] = "tw_abc123"
|
|
86
|
+
from tokenwise import Anthropic
|
|
87
|
+
client = Anthropic(api_key="sk-ant-...")
|
|
88
|
+
msg = client.messages.create(
|
|
89
|
+
model="claude-sonnet-4-6",
|
|
90
|
+
max_tokens=256,
|
|
91
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# Pattern 2 — key passed explicitly
|
|
95
|
+
from tokenwise import Anthropic
|
|
96
|
+
client = Anthropic(api_key="sk-ant-...", tokenwise_key="tw_abc123")
|
|
97
|
+
|
|
98
|
+
# Pattern 3 — OpenAI
|
|
99
|
+
from tokenwise import OpenAI
|
|
100
|
+
client = OpenAI(api_key="sk-...", tokenwise_key="tw_abc123")
|
|
101
|
+
client.chat.completions.create(
|
|
102
|
+
model="gpt-5.4",
|
|
103
|
+
messages=[{"role": "user", "content": "Hello"}],
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Streaming and async work the same way:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
# Streaming (sync) — usage captured on stream completion
|
|
111
|
+
with client.messages.create(..., stream=True) as stream:
|
|
112
|
+
for event in stream:
|
|
113
|
+
...
|
|
114
|
+
|
|
115
|
+
# Async
|
|
116
|
+
from tokenwise import AsyncAnthropic
|
|
117
|
+
client = AsyncAnthropic(api_key="sk-ant-...", tokenwise_key="tw_abc123")
|
|
118
|
+
msg = await client.messages.create(...)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## What's instrumented (v1)
|
|
122
|
+
|
|
123
|
+
| Provider | Method | Streaming |
|
|
124
|
+
|----------|--------|-----------|
|
|
125
|
+
| Anthropic | `messages.create` | ✅ usage read from the event stream (request unchanged) |
|
|
126
|
+
| OpenAI | `chat.completions.create` | ✅ see note below |
|
|
127
|
+
|
|
128
|
+
Other methods pass through and work, but aren't yet recorded. (OpenAI Responses
|
|
129
|
+
API and legacy completions are planned.)
|
|
130
|
+
|
|
131
|
+
### Note on OpenAI streaming
|
|
132
|
+
|
|
133
|
+
OpenAI only returns token usage on a streamed response when the request includes
|
|
134
|
+
`stream_options={"include_usage": True}`. When you stream **without** supplying
|
|
135
|
+
your own `stream_options`, Tokenwise injects it for you so usage can be captured.
|
|
136
|
+
This adds one final usage-only chunk (with an empty `choices` list) to the
|
|
137
|
+
stream. If you already pass `stream_options`, Tokenwise respects yours and does
|
|
138
|
+
not modify the request (in that case usage is captured only if you enabled it).
|
|
139
|
+
|
|
140
|
+
### Latency semantics
|
|
141
|
+
|
|
142
|
+
For non-streaming calls, `latency_ms` is the wall-clock time of the call. For
|
|
143
|
+
streaming calls it is the **total stream duration** (until the last chunk is
|
|
144
|
+
consumed), which includes time your code spends between chunks — events from
|
|
145
|
+
streaming calls carry `streamed: true` so this is distinguishable.
|
|
146
|
+
|
|
147
|
+
## License
|
|
148
|
+
|
|
149
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
tokenwise/__init__.py,sha256=6mqEA1o9s3Tx8vautqX1d7psARfzjzt10CxAFqqbU9I,2234
|
|
2
|
+
tokenwise/_capture.py,sha256=GFYUqeKSjl2fIQgvVPR7OinO3xkpvTDlGK0w0uI3e38,3073
|
|
3
|
+
tokenwise/_version.py,sha256=1fcON3TaH_saTfQzv-QWlGmS61oADXGsc-A7KNbVS4Y,80
|
|
4
|
+
tokenwise/anthropic.py,sha256=Grjf5RhB8wKnWWYMP-FIKAkLGJ2sxA_PjvJyJyskIS8,7811
|
|
5
|
+
tokenwise/client.py,sha256=Hi4PLZFzSFP4p0pWZMaFljgYC1295Xp7GANU1kkF0m4,6719
|
|
6
|
+
tokenwise/config.py,sha256=m-tXSxpvZC_SQrGVdYG4VHXIxQ99S7QtbsrWYTydtM4,2397
|
|
7
|
+
tokenwise/event.py,sha256=82PSQFb63qqBd8h8cTNBj2bl8v0tSUaqgNgulW0v8wA,1111
|
|
8
|
+
tokenwise/openai.py,sha256=L_D7G9fdyl140gSqjpY8FBpaTHJt9wkt3BXKyAcxv60,7725
|
|
9
|
+
tokenwise_sdk-0.1.1.dist-info/METADATA,sha256=_m8sVPdOrSxT0PhlR8dzUQdWMW18F8kuEBLvLJiLJto,5248
|
|
10
|
+
tokenwise_sdk-0.1.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
tokenwise_sdk-0.1.1.dist-info/RECORD,,
|