tokendetective 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokendetective-0.3.0/.env +1 -0
- tokendetective-0.3.0/.env.example +2 -0
- tokendetective-0.3.0/PKG-INFO +22 -0
- tokendetective-0.3.0/README.md +659 -0
- tokendetective-0.3.0/examples/anthropic_example.py +25 -0
- tokendetective-0.3.0/examples/async_example.py +38 -0
- tokendetective-0.3.0/examples/dynamic_agent.py +122 -0
- tokendetective-0.3.0/examples/openai_example.py +38 -0
- tokendetective-0.3.0/pyproject.toml +37 -0
- tokendetective-0.3.0/tokenlens/__init__.py +52 -0
- tokendetective-0.3.0/tokenlens/chat.py +257 -0
- tokendetective-0.3.0/tokenlens/client.py +336 -0
- tokendetective-0.3.0/tokenlens/exceptions.py +10 -0
- tokendetective-0.3.0/tokenlens/pricing.py +75 -0
- tokendetective-0.3.0/tokenlens/wrappers/__init__.py +34 -0
- tokendetective-0.3.0/tokenlens/wrappers/anthropic.py +132 -0
- tokendetective-0.3.0/tokenlens/wrappers/ollama.py +29 -0
- tokendetective-0.3.0/tokenlens/wrappers/openai.py +175 -0
- tokendetective-0.3.0/tokentracker/__init__.py +52 -0
- tokendetective-0.3.0/tokentracker/chat.py +255 -0
- tokendetective-0.3.0/tokentracker/client.py +336 -0
- tokendetective-0.3.0/tokentracker/exceptions.py +10 -0
- tokendetective-0.3.0/tokentracker/pricing.py +75 -0
- tokendetective-0.3.0/tokentracker/wrappers/__init__.py +34 -0
- tokendetective-0.3.0/tokentracker/wrappers/anthropic.py +132 -0
- tokendetective-0.3.0/tokentracker/wrappers/ollama.py +29 -0
- tokendetective-0.3.0/tokentracker/wrappers/openai.py +175 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
TOKENLENS_URL=https://13.126.130.56.nip.io
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokendetective
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Python SDK for TokenLens — automatic token and cost tracking for AI requests
|
|
5
|
+
Project-URL: Homepage, https://github.com/alumnx-ai-labs/TokenLens
|
|
6
|
+
Project-URL: Repository, https://github.com/alumnx-ai-labs/TokenLens
|
|
7
|
+
Author-email: TokenLens <nsandeep06595@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: ai,analytics,anthropic,cost,gemini,llm,observability,openai,tokens
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: anthropic>=0.20.0
|
|
21
|
+
Requires-Dist: httpx>=0.24.0
|
|
22
|
+
Requires-Dist: openai>=1.0.0
|
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
# TokenLens SDK
|
|
2
|
+
|
|
3
|
+
Python SDK for [TokenLens](https://github.com/alumnx-ai-labs/TokenLens) — automatic token counting, latency tracking, and cost calculation for every AI request your application makes.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Table of Contents
|
|
8
|
+
|
|
9
|
+
- [What It Does](#what-it-does)
|
|
10
|
+
- [Installation](#installation)
|
|
11
|
+
- [Prerequisites](#prerequisites)
|
|
12
|
+
- [Quick Start](#quick-start)
|
|
13
|
+
- [Usage Modes](#usage-modes)
|
|
14
|
+
- [Mode 1 — tl.chat() — Backend as LLM](#mode-1--tlchat--backend-as-llm)
|
|
15
|
+
- [Mode 2 — tl.openai() / tl.anthropic() — Wrap Your Own Client](#mode-2--tlopenai--tlanthropic--wrap-your-own-client)
|
|
16
|
+
- [Mode 3 — tl.log() — Manual Logging](#mode-3--tllog--manual-logging)
|
|
17
|
+
- [TokenLens Constructor](#tokenlens-constructor)
|
|
18
|
+
- [Method Reference](#method-reference)
|
|
19
|
+
- [Response Object](#response-object)
|
|
20
|
+
- [Async Support](#async-support)
|
|
21
|
+
- [Cost Utilities](#cost-utilities)
|
|
22
|
+
- [Supported Models & Pricing](#supported-models--pricing)
|
|
23
|
+
- [Error Handling](#error-handling)
|
|
24
|
+
- [Where Data Is Saved](#where-data-is-saved)
|
|
25
|
+
- [How test_tokenlens.py Works](#how-test_tokenlenspy-works)
|
|
26
|
+
- [File Structure](#file-structure)
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## What It Does
|
|
31
|
+
|
|
32
|
+
- Tracks **tokens in**, **tokens out**, **latency**, and **cost** for every LLM call
|
|
33
|
+
- Saves everything to your TokenLens backend database
|
|
34
|
+
- Works with OpenAI, Anthropic, and the TokenLens backend's own models
|
|
35
|
+
- Zero changes to your existing LLM code — just wrap the client
|
|
36
|
+
- No provider API key needed when routing through your TokenLens backend
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Install from local repo (development)
|
|
44
|
+
pip install -e /path/to/TokenLens/sdk
|
|
45
|
+
|
|
46
|
+
# Install with OpenAI support
|
|
47
|
+
pip install tokendetective  # openai is a required dependency; no extra is needed
|
|
48
|
+
|
|
49
|
+
# Install with Anthropic support
|
|
50
|
+
pip install tokendetective  # anthropic is a required dependency; no extra is needed
|
|
51
|
+
|
|
52
|
+
# Install with all provider support
|
|
53
|
+
pip install tokendetective  # installs openai, anthropic and httpx together
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Requires Python 3.9+**
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Prerequisites
|
|
61
|
+
|
|
62
|
+
Before using the SDK you need:
|
|
63
|
+
|
|
64
|
+
1. **A running TokenLens backend** — default at `http://localhost:8000`
|
|
65
|
+
2. **A TokenLens API key** — generate one from the TokenLens dashboard under **Settings → API Keys**. It looks like `tl-abc123...`
|
|
66
|
+
|
|
67
|
+
That is all. No OpenAI or Anthropic key is needed in your app when using `tl.chat()`.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from tokenlens import TokenLens
|
|
75
|
+
|
|
76
|
+
# 1. Create the client
|
|
77
|
+
tl = TokenLens(
|
|
78
|
+
api_key = "tl-your-key-here",
|
|
79
|
+
base_url = "http://localhost:8000",
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# 2. Get a chat client — routes through your backend, no provider key needed
|
|
83
|
+
client = tl.chat()
|
|
84
|
+
|
|
85
|
+
# 3. Make an LLM call — identical to the OpenAI SDK interface
|
|
86
|
+
response = client.chat.completions.create(
|
|
87
|
+
model = "gpt4o-mini",
|
|
88
|
+
messages = [{"role": "user", "content": "What is the capital of France?"}],
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# 4. Use the response
|
|
92
|
+
print(response.choices[0].message.content)
|
|
93
|
+
# → Paris is the capital of France.
|
|
94
|
+
|
|
95
|
+
print(f"Tokens : {response.usage.total_tokens}")
|
|
96
|
+
print(f"Cost : ${response.cost.usd:.8f} / ₹{response.cost.inr:.6f}")
|
|
97
|
+
print(f"Latency: {response.latency_ms:.1f} ms")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Usage Modes
|
|
103
|
+
|
|
104
|
+
### Mode 1 — `tl.chat()` — Backend as LLM
|
|
105
|
+
|
|
106
|
+
Routes every LLM call through your **TokenLens backend**. The backend makes the actual call to OpenAI/Ollama using keys configured in its own `.env`. Your application only needs the `tl-` key.
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
tl = TokenLens(api_key="tl-...", base_url="http://localhost:8000")
|
|
110
|
+
client = tl.chat() # starts a new conversation session
|
|
111
|
+
|
|
112
|
+
response = client.chat.completions.create(
|
|
113
|
+
model = "gpt4o-mini", # model must exist in backend config
|
|
114
|
+
messages = [{"role": "user", "content": "Explain async/await in Python."}],
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
print(response.choices[0].message.content)
|
|
118
|
+
print(response.cost.usd) # cost computed by backend
|
|
119
|
+
print(response.latency_ms) # end-to-end latency in ms
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Multi-turn conversations** — pass the same `session_id` to maintain memory:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
client = tl.chat(session_id="my-session-abc")
|
|
126
|
+
|
|
127
|
+
# Turn 1
|
|
128
|
+
r1 = client.chat.completions.create(
|
|
129
|
+
model = "gpt4o-mini",
|
|
130
|
+
messages = [{"role": "user", "content": "My name is Sandeep."}],
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# Turn 2 — backend remembers "Sandeep" from turn 1
|
|
134
|
+
r2 = client.chat.completions.create(
|
|
135
|
+
model = "gpt4o-mini",
|
|
136
|
+
messages = [{"role": "user", "content": "What is my name?"}],
|
|
137
|
+
)
|
|
138
|
+
print(r2.choices[0].message.content) # → Your name is Sandeep.
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Available models** — depends on your backend configuration:
|
|
142
|
+
|
|
143
|
+
| Model string | Provider |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `"gemma"` | Ollama (local) |
|
|
146
|
+
| `"gpt4"` | OpenAI GPT-4 |
|
|
147
|
+
| `"gpt4o-mini"` | OpenAI GPT-4o Mini |
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
### Mode 2 — `tl.openai()` / `tl.anthropic()` — Wrap Your Own Client
|
|
152
|
+
|
|
153
|
+
Use this when you already have a provider API key and want TokenLens to log every call automatically. The LLM call goes **directly** to OpenAI/Anthropic — TokenLens only logs the token counts after the fact.
|
|
154
|
+
|
|
155
|
+
#### OpenAI
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install tokendetective
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
# Requires OPENAI_API_KEY set in environment
|
|
163
|
+
client = tl.openai()
|
|
164
|
+
|
|
165
|
+
response = client.chat.completions.create(
|
|
166
|
+
model = "gpt-4o-mini",
|
|
167
|
+
messages = [{"role": "user", "content": "Hello!"}],
|
|
168
|
+
)
|
|
169
|
+
print(response.choices[0].message.content)
|
|
170
|
+
# TokenLens silently logged tokens + cost in a background thread
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Custom OpenAI settings** (Azure, proxy, org):
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
client = tl.openai(
|
|
177
|
+
api_key = "sk-...",
|
|
178
|
+
base_url = "https://your-azure-endpoint.openai.azure.com/",
|
|
179
|
+
)
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
#### Anthropic
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
pip install tokendetective
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
# Requires ANTHROPIC_API_KEY set in environment
|
|
190
|
+
claude = tl.anthropic()
|
|
191
|
+
|
|
192
|
+
response = claude.messages.create(
|
|
193
|
+
model = "claude-3-5-haiku-20241022",
|
|
194
|
+
max_tokens= 512,
|
|
195
|
+
messages = [{"role": "user", "content": "Hello!"}],
|
|
196
|
+
)
|
|
197
|
+
print(response.content[0].text)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
#### Bring Your Own Client — `tl.wrap()`
|
|
201
|
+
|
|
202
|
+
If you already have a configured client instance:
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from openai import OpenAI
|
|
206
|
+
|
|
207
|
+
my_client = OpenAI(api_key="sk-...", organization="org-...")
|
|
208
|
+
client = tl.wrap(my_client) # same as tl.openai() but uses your existing instance
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Supported types for `tl.wrap()`:
|
|
212
|
+
|
|
213
|
+
- `openai.OpenAI`
|
|
214
|
+
- `openai.AsyncOpenAI`
|
|
215
|
+
- `anthropic.Anthropic`
|
|
216
|
+
- `anthropic.AsyncAnthropic`
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
### Mode 3 — `tl.log()` — Manual Logging
|
|
221
|
+
|
|
222
|
+
Use when you already have token counts from any source — LangChain, LlamaIndex, a custom HTTP client, or any other framework.
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
tl.log(
|
|
226
|
+
model = "gpt-4o-mini",
|
|
227
|
+
tokens_in = 150,
|
|
228
|
+
tokens_out = 42,
|
|
229
|
+
latency_ms = 830.5,
|
|
230
|
+
query_text = "Summarise this document...", # optional — shown in dashboard
|
|
231
|
+
)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
By default this is **fire-and-forget**: it runs in a background thread and returns `None`.
|
|
235
|
+
|
|
236
|
+
To block and get the response:
|
|
237
|
+
|
|
238
|
+
```python
|
|
239
|
+
tl = TokenLens(api_key="tl-...", background=False)
|
|
240
|
+
|
|
241
|
+
result = tl.log(
|
|
242
|
+
model = "gpt-4o-mini",
|
|
243
|
+
tokens_in = 150,
|
|
244
|
+
tokens_out = 42,
|
|
245
|
+
latency_ms = 830.5,
|
|
246
|
+
)
|
|
247
|
+
print(result)
|
|
248
|
+
# {"usage_id": "uuid...", "cost_usd": 0.00002745, "cost_inr": 0.00233325}
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## TokenLens Constructor
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
TokenLens(
|
|
257
|
+
api_key : str,
|
|
258
|
+
base_url : str = "http://localhost:8000",
|
|
259
|
+
application : str = "tokenlens-sdk",
|
|
260
|
+
background : bool = True,
|
|
261
|
+
timeout : float = 10.0,
|
|
262
|
+
raise_on_error : bool = False,
|
|
263
|
+
)
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
| Parameter | Type | Default | Description |
|
|
267
|
+
|---|---|---|---|
|
|
268
|
+
| `api_key` | `str` | required | Your `tl-` API key from the TokenLens dashboard |
|
|
269
|
+
| `base_url` | `str` | `"http://localhost:8000"` | Base URL of your TokenLens backend |
|
|
270
|
+
| `application` | `str` | `"tokenlens-sdk"` | Label shown in the dashboard for all log entries from this client |
|
|
271
|
+
| `background` | `bool` | `True` | `True` = fire-and-forget (non-blocking). `False` = block and return log result |
|
|
272
|
+
| `timeout` | `float` | `10.0` | HTTP timeout in seconds for log requests |
|
|
273
|
+
| `raise_on_error` | `bool` | `False` | `False` = log a warning on failure, never crash your app. `True` = raise `LoggingError` |
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## Method Reference
|
|
278
|
+
|
|
279
|
+
### `tl.chat(session_id=None)`
|
|
280
|
+
|
|
281
|
+
Returns a `TokenLensChatClient`. Routes LLM calls through the TokenLens backend.
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
client = tl.chat()
|
|
285
|
+
client = tl.chat(session_id="existing-session-id") # continue a session
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### `tl.async_chat(session_id=None)`
|
|
289
|
+
|
|
290
|
+
Async version. Returns `AsyncTokenLensChatClient`. Use with `await`.
|
|
291
|
+
|
|
292
|
+
### `tl.openai(**kwargs)`
|
|
293
|
+
|
|
294
|
+
Creates and wraps an `openai.OpenAI` client. All kwargs forwarded to `OpenAI()`.
|
|
295
|
+
|
|
296
|
+
```python
|
|
297
|
+
client = tl.openai()
|
|
298
|
+
client = tl.openai(api_key="sk-...", base_url="https://azure-endpoint/")
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### `tl.async_openai(**kwargs)`
|
|
302
|
+
|
|
303
|
+
Creates and wraps an `openai.AsyncOpenAI` client.
|
|
304
|
+
|
|
305
|
+
### `tl.anthropic(**kwargs)`
|
|
306
|
+
|
|
307
|
+
Creates and wraps an `anthropic.Anthropic` client.
|
|
308
|
+
|
|
309
|
+
### `tl.async_anthropic(**kwargs)`
|
|
310
|
+
|
|
311
|
+
Creates and wraps an `anthropic.AsyncAnthropic` client.
|
|
312
|
+
|
|
313
|
+
### `tl.wrap(client)`
|
|
314
|
+
|
|
315
|
+
Wrap an existing provider client instance.
|
|
316
|
+
|
|
317
|
+
```python
|
|
318
|
+
from openai import OpenAI
|
|
319
|
+
client = tl.wrap(OpenAI())
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### `tl.log(*, model, tokens_in, tokens_out, latency_ms, query_text=None, application=None)`
|
|
323
|
+
|
|
324
|
+
Manually log one AI request.
|
|
325
|
+
|
|
326
|
+
| Argument | Type | Required | Description |
|
|
327
|
+
|---|---|---|---|
|
|
328
|
+
| `model` | `str` | yes | Model identifier e.g. `"gpt-4o-mini"` |
|
|
329
|
+
| `tokens_in` | `int` | yes | Input / prompt token count |
|
|
330
|
+
| `tokens_out` | `int` | yes | Output / completion token count |
|
|
331
|
+
| `latency_ms` | `float` | yes | End-to-end latency in milliseconds |
|
|
332
|
+
| `query_text` | `str` | no | First 500 chars of the user message (dashboard preview) |
|
|
333
|
+
| `application` | `str` | no | Override the default application label for this entry |
|
|
334
|
+
|
|
335
|
+
### `await tl.alog(*, model, tokens_in, tokens_out, latency_ms, ...)`
|
|
336
|
+
|
|
337
|
+
Async version of `log()`. Always awaits the HTTP call and returns the response dict.
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
## Response Object
|
|
342
|
+
|
|
343
|
+
Every `client.chat.completions.create()` call returns a `ChatCompletion` object:
|
|
344
|
+
|
|
345
|
+
```python
|
|
346
|
+
response = client.chat.completions.create(model="gpt4o-mini", messages=[...])
|
|
347
|
+
|
|
348
|
+
response.choices[0].message.content # str — the AI's reply
|
|
349
|
+
response.choices[0].message.role # str — always "assistant"
|
|
350
|
+
response.choices[0].finish_reason # str — always "stop"
|
|
351
|
+
|
|
352
|
+
response.usage.prompt_tokens # int — input tokens
|
|
353
|
+
response.usage.completion_tokens # int — output tokens
|
|
354
|
+
response.usage.total_tokens # int — prompt + completion
|
|
355
|
+
|
|
356
|
+
response.cost.usd # float — cost in US dollars
|
|
357
|
+
response.cost.inr # float — cost in Indian rupees
|
|
358
|
+
|
|
359
|
+
response.latency_ms # float — ms from request to first byte of response
|
|
360
|
+
response.model # str — model name echoed from backend
|
|
361
|
+
response._raw # dict — full raw JSON from backend
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
> The `.cost` and `.latency_ms` fields are **extras** that the native OpenAI SDK does not provide. All other fields follow the same path as `openai.types.chat.ChatCompletion` so you can switch between them without changing your code.
|
|
365
|
+
|
|
366
|
+
---
|
|
367
|
+
|
|
368
|
+
## Async Support
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
import asyncio
|
|
372
|
+
from tokenlens import TokenLens
|
|
373
|
+
|
|
374
|
+
tl = TokenLens(api_key="tl-...", base_url="http://localhost:8000")
|
|
375
|
+
|
|
376
|
+
async def main():
|
|
377
|
+
|
|
378
|
+
# ── Option A: async backend chat (no provider key) ────────────────────────
|
|
379
|
+
client = tl.async_chat()
|
|
380
|
+
response = await client.chat.completions.create(
|
|
381
|
+
model = "gpt4o-mini",
|
|
382
|
+
messages = [{"role": "user", "content": "Hello!"}],
|
|
383
|
+
)
|
|
384
|
+
print(response.choices[0].message.content)
|
|
385
|
+
|
|
386
|
+
# ── Option B: async OpenAI with logging ───────────────────────────────────
|
|
387
|
+
client = tl.async_openai() # needs OPENAI_API_KEY
|
|
388
|
+
response = await client.chat.completions.create(
|
|
389
|
+
model = "gpt-4o-mini",
|
|
390
|
+
messages = [{"role": "user", "content": "Hello!"}],
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
# ── Option C: async manual log ────────────────────────────────────────────
|
|
394
|
+
result = await tl.alog(
|
|
395
|
+
model = "gpt-4o-mini",
|
|
396
|
+
tokens_in = 150,
|
|
397
|
+
tokens_out = 42,
|
|
398
|
+
latency_ms = 500.0,
|
|
399
|
+
query_text = "Hello!",
|
|
400
|
+
)
|
|
401
|
+
print(result)
|
|
402
|
+
# {"usage_id": "...", "cost_usd": 0.00002745, "cost_inr": 0.00233325}
|
|
403
|
+
|
|
404
|
+
asyncio.run(main())
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
---
|
|
408
|
+
|
|
409
|
+
## Cost Utilities
|
|
410
|
+
|
|
411
|
+
Calculate cost locally without making any network call:
|
|
412
|
+
|
|
413
|
+
```python
|
|
414
|
+
from tokenlens.pricing import compute_cost, list_models
|
|
415
|
+
|
|
416
|
+
# Basic cost calculation
|
|
417
|
+
cost = compute_cost("gpt-4o-mini", tokens_in=150, tokens_out=42)
|
|
418
|
+
print(cost)
|
|
419
|
+
# {"usd": 0.00002745, "inr": 0.00233325}
|
|
420
|
+
|
|
421
|
+
# Custom exchange rate
|
|
422
|
+
cost = compute_cost("gpt-4o-mini", tokens_in=150, tokens_out=42, usd_to_inr=84.5)
|
|
423
|
+
|
|
424
|
+
# Custom pricing (for models not in the table, or negotiated rates)
|
|
425
|
+
cost = compute_cost(
|
|
426
|
+
"my-custom-model",
|
|
427
|
+
tokens_in = 1000,
|
|
428
|
+
tokens_out = 500,
|
|
429
|
+
custom_pricing = {"input": 0.002 / 1_000_000, "output": 0.008 / 1_000_000},
|
|
430
|
+
)
|
|
431
|
+
|
|
432
|
+
# List all models in the pricing table
|
|
433
|
+
print(list_models())
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
---
|
|
437
|
+
|
|
438
|
+
## Supported Models & Pricing
|
|
439
|
+
|
|
440
|
+
The SDK ships with a built-in pricing table. The backend uses the same table to compute cost server-side.
|
|
441
|
+
|
|
442
|
+
### OpenAI
|
|
443
|
+
|
|
444
|
+
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|
|
445
|
+
|---|---|---|
|
|
446
|
+
| `gpt-4o` | $5.00 | $20.00 |
|
|
447
|
+
| `gpt-4o-mini` | $0.15 | $0.60 |
|
|
448
|
+
| `gpt-4-turbo` | $10.00 | $30.00 |
|
|
449
|
+
| `gpt-4` | $30.00 | $60.00 |
|
|
450
|
+
| `gpt-3.5-turbo` | $0.50 | $1.50 |
|
|
451
|
+
| `o1` | $15.00 | $60.00 |
|
|
452
|
+
| `o1-mini` | $3.00 | $12.00 |
|
|
453
|
+
| `o3` | $10.00 | $40.00 |
|
|
454
|
+
| `o3-mini` | $1.10 | $4.40 |
|
|
455
|
+
|
|
456
|
+
### Anthropic
|
|
457
|
+
|
|
458
|
+
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|
|
459
|
+
|---|---|---|
|
|
460
|
+
| `claude-opus-4-8` | $15.00 | $75.00 |
|
|
461
|
+
| `claude-sonnet-4-6` | $3.00 | $15.00 |
|
|
462
|
+
| `claude-haiku-4-5-20251001` | $0.80 | $4.00 |
|
|
463
|
+
| `claude-3-5-sonnet-20241022` | $3.00 | $15.00 |
|
|
464
|
+
| `claude-3-5-haiku-20241022` | $0.80 | $4.00 |
|
|
465
|
+
| `claude-3-opus-20240229` | $15.00 | $75.00 |
|
|
466
|
+
| `claude-3-haiku-20240307` | $0.25 | $1.25 |
|
|
467
|
+
|
|
468
|
+
### Google
|
|
469
|
+
|
|
470
|
+
| Model | Input (per 1M tokens) | Output (per 1M tokens) |
|
|
471
|
+
|---|---|---|
|
|
472
|
+
| `gemini-1.5-pro` | $1.25 | $5.00 |
|
|
473
|
+
| `gemini-1.5-flash` | $0.075 | $0.30 |
|
|
474
|
+
| `gemini-2.0-flash` | $0.10 | $0.40 |
|
|
475
|
+
| `gemini-2.0-flash-lite` | $0.075 | $0.30 |
|
|
476
|
+
|
|
477
|
+
### TokenLens Backend Aliases
|
|
478
|
+
|
|
479
|
+
| Model string | Maps to |
|
|
480
|
+
|---|---|
|
|
481
|
+
| `"gemma"` | Ollama local model |
|
|
482
|
+
| `"gpt4"` | OpenAI GPT-4 |
|
|
483
|
+
| `"gpt4o-mini"` | OpenAI GPT-4o Mini |
|
|
484
|
+
|
|
485
|
+
> Models not in the table are logged at a minimal fallback rate ($0.001/$0.002 per 1M tokens). Pass `custom_pricing` to `compute_cost()` for accurate local estimates.
|
|
486
|
+
|
|
487
|
+
---
|
|
488
|
+
|
|
489
|
+
## Error Handling
|
|
490
|
+
|
|
491
|
+
The SDK never crashes your application by default (`raise_on_error=False`). Failures are logged as warnings via Python's `logging` module.
|
|
492
|
+
|
|
493
|
+
```python
|
|
494
|
+
import logging
|
|
495
|
+
logging.basicConfig(level=logging.WARNING)
|
|
496
|
+
|
|
497
|
+
tl = TokenLens(api_key="tl-...", raise_on_error=False) # default — safe
|
|
498
|
+
tl.log(model="gpt-4o-mini", tokens_in=100, tokens_out=50, latency_ms=500)
|
|
499
|
+
# If backend is down: logs a warning, returns None, does NOT raise
|
|
500
|
+
```
|
|
501
|
+
|
|
502
|
+
**Strict mode** — raise on any failure (useful for tests):
|
|
503
|
+
|
|
504
|
+
```python
|
|
505
|
+
from tokenlens import TokenLens, LoggingError, AuthError
|
|
506
|
+
|
|
507
|
+
tl = TokenLens(api_key="tl-...", raise_on_error=True)
|
|
508
|
+
|
|
509
|
+
try:
|
|
510
|
+
tl.log(model="gpt-4o-mini", tokens_in=100, tokens_out=50, latency_ms=500)
|
|
511
|
+
except LoggingError as e:
|
|
512
|
+
print(f"Logging failed: {e}")
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
### Exception Types
|
|
516
|
+
|
|
517
|
+
| Exception | When raised |
|
|
518
|
+
|---|---|
|
|
519
|
+
| `AuthError` | `api_key` is empty or does not start with `tl-` |
|
|
520
|
+
| `LoggingError` | Backend returned a non-200 status, or the connection timed out |
|
|
521
|
+
| `TokenLensError` | Base class for all SDK exceptions |
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## Where Data Is Saved
|
|
526
|
+
|
|
527
|
+
Every call saves data to the TokenLens backend database:
|
|
528
|
+
|
|
529
|
+
| Call | `query_analytics` table | `api_usage` table |
|
|
530
|
+
|---|---|---|
|
|
531
|
+
| `tl.chat()` | Yes (via backend background task) | Yes (via `/v1/log` post-call) |
|
|
532
|
+
| `tl.openai()` | No | Yes |
|
|
533
|
+
| `tl.anthropic()` | No | Yes |
|
|
534
|
+
| `tl.log()` | No | Yes |
|
|
535
|
+
|
|
536
|
+
- **`query_analytics`** — visible in **Analytics → User History** in the dashboard
|
|
537
|
+
- **`api_usage`** — visible in **Analytics → SDK Usage** in the dashboard
|
|
538
|
+
|
|
539
|
+
---
|
|
540
|
+
|
|
541
|
+
## How `test_tokenlens.py` Works
|
|
542
|
+
|
|
543
|
+
```python
|
|
544
|
+
from tokenlens import TokenLens
|
|
545
|
+
```
|
|
546
|
+
Imports the `TokenLens` class from the `tokenlens` module, which is provided by the installed `tokendetective` package.
|
|
547
|
+
|
|
548
|
+
---
|
|
549
|
+
|
|
550
|
+
```python
|
|
551
|
+
tl = TokenLens(
|
|
552
|
+
api_key = "tl-your-key-here",
|
|
553
|
+
base_url = "http://localhost:8000",
|
|
554
|
+
)
|
|
555
|
+
```
|
|
556
|
+
Creates the SDK client. Validates that the key starts with `tl-`. No network call yet.
|
|
557
|
+
|
|
558
|
+
---
|
|
559
|
+
|
|
560
|
+
```python
|
|
561
|
+
client = tl.chat()
|
|
562
|
+
```
|
|
563
|
+
Creates a `TokenLensChatClient`. Generates a random UUID as the `session_id`. No network call yet.
|
|
564
|
+
|
|
565
|
+
---
|
|
566
|
+
|
|
567
|
+
```python
|
|
568
|
+
response = client.chat.completions.create(
|
|
569
|
+
model = "gpt4o-mini",
|
|
570
|
+
messages = [{"role": "user", "content": "Hi!"}],
|
|
571
|
+
)
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
This triggers the following step-by-step chain:
|
|
575
|
+
|
|
576
|
+
```
|
|
577
|
+
Step 1 — SDK extracts last user message
|
|
578
|
+
"Hi!" from messages list
|
|
579
|
+
|
|
580
|
+
Step 2 — SDK sends POST http://localhost:8000/chat
|
|
581
|
+
Headers: { Authorization: "Bearer tl-your-key-here" }
|
|
582
|
+
Body: { session_id: "uuid-abc", user_id: "sdk",
|
|
583
|
+
message: "Hi!", model: "gpt4o-mini" }
|
|
584
|
+
|
|
585
|
+
Step 3 — FirebaseAuthMiddleware on backend
|
|
586
|
+
Sees "Bearer tl-..." prefix
|
|
587
|
+
Looks up key hash in api_keys table
|
|
588
|
+
Sets request.state.user = { uid: "firebase-uid-of-key-owner" }
|
|
589
|
+
|
|
590
|
+
Step 4 — /chat endpoint on backend
|
|
591
|
+
Overrides user_id with the real Firebase UID from the key
|
|
592
|
+
Calls LLM (OpenAI or Ollama, from backend .env)
|
|
593
|
+
Counts tokens, computes cost in USD and INR
|
|
594
|
+
Returns HTTP 200 immediately with:
|
|
595
|
+
{ response: "Hello! ...", usage: {...}, cost: {...}, latency_ms: ... }
|
|
596
|
+
Schedules background task: _persist_query() → writes to query_analytics
|
|
597
|
+
|
|
598
|
+
Step 5 — SDK receives JSON, wraps in ChatCompletion object
|
|
599
|
+
|
|
600
|
+
Step 6 — SDK fires background POST http://localhost:8000/v1/log
|
|
601
|
+
(in a daemon thread — does not block your code)
|
|
602
|
+
This writes to api_usage table (SDK usage view in dashboard)
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
---
|
|
606
|
+
|
|
607
|
+
```python
|
|
608
|
+
print(response.choices[0].message.content)
|
|
609
|
+
```
|
|
610
|
+
The AI's reply text. Same path as the real OpenAI SDK.
|
|
611
|
+
|
|
612
|
+
---
|
|
613
|
+
|
|
614
|
+
```python
|
|
615
|
+
print(f"Tokens: {response.usage.total_tokens} | Cost: ${response.cost.usd:.8f}")
|
|
616
|
+
```
|
|
617
|
+
Token count and cost — returned directly by the TokenLens backend, not available from the native OpenAI SDK.
|
|
618
|
+
|
|
619
|
+
Example output:
|
|
620
|
+
```
|
|
621
|
+
Hello! How can I help you today?
|
|
622
|
+
Tokens: 192 | Cost: $0.00002880
|
|
623
|
+
```
|
|
624
|
+
|
|
625
|
+
---
|
|
626
|
+
|
|
627
|
+
## File Structure
|
|
628
|
+
|
|
629
|
+
```
|
|
630
|
+
sdk/
|
|
631
|
+
├── README.md ← this file
|
|
632
|
+
├── pyproject.toml ← package metadata & build config
|
|
633
|
+
├── tokenlens/
|
|
634
|
+
│ ├── __init__.py ← public exports: TokenLens, exceptions
|
|
635
|
+
│ ├── client.py ← TokenLens class (all methods)
|
|
636
|
+
│ ├── chat.py ← TokenLensChatClient (tl.chat())
|
|
637
|
+
│ ├── pricing.py ← compute_cost(), list_models(), pricing table
|
|
638
|
+
│ ├── exceptions.py ← TokenLensError, AuthError, LoggingError
|
|
639
|
+
│ └── wrappers/
|
|
640
|
+
│ ├── openai.py ← proxy for openai.OpenAI / AsyncOpenAI
|
|
641
|
+
│ └── anthropic.py ← proxy for anthropic.Anthropic / AsyncAnthropic
|
|
642
|
+
└── examples/
|
|
643
|
+
├── openai_example.py ← tl.openai() demo
|
|
644
|
+
├── anthropic_example.py ← tl.anthropic() demo
|
|
645
|
+
└── async_example.py ← async OpenAI demo
|
|
646
|
+
```
|
|
647
|
+
|
|
648
|
+
---
|
|
649
|
+
|
|
650
|
+
## Backend Endpoint Reference
|
|
651
|
+
|
|
652
|
+
The SDK communicates with two backend endpoints:
|
|
653
|
+
|
|
654
|
+
| Endpoint | Used by | What it does |
|
|
655
|
+
|---|---|---|
|
|
656
|
+
| `POST /chat` | `tl.chat()` | Full LLM call through backend, saves to `query_analytics` |
|
|
657
|
+
| `POST /v1/log` | `tl.log()`, `tl.openai()`, `tl.anthropic()`, `tl.chat()` | Saves token/cost data to `api_usage` |
|
|
658
|
+
|
|
659
|
+
Both require `Authorization: Bearer tl-<your-key>`.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
examples/anthropic_example.py — Anthropic + TokenLens SDK demo.
|
|
3
|
+
|
|
4
|
+
Prerequisites:
|
|
5
|
+
pip install tokendetective
|
|
6
|
+
Set ANTHROPIC_API_KEY and TOKENLENS_API_KEY in your environment.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from anthropic import Anthropic
|
|
11
|
+
from tokenlens import TokenLens
|
|
12
|
+
|
|
13
|
+
TOKENLENS_API_KEY = os.environ["TOKENLENS_API_KEY"]
|
|
14
|
+
TOKENLENS_URL = os.getenv("TOKENLENS_URL", "http://localhost:8000")
|
|
15
|
+
|
|
16
|
+
tl = TokenLens(api_key=TOKENLENS_API_KEY, base_url=TOKENLENS_URL, application="anthropic-demo")
|
|
17
|
+
client = tl.wrap(Anthropic())
|
|
18
|
+
|
|
19
|
+
response = client.messages.create(
|
|
20
|
+
model="claude-3-5-haiku-20241022",
|
|
21
|
+
max_tokens=256,
|
|
22
|
+
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
|
23
|
+
)
|
|
24
|
+
print(response.content[0].text)
|
|
25
|
+
# Token usage logged transparently.
|