tokenhelm 0.1.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenhelm-0.1.0rc1/LICENSE +21 -0
- tokenhelm-0.1.0rc1/PKG-INFO +260 -0
- tokenhelm-0.1.0rc1/README.md +216 -0
- tokenhelm-0.1.0rc1/pyproject.toml +73 -0
- tokenhelm-0.1.0rc1/setup.cfg +4 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/__init__.py +82 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/__init__.py +20 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/anthropic.py +113 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/base.py +73 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/gemini.py +92 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/ollama.py +69 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/adapters/openai.py +81 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/calculator.py +61 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/config.py +14 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/errors.py +24 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/extraction.py +48 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/models.py +145 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/core/tracker.py +153 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/data/pricing.yaml +39 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/dispatch/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/dispatch/base.py +19 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/dispatch/default.py +75 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/logging/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/logging/base.py +20 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/logging/console.py +30 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/logging/file.py +30 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/logging/json.py +29 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/pricing/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/pricing/base.py +20 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/pricing/yaml_provider.py +73 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/py.typed +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/sdk/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/sdk/client.py +153 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/sdk/context.py +205 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/storage/__init__.py +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/storage/base.py +25 -0
- tokenhelm-0.1.0rc1/src/tokenhelm/storage/memory.py +32 -0
- tokenhelm-0.1.0rc1/src/tokenhelm.egg-info/PKG-INFO +260 -0
- tokenhelm-0.1.0rc1/src/tokenhelm.egg-info/SOURCES.txt +41 -0
- tokenhelm-0.1.0rc1/src/tokenhelm.egg-info/dependency_links.txt +1 -0
- tokenhelm-0.1.0rc1/src/tokenhelm.egg-info/requires.txt +26 -0
- tokenhelm-0.1.0rc1/src/tokenhelm.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TokenHelm contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokenhelm
|
|
3
|
+
Version: 0.1.0rc1
|
|
4
|
+
Summary: Lightweight, framework-agnostic token tracking and LLM cost calculation across providers.
|
|
5
|
+
Author: TokenHelm contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/quadkeys/tokenhelm
|
|
8
|
+
Keywords: llm,tokens,cost,openai,anthropic,gemini,ollama,observability
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: System :: Monitoring
|
|
19
|
+
Classifier: Typing :: Typed
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: PyYAML>=6.0
|
|
24
|
+
Provides-Extra: openai
|
|
25
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
26
|
+
Provides-Extra: gemini
|
|
27
|
+
Requires-Dist: google-genai>=0.1; extra == "gemini"
|
|
28
|
+
Provides-Extra: anthropic
|
|
29
|
+
Requires-Dist: anthropic>=0.40; extra == "anthropic"
|
|
30
|
+
Provides-Extra: ollama
|
|
31
|
+
Requires-Dist: ollama>=0.1; extra == "ollama"
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: openai>=1.0; extra == "all"
|
|
34
|
+
Requires-Dist: google-genai>=0.1; extra == "all"
|
|
35
|
+
Requires-Dist: anthropic>=0.40; extra == "all"
|
|
36
|
+
Requires-Dist: ollama>=0.1; extra == "all"
|
|
37
|
+
Provides-Extra: dev
|
|
38
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
41
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
42
|
+
Requires-Dist: build>=1.0; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# TokenHelm
|
|
46
|
+
|
|
47
|
+
**Lightweight, framework-agnostic token tracking and LLM cost calculation across providers.**
|
|
48
|
+
|
|
49
|
+
TokenHelm gives you one normalized usage/cost event for every LLM call — OpenAI, Gemini,
|
|
50
|
+
Anthropic, or Ollama — without locking you into any framework, patching any provider SDK, or
|
|
51
|
+
ever touching your credentials. It simply **observes** the response object your own client
|
|
52
|
+
already returns.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from tokenhelm import TokenHelm
|
|
56
|
+
|
|
57
|
+
tracker = TokenHelm() # zero-config
|
|
58
|
+
response = client.chat.completions.create(...) # your own OpenAI call
|
|
59
|
+
event = tracker.track(response) # normalized LLMEvent
|
|
60
|
+
print(event.to_dict())
|
|
61
|
+
# {'provider': 'openai', 'model': 'gpt-4o', 'input_tokens': 1000,
|
|
62
|
+
# 'output_tokens': 500, 'total_tokens': 1500, 'latency': 0.0,
|
|
63
|
+
# 'cost': '0.00750', 'timestamp': '...', 'usage_complete': True,
|
|
64
|
+
# 'priced': True, 'currency': 'USD'}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install tokenhelm # core (only dependency: PyYAML)
|
|
73
|
+
pip install "tokenhelm[openai]" # + OpenAI extras (for your own client)
|
|
74
|
+
pip install "tokenhelm[anthropic]" # + Anthropic
|
|
75
|
+
pip install "tokenhelm[gemini]" # + Google Gemini
|
|
76
|
+
pip install "tokenhelm[ollama]" # + Ollama
|
|
77
|
+
pip install "tokenhelm[all]" # all provider extras
|
|
78
|
+
pip install "tokenhelm[dev]" # test/lint toolchain
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Requires **Python 3.11+**. The extras only pull in the provider SDKs *you* call — TokenHelm
|
|
82
|
+
itself never imports them to read a response.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
from tokenhelm import TokenHelm
|
|
90
|
+
|
|
91
|
+
tracker = TokenHelm()
|
|
92
|
+
|
|
93
|
+
# 1. Manual tracking — track any completed response
|
|
94
|
+
event = tracker.track(response)
|
|
95
|
+
|
|
96
|
+
# 2. Scoped tracking — collect every event in a block
|
|
97
|
+
with tracker.trace() as scope:
|
|
98
|
+
response = client.chat.completions.create(...)
|
|
99
|
+
scope.track(response)
|
|
100
|
+
print(scope.events) # [LLMEvent(...)]
|
|
101
|
+
|
|
102
|
+
# 3. Choose where events go (any logger, callable, or storage)
|
|
103
|
+
from tokenhelm import ConsoleLogger
|
|
104
|
+
tracker = TokenHelm(logger=[ConsoleLogger(), lambda e: metrics.push(e.to_dict())])
|
|
105
|
+
|
|
106
|
+
# 4. Bring your own pricing (file, dict, or a full PricingProvider)
|
|
107
|
+
tracker = TokenHelm(pricing="my_rates.yaml")
|
|
108
|
+
tracker = TokenHelm(pricing={"openai": {"gpt-4o": {"input": 2.5, "output": 10.0}}})
|
|
109
|
+
|
|
110
|
+
# 5. Reconfigure later without rebuilding
|
|
111
|
+
tracker.configure(currency="EUR")
|
|
112
|
+
|
|
113
|
+
# 6. Streaming — exactly one event after the stream is exhausted
|
|
114
|
+
for chunk in tracker.track_stream(client.chat.completions.create(..., stream=True)):
|
|
115
|
+
... # consume chunks as usual
|
|
116
|
+
|
|
117
|
+
# 7. Async — same API with `async with` / `async for`
|
|
118
|
+
async with tracker.trace() as scope:
|
|
119
|
+
scope.track(await aclient.chat.completions.create(...))
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Every tracked request yields the same normalized **`LLMEvent`** with the eight mandated
|
|
123
|
+
fields — `provider, model, input_tokens, output_tokens, total_tokens, latency, cost,
|
|
124
|
+
timestamp` — plus `usage_complete` / `priced` status flags. Consumers never see a
|
|
125
|
+
provider-specific usage object. Costs use `decimal.Decimal` (no float drift). Missing usage or
|
|
126
|
+
unknown pricing degrade gracefully via the flags — tracking never raises on missing data.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Architecture
|
|
131
|
+
|
|
132
|
+
TokenHelm is built around five replaceable extension points; the core depends only on their
|
|
133
|
+
interfaces, never on a concrete implementation.
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
┌──────────────────────────────────────────────────────────┐
|
|
137
|
+
│ Your Application │
|
|
138
|
+
└───────────────────────────┬──────────────────────────────┘
|
|
139
|
+
│ track() / trace() / configure()
|
|
140
|
+
▼
|
|
141
|
+
┌────────────────────┐
|
|
142
|
+
│ TokenHelm │ (sdk: client + TraceScope)
|
|
143
|
+
└─────────┬──────────┘
|
|
144
|
+
▼
|
|
145
|
+
┌────────────────────┐
|
|
146
|
+
│ TokenTracker │ builds the normalized LLMEvent
|
|
147
|
+
└───┬────────────┬───┘
|
|
148
|
+
extract usage │ │ compute cost
|
|
149
|
+
▼ ▼
|
|
150
|
+
┌──────────────────┐ ┌──────────────────┐
|
|
151
|
+
│ BaseAdapter ① │ │ CostCalculator │
|
|
152
|
+
│ OpenAI/Gemini/ │ └────────┬─────────┘
|
|
153
|
+
│ Anthropic/Ollama │ ▼
|
|
154
|
+
└──────────────────┘ ┌──────────────────┐
|
|
155
|
+
│ PricingProvider ② │ (YAML default)
|
|
156
|
+
└──────────────────┘
|
|
157
|
+
│
|
|
158
|
+
▼ emit (tracker is unaware of sinks)
|
|
159
|
+
┌────────────────────┐
|
|
160
|
+
│ EventDispatcher ③ │
|
|
161
|
+
└───┬────────────┬───┘
|
|
162
|
+
▼ ▼
|
|
163
|
+
┌──────────────┐ ┌──────────────────┐
|
|
164
|
+
│ Logger ④ │ │ StorageBackend ⑤ │ (optional)
|
|
165
|
+
│ Console/... │ └──────────────────┘
|
|
166
|
+
└──────────────┘
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Extension points** (all public & stable — Constitution Principle VI):
|
|
170
|
+
|
|
171
|
+
| # | Interface | Default | Swap it to… |
|
|
172
|
+
|---|-----------|---------|-------------|
|
|
173
|
+
| ① | `BaseAdapter` | OpenAI, Gemini, Anthropic, Ollama | add a new provider |
|
|
174
|
+
| ② | `PricingProvider` | `YamlPricingProvider` | remote/dynamic pricing, AI FinOps |
|
|
175
|
+
| ③ | `EventDispatcher` | `DefaultEventDispatcher` | custom routing/batching/export |
|
|
176
|
+
| ④ | `Logger` | `ConsoleLogger` | JSON/file/metrics/dashboards |
|
|
177
|
+
| ⑤ | `StorageBackend` | none (opt-in) | in-memory/SQLite/warehouse/analytics |
|
|
178
|
+
|
|
179
|
+
**Dependency direction is strictly one-way** (no reverse dependencies):
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
Application → TokenHelm → TokenTracker → EventDispatcher → Logger / StorageBackend
|
|
183
|
+
└────────→ CostCalculator → PricingProvider
|
|
184
|
+
└────────→ UsageParser → BaseAdapter
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
`CostCalculator` depends *only* on `PricingProvider`; `TokenTracker` emits *only* through
|
|
188
|
+
`EventDispatcher`. Analytics, dashboards, and FinOps are downstream consumers of `LLMEvent`
|
|
189
|
+
behind these interfaces — they require no change to the core.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Supported Providers
|
|
194
|
+
|
|
195
|
+
All four providers are supported, with streaming and async, in v0.1.0.
|
|
196
|
+
|
|
197
|
+
| Provider | Status | Usage fields read |
|
|
198
|
+
|----------|--------|-------------------|
|
|
199
|
+
| **OpenAI** | ✅ supported | `usage.prompt_tokens` / `completion_tokens` (Chat); `input_tokens` / `output_tokens` (Responses) |
|
|
200
|
+
| **Google Gemini** | ✅ supported | `usage_metadata.prompt_token_count` / `candidates_token_count` |
|
|
201
|
+
| **Anthropic** | ✅ supported | `usage.input_tokens` / `output_tokens` (+ cache token extras) |
|
|
202
|
+
| **Ollama** (local) | ✅ supported | `prompt_eval_count` / `eval_count` |
|
|
203
|
+
|
|
204
|
+
All providers normalize into the **same** `LLMEvent` schema — switching providers is a
|
|
205
|
+
configuration change, not a code change. Each adapter handles both completed responses and
|
|
206
|
+
streaming.
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Roadmap
|
|
211
|
+
|
|
212
|
+
**v0.1.0 — Core SDK ✅ (current)**
|
|
213
|
+
|
|
214
|
+
- [x] Track usage and cost across one provider (MVP): cost calculation, normalized event,
|
|
215
|
+
scoped `trace()`, console logging, graceful degradation.
|
|
216
|
+
- [x] Provider parity: OpenAI, Gemini, Anthropic, Ollama adapters; identical event shape.
|
|
217
|
+
- [x] Output choice: `JSONLogger`, `FileLogger`, `InMemoryStorageBackend`, full `configure()`
|
|
218
|
+
and multi-sink dispatch.
|
|
219
|
+
- [x] Streaming & async: `track_stream()` (one final event), async `trace()`.
|
|
220
|
+
- [x] Hardening: <5 ms / <20 MB budgets, thread/async isolation suite, docs, packaging.
|
|
221
|
+
|
|
222
|
+
**Beyond v0.1** — each tier is additive on the five extension points; the v0.1 core API does
|
|
223
|
+
not change. See [`ROADMAP.md`](ROADMAP.md).
|
|
224
|
+
|
|
225
|
+
- [ ] **v0.2 — Analytics SDK** (`SQLiteStorageBackend` + usage queries)
|
|
226
|
+
- [ ] **v0.3 — Prompt Intelligence** (per-prompt/template attribution)
|
|
227
|
+
- [ ] **v0.4 — RAG Intelligence** (retrieval-aware accounting)
|
|
228
|
+
- [ ] **v0.5 — AI FinOps** (budgets, alerts, remote pricing)
|
|
229
|
+
- [ ] **v1.0 — Enterprise Platform** (stabilize the v0.x surface; dashboard, plugins)
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## Design principles
|
|
234
|
+
|
|
235
|
+
Framework-agnostic · provider-independent · zero vendor lock-in · <5 ms overhead ·
|
|
236
|
+
observe-don't-patch · one standardized event · everything replaceable.
|
|
237
|
+
|
|
238
|
+
See `specs/001-core-sdk/` for the constitution, spec, plan, data model, and public API
|
|
239
|
+
contract.
|
|
240
|
+
|
|
241
|
+
## Release Process
|
|
242
|
+
|
|
243
|
+
Releases follow a documented, automated procedure (Conventional Commits → release-please →
|
|
244
|
+
Trusted Publishing on PyPI via OIDC). The canonical, end-to-end release procedure is the
|
|
245
|
+
**[Go-Live & Release checklist](docs/go-live-checklist.md)** — follow it for every release.
|
|
246
|
+
|
|
247
|
+
Supporting docs:
|
|
248
|
+
|
|
249
|
+
- [`docs/releasing.md`](docs/releasing.md) — how publishing works (TestPyPI → PyPI, OIDC).
|
|
250
|
+
- [`docs/repository-setup.md`](docs/repository-setup.md) — branch protection, required checks,
|
|
251
|
+
Dependabot, security features.
|
|
252
|
+
- [`docs/release-checklist.md`](docs/release-checklist.md) — per-version quality gates.
|
|
253
|
+
|
|
254
|
+
Contributors: see [`CONTRIBUTING.md`](CONTRIBUTING.md) for the dev workflow, versioning, and
|
|
255
|
+
deprecation policy.
|
|
256
|
+
|
|
257
|
+
## License
|
|
258
|
+
|
|
259
|
+
MIT
|
|
260
|
+
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# TokenHelm
|
|
2
|
+
|
|
3
|
+
**Lightweight, framework-agnostic token tracking and LLM cost calculation across providers.**
|
|
4
|
+
|
|
5
|
+
TokenHelm gives you one normalized usage/cost event for every LLM call — OpenAI, Gemini,
|
|
6
|
+
Anthropic, or Ollama — without locking you into any framework, patching any provider SDK, or
|
|
7
|
+
ever touching your credentials. It simply **observes** the response object your own client
|
|
8
|
+
already returns.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from tokenhelm import TokenHelm
|
|
12
|
+
|
|
13
|
+
tracker = TokenHelm() # zero-config
|
|
14
|
+
response = client.chat.completions.create(...) # your own OpenAI call
|
|
15
|
+
event = tracker.track(response) # normalized LLMEvent
|
|
16
|
+
print(event.to_dict())
|
|
17
|
+
# {'provider': 'openai', 'model': 'gpt-4o', 'input_tokens': 1000,
|
|
18
|
+
# 'output_tokens': 500, 'total_tokens': 1500, 'latency': 0.0,
|
|
19
|
+
# 'cost': '0.00750', 'timestamp': '...', 'usage_complete': True,
|
|
20
|
+
# 'priced': True, 'currency': 'USD'}
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install tokenhelm # core (only dependency: PyYAML)
|
|
29
|
+
pip install "tokenhelm[openai]" # + OpenAI extras (for your own client)
|
|
30
|
+
pip install "tokenhelm[anthropic]" # + Anthropic
|
|
31
|
+
pip install "tokenhelm[gemini]" # + Google Gemini
|
|
32
|
+
pip install "tokenhelm[ollama]" # + Ollama
|
|
33
|
+
pip install "tokenhelm[all]" # all provider extras
|
|
34
|
+
pip install "tokenhelm[dev]" # test/lint toolchain
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Requires **Python 3.11+**. The extras only pull in the provider SDKs *you* call — TokenHelm
|
|
38
|
+
itself never imports them to read a response.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Quick Start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from tokenhelm import TokenHelm
|
|
46
|
+
|
|
47
|
+
tracker = TokenHelm()
|
|
48
|
+
|
|
49
|
+
# 1. Manual tracking — track any completed response
|
|
50
|
+
event = tracker.track(response)
|
|
51
|
+
|
|
52
|
+
# 2. Scoped tracking — collect every event in a block
|
|
53
|
+
with tracker.trace() as scope:
|
|
54
|
+
response = client.chat.completions.create(...)
|
|
55
|
+
scope.track(response)
|
|
56
|
+
print(scope.events) # [LLMEvent(...)]
|
|
57
|
+
|
|
58
|
+
# 3. Choose where events go (any logger, callable, or storage)
|
|
59
|
+
from tokenhelm import ConsoleLogger
|
|
60
|
+
tracker = TokenHelm(logger=[ConsoleLogger(), lambda e: metrics.push(e.to_dict())])
|
|
61
|
+
|
|
62
|
+
# 4. Bring your own pricing (file, dict, or a full PricingProvider)
|
|
63
|
+
tracker = TokenHelm(pricing="my_rates.yaml")
|
|
64
|
+
tracker = TokenHelm(pricing={"openai": {"gpt-4o": {"input": 2.5, "output": 10.0}}})
|
|
65
|
+
|
|
66
|
+
# 5. Reconfigure later without rebuilding
|
|
67
|
+
tracker.configure(currency="EUR")
|
|
68
|
+
|
|
69
|
+
# 6. Streaming — exactly one event after the stream is exhausted
|
|
70
|
+
for chunk in tracker.track_stream(client.chat.completions.create(..., stream=True)):
|
|
71
|
+
... # consume chunks as usual
|
|
72
|
+
|
|
73
|
+
# 7. Async — same API with `async with` / `async for`
|
|
74
|
+
async with tracker.trace() as scope:
|
|
75
|
+
scope.track(await aclient.chat.completions.create(...))
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Every tracked request yields the same normalized **`LLMEvent`** with the eight mandated
|
|
79
|
+
fields — `provider, model, input_tokens, output_tokens, total_tokens, latency, cost,
|
|
80
|
+
timestamp` — plus `usage_complete` / `priced` status flags. Consumers never see a
|
|
81
|
+
provider-specific usage object. Costs use `decimal.Decimal` (no float drift). Missing usage or
|
|
82
|
+
unknown pricing degrade gracefully via the flags — tracking never raises on missing data.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Architecture
|
|
87
|
+
|
|
88
|
+
TokenHelm is built around five replaceable extension points; the core depends only on their
|
|
89
|
+
interfaces, never on a concrete implementation.
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
┌──────────────────────────────────────────────────────────┐
|
|
93
|
+
│ Your Application │
|
|
94
|
+
└───────────────────────────┬──────────────────────────────┘
|
|
95
|
+
│ track() / trace() / configure()
|
|
96
|
+
▼
|
|
97
|
+
┌────────────────────┐
|
|
98
|
+
│ TokenHelm │ (sdk: client + TraceScope)
|
|
99
|
+
└─────────┬──────────┘
|
|
100
|
+
▼
|
|
101
|
+
┌────────────────────┐
|
|
102
|
+
│ TokenTracker │ builds the normalized LLMEvent
|
|
103
|
+
└───┬────────────┬───┘
|
|
104
|
+
extract usage │ │ compute cost
|
|
105
|
+
▼ ▼
|
|
106
|
+
┌──────────────────┐ ┌──────────────────┐
|
|
107
|
+
│ BaseAdapter ① │ │ CostCalculator │
|
|
108
|
+
│ OpenAI/Gemini/ │ └────────┬─────────┘
|
|
109
|
+
│ Anthropic/Ollama │ ▼
|
|
110
|
+
└──────────────────┘ ┌──────────────────┐
|
|
111
|
+
│ PricingProvider ② │ (YAML default)
|
|
112
|
+
└──────────────────┘
|
|
113
|
+
│
|
|
114
|
+
▼ emit (tracker is unaware of sinks)
|
|
115
|
+
┌────────────────────┐
|
|
116
|
+
│ EventDispatcher ③ │
|
|
117
|
+
└───┬────────────┬───┘
|
|
118
|
+
▼ ▼
|
|
119
|
+
┌──────────────┐ ┌──────────────────┐
|
|
120
|
+
│ Logger ④ │ │ StorageBackend ⑤ │ (optional)
|
|
121
|
+
│ Console/... │ └──────────────────┘
|
|
122
|
+
└──────────────┘
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
**Extension points** (all public & stable — Constitution Principle VI):
|
|
126
|
+
|
|
127
|
+
| # | Interface | Default | Swap it to… |
|
|
128
|
+
|---|-----------|---------|-------------|
|
|
129
|
+
| ① | `BaseAdapter` | OpenAI, Gemini, Anthropic, Ollama | add a new provider |
|
|
130
|
+
| ② | `PricingProvider` | `YamlPricingProvider` | remote/dynamic pricing, AI FinOps |
|
|
131
|
+
| ③ | `EventDispatcher` | `DefaultEventDispatcher` | custom routing/batching/export |
|
|
132
|
+
| ④ | `Logger` | `ConsoleLogger` | JSON/file/metrics/dashboards |
|
|
133
|
+
| ⑤ | `StorageBackend` | none (opt-in) | in-memory/SQLite/warehouse/analytics |
|
|
134
|
+
|
|
135
|
+
**Dependency direction is strictly one-way** (no reverse dependencies):
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
Application → TokenHelm → TokenTracker → EventDispatcher → Logger / StorageBackend
|
|
139
|
+
└────────→ CostCalculator → PricingProvider
|
|
140
|
+
└────────→ UsageParser → BaseAdapter
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
`CostCalculator` depends *only* on `PricingProvider`; `TokenTracker` emits *only* through
|
|
144
|
+
`EventDispatcher`. Analytics, dashboards, and FinOps are downstream consumers of `LLMEvent`
|
|
145
|
+
behind these interfaces — they require no change to the core.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## Supported Providers
|
|
150
|
+
|
|
151
|
+
All four providers are supported, with streaming and async, in v0.1.0.
|
|
152
|
+
|
|
153
|
+
| Provider | Status | Usage fields read |
|
|
154
|
+
|----------|--------|-------------------|
|
|
155
|
+
| **OpenAI** | ✅ supported | `usage.prompt_tokens` / `completion_tokens` (Chat); `input_tokens` / `output_tokens` (Responses) |
|
|
156
|
+
| **Google Gemini** | ✅ supported | `usage_metadata.prompt_token_count` / `candidates_token_count` |
|
|
157
|
+
| **Anthropic** | ✅ supported | `usage.input_tokens` / `output_tokens` (+ cache token extras) |
|
|
158
|
+
| **Ollama** (local) | ✅ supported | `prompt_eval_count` / `eval_count` |
|
|
159
|
+
|
|
160
|
+
All providers normalize into the **same** `LLMEvent` schema — switching providers is a
|
|
161
|
+
configuration change, not a code change. Each adapter handles both completed responses and
|
|
162
|
+
streaming.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Roadmap
|
|
167
|
+
|
|
168
|
+
**v0.1.0 — Core SDK ✅ (current)**
|
|
169
|
+
|
|
170
|
+
- [x] Track usage and cost across one provider (MVP): cost calculation, normalized event,
|
|
171
|
+
scoped `trace()`, console logging, graceful degradation.
|
|
172
|
+
- [x] Provider parity: OpenAI, Gemini, Anthropic, Ollama adapters; identical event shape.
|
|
173
|
+
- [x] Output choice: `JSONLogger`, `FileLogger`, `InMemoryStorageBackend`, full `configure()`
|
|
174
|
+
and multi-sink dispatch.
|
|
175
|
+
- [x] Streaming & async: `track_stream()` (one final event), async `trace()`.
|
|
176
|
+
- [x] Hardening: <5 ms / <20 MB budgets, thread/async isolation suite, docs, packaging.
|
|
177
|
+
|
|
178
|
+
**Beyond v0.1** — each tier is additive on the five extension points; the v0.1 core API does
|
|
179
|
+
not change. See [`ROADMAP.md`](ROADMAP.md).
|
|
180
|
+
|
|
181
|
+
- [ ] **v0.2 — Analytics SDK** (`SQLiteStorageBackend` + usage queries)
|
|
182
|
+
- [ ] **v0.3 — Prompt Intelligence** (per-prompt/template attribution)
|
|
183
|
+
- [ ] **v0.4 — RAG Intelligence** (retrieval-aware accounting)
|
|
184
|
+
- [ ] **v0.5 — AI FinOps** (budgets, alerts, remote pricing)
|
|
185
|
+
- [ ] **v1.0 — Enterprise Platform** (stabilize the v0.x surface; dashboard, plugins)
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Design principles
|
|
190
|
+
|
|
191
|
+
Framework-agnostic · provider-independent · zero vendor lock-in · <5 ms overhead ·
|
|
192
|
+
observe-don't-patch · one standardized event · everything replaceable.
|
|
193
|
+
|
|
194
|
+
See `specs/001-core-sdk/` for the constitution, spec, plan, data model, and public API
|
|
195
|
+
contract.
|
|
196
|
+
|
|
197
|
+
## Release Process
|
|
198
|
+
|
|
199
|
+
Releases follow a documented, automated procedure (Conventional Commits → release-please →
|
|
200
|
+
Trusted Publishing on PyPI via OIDC). The canonical, end-to-end release procedure is the
|
|
201
|
+
**[Go-Live & Release checklist](docs/go-live-checklist.md)** — follow it for every release.
|
|
202
|
+
|
|
203
|
+
Supporting docs:
|
|
204
|
+
|
|
205
|
+
- [`docs/releasing.md`](docs/releasing.md) — how publishing works (TestPyPI → PyPI, OIDC).
|
|
206
|
+
- [`docs/repository-setup.md`](docs/repository-setup.md) — branch protection, required checks,
|
|
207
|
+
Dependabot, security features.
|
|
208
|
+
- [`docs/release-checklist.md`](docs/release-checklist.md) — per-version quality gates.
|
|
209
|
+
|
|
210
|
+
Contributors: see [`CONTRIBUTING.md`](CONTRIBUTING.md) for the dev workflow, versioning, and
|
|
211
|
+
deprecation policy.
|
|
212
|
+
|
|
213
|
+
## License
|
|
214
|
+
|
|
215
|
+
MIT
|
|
216
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tokenhelm"
|
|
7
|
+
version = "0.1.0rc1"
|
|
8
|
+
description = "Lightweight, framework-agnostic token tracking and LLM cost calculation across providers."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "TokenHelm contributors" }]
|
|
13
|
+
keywords = ["llm", "tokens", "cost", "openai", "anthropic", "gemini", "ollama", "observability"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
24
|
+
"Topic :: System :: Monitoring",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
dependencies = ["PyYAML>=6.0"]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
openai = ["openai>=1.0"]
|
|
31
|
+
gemini = ["google-genai>=0.1"]
|
|
32
|
+
anthropic = ["anthropic>=0.40"]
|
|
33
|
+
ollama = ["ollama>=0.1"]
|
|
34
|
+
all = ["openai>=1.0", "google-genai>=0.1", "anthropic>=0.40", "ollama>=0.1"]
|
|
35
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "pytest-cov>=5.0", "ruff>=0.6", "build>=1.0"]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/quadkeys/tokenhelm"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["src"]
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.package-data]
|
|
44
|
+
tokenhelm = ["data/*.yaml", "py.typed"]
|
|
45
|
+
|
|
46
|
+
[tool.ruff]
|
|
47
|
+
line-length = 100
|
|
48
|
+
target-version = "py311"
|
|
49
|
+
|
|
50
|
+
[tool.ruff.lint]
|
|
51
|
+
select = ["E", "F", "I", "UP", "B"]
|
|
52
|
+
# UP042: LLMProvider deliberately subclasses (str, Enum) with an explicit __str__ for broad
|
|
53
|
+
# compatibility and stable value semantics. Migrating to enum.StrEnum is a deferrable cleanup,
|
|
54
|
+
# not done right before the v0.1 release (it changes a public type's base class).
|
|
55
|
+
ignore = ["UP042"]
|
|
56
|
+
|
|
57
|
+
[tool.pytest.ini_options]
|
|
58
|
+
testpaths = ["tests"]
|
|
59
|
+
asyncio_mode = "auto"
|
|
60
|
+
addopts = "--cov=tokenhelm --cov-report=term-missing --cov-fail-under=90"
|
|
61
|
+
markers = ["benchmark: performance/memory budget checks (deselect with -m 'not benchmark')"]
|
|
62
|
+
|
|
63
|
+
[tool.coverage.run]
|
|
64
|
+
source = ["tokenhelm"]
|
|
65
|
+
branch = true
|
|
66
|
+
|
|
67
|
+
[tool.commitizen]
|
|
68
|
+
name = "cz_conventional_commits"
|
|
69
|
+
tag_format = "v$version"
|
|
70
|
+
version_scheme = "semver"
|
|
71
|
+
version_provider = "pep621"
|
|
72
|
+
update_changelog_on_bump = false # CHANGELOG is owned by release-please in CI
|
|
73
|
+
major_version_zero = true
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""TokenHelm — lightweight, framework-agnostic LLM token tracking and cost calculation.
|
|
2
|
+
|
|
3
|
+
Public surface per ``specs/001-core-sdk/contracts/public-api.md``. Names exported here are the
|
|
4
|
+
v0.x stable contract (Principle X). This module re-exports the client and trace scope, the core
|
|
5
|
+
data model, the extension-point interfaces, the built-in adapters, and the default implementations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
# Errors
|
|
11
|
+
# Extension-point interfaces (Principle VI)
|
|
12
|
+
from .adapters.anthropic import AnthropicAdapter
|
|
13
|
+
from .adapters.base import BaseAdapter, StreamAggregator
|
|
14
|
+
from .adapters.gemini import GeminiAdapter
|
|
15
|
+
from .adapters.ollama import OllamaAdapter
|
|
16
|
+
from .adapters.openai import OpenAIAdapter
|
|
17
|
+
from .core.calculator import CostCalculator
|
|
18
|
+
from .core.errors import ProviderNotInstalledError, TokenHelmError
|
|
19
|
+
|
|
20
|
+
# Core data model
|
|
21
|
+
from .core.models import (
|
|
22
|
+
LLMCost,
|
|
23
|
+
LLMEvent,
|
|
24
|
+
LLMProvider,
|
|
25
|
+
LLMRequest,
|
|
26
|
+
LLMUsage,
|
|
27
|
+
RateEntry,
|
|
28
|
+
)
|
|
29
|
+
from .dispatch.base import EventDispatcher
|
|
30
|
+
from .dispatch.default import DefaultEventDispatcher
|
|
31
|
+
from .logging.base import Logger
|
|
32
|
+
from .logging.console import ConsoleLogger
|
|
33
|
+
from .logging.file import FileLogger
|
|
34
|
+
from .logging.json import JSONLogger
|
|
35
|
+
from .pricing.base import PricingProvider
|
|
36
|
+
from .pricing.yaml_provider import YamlPricingProvider
|
|
37
|
+
|
|
38
|
+
# Client + scope
|
|
39
|
+
from .sdk.client import TokenHelm
|
|
40
|
+
from .sdk.context import StreamSession, TraceScope
|
|
41
|
+
from .storage.base import StorageBackend
|
|
42
|
+
from .storage.memory import InMemoryStorageBackend
|
|
43
|
+
|
|
44
|
+
__version__ = "0.1.0rc1" # x-release-please-version
|
|
45
|
+
|
|
46
|
+
__all__ = [
|
|
47
|
+
"__version__",
|
|
48
|
+
# client
|
|
49
|
+
"TokenHelm",
|
|
50
|
+
"TraceScope",
|
|
51
|
+
"StreamSession",
|
|
52
|
+
# event + enum
|
|
53
|
+
"LLMEvent",
|
|
54
|
+
"LLMProvider",
|
|
55
|
+
"LLMUsage",
|
|
56
|
+
"LLMCost",
|
|
57
|
+
"LLMRequest",
|
|
58
|
+
"RateEntry",
|
|
59
|
+
# interfaces
|
|
60
|
+
"BaseAdapter",
|
|
61
|
+
"StreamAggregator",
|
|
62
|
+
"PricingProvider",
|
|
63
|
+
"EventDispatcher",
|
|
64
|
+
"Logger",
|
|
65
|
+
"StorageBackend",
|
|
66
|
+
# built-in adapters
|
|
67
|
+
"OpenAIAdapter",
|
|
68
|
+
"AnthropicAdapter",
|
|
69
|
+
"GeminiAdapter",
|
|
70
|
+
"OllamaAdapter",
|
|
71
|
+
# default implementations
|
|
72
|
+
"YamlPricingProvider",
|
|
73
|
+
"DefaultEventDispatcher",
|
|
74
|
+
"ConsoleLogger",
|
|
75
|
+
"JSONLogger",
|
|
76
|
+
"FileLogger",
|
|
77
|
+
"InMemoryStorageBackend",
|
|
78
|
+
"CostCalculator",
|
|
79
|
+
# errors
|
|
80
|
+
"TokenHelmError",
|
|
81
|
+
"ProviderNotInstalledError",
|
|
82
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Provider adapters (extension point #1).
|
|
2
|
+
|
|
3
|
+
``default_adapters()`` is the built-in adapter set the client registers by default. It lives
|
|
4
|
+
here (not in ``core``) so the core stays decoupled from concrete adapter implementations —
|
|
5
|
+
``core.extraction.UsageParser`` depends only on :class:`BaseAdapter`.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from .base import BaseAdapter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def default_adapters() -> list[BaseAdapter]:
    """Return a new instance of every built-in adapter, in resolution order.

    The concrete adapter modules are imported inside the function body rather than at
    module import time. The returned list is ordered OpenAI, Anthropic, Gemini, Ollama.
    """
    from .anthropic import AnthropicAdapter
    from .gemini import GeminiAdapter
    from .ollama import OllamaAdapter
    from .openai import OpenAIAdapter

    # Instantiate in the same order the adapters are meant to be resolved.
    builtin_types = (OpenAIAdapter, AnthropicAdapter, GeminiAdapter, OllamaAdapter)
    return [adapter_type() for adapter_type in builtin_types]
|