tokenator 0.1.15__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenator/usage.py +454 -378
- {tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/METADATA +9 -4
- {tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/RECORD +5 -5
- {tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/LICENSE +0 -0
- {tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/WHEEL +0 -0
tokenator/usage.py CHANGED

@@ -16,241 +16,324 @@ from . import state
 
 import requests
 import logging
+import time
 
 logger = logging.getLogger(__name__)
 
 
 class TokenUsageService:
     def __init__(self):
+        try:
+            if not state.is_tokenator_enabled:
+                logger.info("Tokenator is disabled. Database access is unavailable.")
+            self.MODEL_COSTS = self._get_model_costs()
+        except Exception as e:
+            logger.error(f"Error in __init__: {e}")
+            self.MODEL_COSTS = {}
 
     def _get_model_costs(self) -> Dict[str, TokenRate]:
+        try:
+            if not state.is_tokenator_enabled:
+                return {}
+            url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+            response = requests.get(url)
+            response.raise_for_status()
+            data = response.json()
+
+            model_costs = {}
+            for model, info in data.items():
+                if (
+                    "input_cost_per_token" not in info
+                    or "output_cost_per_token" not in info
+                ):
+                    continue
+
+                rate = TokenRate(
+                    prompt=info["input_cost_per_token"],
+                    completion=info["output_cost_per_token"],
+                    prompt_audio=info.get("input_cost_per_audio_token"),
+                    completion_audio=info.get("output_cost_per_audio_token"),
+                    prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
+                    prompt_cached_creation=info.get("cache_read_creation_token_cost") or 0,
+                )
+                model_costs[model] = rate
 
+            return model_costs
+        except Exception as e:
+            logger.error(f"Error in _get_model_costs: {e}")
+            return {}
 
     def _calculate_cost(
         self, usages: list[TokenUsage], provider: Optional[str] = None
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                logger.warning("Tokenator is disabled. Skipping cost calculation.")
+                return TokenUsageReport()
+
+            if not self.MODEL_COSTS:
+                logger.warning("No model costs available.")
+                return TokenUsageReport()
+
+            # Default GPT4O pricing updated with provided values
+            GPT4O_PRICING = TokenRate(
+                prompt=0.0000025,
+                completion=0.000010,
+                prompt_audio=0.0001,
+                completion_audio=0.0002,
+                prompt_cached_input=0.00000125,
+                prompt_cached_creation=0.00000125,
+            )
 
+            provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
+            logger.debug(f"usages: {len(usages)}")
 
+            for usage in usages:
+                # Model key resolution logic (unchanged)
+                model_key = usage.model
+                if model_key in self.MODEL_COSTS:
+                    pass
+                elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
+                    model_key = f"{usage.provider}/{usage.model}"
                 else:
+                    matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
+                    if matched_keys:
+                        model_key = matched_keys[0]
+                        logger.warning(
+                            f"Model {usage.model} matched with {model_key} in pricing data via contains search"
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
+                        )
+                        self.MODEL_COSTS[model_key] = GPT4O_PRICING
+
+                provider_key = usage.provider or "default"
+                provider_model_usages.setdefault(provider_key, {}).setdefault(
+                    model_key, []
+                ).append(usage)
+
+            # Calculate totals for each level
+            providers_list = []
+            total_metrics = {
                 "total_cost": 0.0,
                 "total_tokens": 0,
                 "prompt_tokens": 0,
                 "completion_tokens": 0,
             }
 
+            for provider, model_usages in provider_model_usages.items():
+                provider_metrics = {
+                    "total_cost": 0.0,
+                    "total_tokens": 0,
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "prompt_cached_input_tokens": 0,
+                    "prompt_cached_creation_tokens": 0,
+                    "prompt_audio_tokens": 0,
+                    "completion_audio_tokens": 0,
+                    "completion_reasoning_tokens": 0,
+                    "completion_accepted_prediction_tokens": 0,
+                    "completion_rejected_prediction_tokens": 0,
+                }
+                models_list = []
+
+                for model_key, usages in model_usages.items():
+                    model_rates = self.MODEL_COSTS[model_key]
+                    model_cost = 0.0
+                    model_total = 0
+                    model_prompt = 0
+                    model_completion = 0
+
+                    for usage in usages:
+                        # Base token costs
+                        prompt_text_tokens = usage.prompt_tokens
+                        if usage.prompt_cached_input_tokens:
+                            prompt_text_tokens = (
+                                usage.prompt_tokens - usage.prompt_cached_input_tokens
                             )
+                        if usage.prompt_audio_tokens:
+                            prompt_text_tokens = (
+                                usage.prompt_tokens - usage.prompt_audio_tokens
                             )
 
+                        completion_text_tokens = usage.completion_tokens
+                        if usage.completion_audio_tokens:
+                            completion_text_tokens = (
+                                usage.completion_tokens - usage.completion_audio_tokens
                             )
 
+                        prompt_cost = prompt_text_tokens * model_rates.prompt
+                        completion_cost = completion_text_tokens * model_rates.completion
+                        model_cost += prompt_cost + completion_cost
+
+                        # Audio token costs
+                        if usage.prompt_audio_tokens:
+                            if model_rates.prompt_audio:
+                                model_cost += (
+                                    usage.prompt_audio_tokens * model_rates.prompt_audio
+                                )
+                            else:
+                                logger.warning(
+                                    f"Audio prompt tokens present for {model_key} but no audio rate defined"
+                                )
+
+                        if usage.completion_audio_tokens:
+                            if model_rates.completion_audio:
+                                model_cost += (
+                                    usage.completion_audio_tokens
+                                    * model_rates.completion_audio
+                                )
+                            else:
+                                logger.warning(
+                                    f"Audio completion tokens present for {model_key} but no audio rate defined"
+                                )
+
+                        # Cached token costs
+                        if usage.prompt_cached_input_tokens:
+                            if model_rates.prompt_cached_input:
+                                model_cost += (
+                                    usage.prompt_cached_input_tokens
+                                    * model_rates.prompt_cached_input
+                                )
+                            else:
+                                logger.warning(
+                                    f"Cached input tokens present for {model_key} but no cache input rate defined"
+                                )
+
+                        if usage.prompt_cached_creation_tokens:
+                            if model_rates.prompt_cached_creation:
+                                model_cost += (
+                                    usage.prompt_cached_creation_tokens
+                                    * model_rates.prompt_cached_creation
+                                )
+                            else:
+                                logger.warning(
+                                    f"Cached creation tokens present for {model_key} but no cache creation rate defined"
+                                )
+
+                        model_total += usage.total_tokens
+                        model_prompt += usage.prompt_tokens
+                        model_completion += usage.completion_tokens
+
+                    models_list.append(
+                        ModelUsage(
+                            model=model_key,
+                            total_cost=round(model_cost, 6),
+                            total_tokens=model_total,
+                            prompt_tokens=model_prompt,
+                            completion_tokens=model_completion,
+                            prompt_tokens_details=PromptTokenDetails(
+                                cached_input_tokens=sum(
+                                    u.prompt_cached_input_tokens or 0 for u in usages
+                                ),
+                                cached_creation_tokens=sum(
+                                    u.prompt_cached_creation_tokens or 0 for u in usages
+                                ),
+                                audio_tokens=sum(
+                                    u.prompt_audio_tokens or 0 for u in usages
+                                ),
                             )
+                            if any(
+                                u.prompt_cached_input_tokens
+                                or u.prompt_cached_creation_tokens
+                                or u.prompt_audio_tokens
+                                for u in usages
                             )
+                            else None,
+                            completion_tokens_details=CompletionTokenDetails(
+                                audio_tokens=sum(
+                                    u.completion_audio_tokens or 0 for u in usages
+                                ),
+                                reasoning_tokens=sum(
+                                    u.completion_reasoning_tokens or 0 for u in usages
+                                ),
+                                accepted_prediction_tokens=sum(
+                                    u.completion_accepted_prediction_tokens or 0
+                                    for u in usages
+                                ),
+                                rejected_prediction_tokens=sum(
+                                    u.completion_rejected_prediction_tokens or 0
+                                    for u in usages
+                                ),
                             )
+                            if any(
+                                getattr(u, attr, None)
+                                for u in usages
+                                for attr in [
+                                    "completion_audio_tokens",
+                                    "completion_reasoning_tokens",
+                                    "completion_accepted_prediction_tokens",
+                                    "completion_rejected_prediction_tokens",
+                                ]
                             )
+                            else None,
+                        )
+                    )
+
+                    # Update provider metrics with all token types
+                    provider_metrics["total_cost"] += model_cost
+                    provider_metrics["total_tokens"] += model_total
+                    provider_metrics["prompt_tokens"] += model_prompt
+                    provider_metrics["completion_tokens"] += model_completion
+                    provider_metrics["prompt_cached_input_tokens"] += sum(
+                        u.prompt_cached_input_tokens or 0 for u in usages
+                    )
+                    provider_metrics["prompt_cached_creation_tokens"] += sum(
+                        u.prompt_cached_creation_tokens or 0 for u in usages
+                    )
+                    provider_metrics["prompt_audio_tokens"] += sum(
+                        u.prompt_audio_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_audio_tokens"] += sum(
+                        u.completion_audio_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_reasoning_tokens"] += sum(
+                        u.completion_reasoning_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_accepted_prediction_tokens"] += sum(
+                        u.completion_accepted_prediction_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_rejected_prediction_tokens"] += sum(
+                        u.completion_rejected_prediction_tokens or 0 for u in usages
+                    )
 
+                providers_list.append(
+                    ProviderUsage(
+                        provider=provider,
+                        models=models_list,
+                        total_cost=round(provider_metrics["total_cost"], 6),
+                        total_tokens=provider_metrics["total_tokens"],
+                        prompt_tokens=provider_metrics["prompt_tokens"],
+                        completion_tokens=provider_metrics["completion_tokens"],
                         prompt_tokens_details=PromptTokenDetails(
+                            cached_input_tokens=provider_metrics[
+                                "prompt_cached_input_tokens"
+                            ],
+                            cached_creation_tokens=provider_metrics[
+                                "prompt_cached_creation_tokens"
+                            ],
+                            audio_tokens=provider_metrics["prompt_audio_tokens"],
                         )
+                        if provider_metrics["prompt_cached_input_tokens"]
+                        or provider_metrics["prompt_cached_creation_tokens"]
+                        or provider_metrics["prompt_audio_tokens"]
                         else None,
                         completion_tokens_details=CompletionTokenDetails(
+                            audio_tokens=provider_metrics["completion_audio_tokens"],
+                            reasoning_tokens=provider_metrics[
+                                "completion_reasoning_tokens"
+                            ],
+                            accepted_prediction_tokens=provider_metrics[
+                                "completion_accepted_prediction_tokens"
+                            ],
+                            rejected_prediction_tokens=provider_metrics[
+                                "completion_rejected_prediction_tokens"
+                            ],
                         )
                         if any(
+                            provider_metrics[k]
+                            for k in [
                                 "completion_audio_tokens",
                                 "completion_reasoning_tokens",
                                 "completion_accepted_prediction_tokens",
@@ -261,89 +344,19 @@ class TokenUsageService:
                     )
                 )
 
+                for key in total_metrics:
+                    total_metrics[key] += provider_metrics[key]
 
+            return TokenUsageReport(
+                providers=providers_list,
+                **{
+                    k: (round(v, 6) if k == "total_cost" else v)
+                    for k, v in total_metrics.items()
+                },
             )
+        except Exception as e:
+            logger.error(f"Error in _calculate_cost: {e}")
+            return TokenUsageReport()
 
     def _query_usage(
         self,
@@ -352,74 +365,97 @@ class TokenUsageService:
         provider: Optional[str] = None,
         model: Optional[str] = None,
     ) -> TokenUsageReport:
         try:
+            if not state.is_tokenator_enabled:
+                logger.warning("Tokenator is disabled. Skipping usage query.")
+                return TokenUsageReport()
 
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage).filter(
+                    TokenUsage.created_at.between(start_date, end_date)
+                )
 
+                if provider:
+                    query = query.filter(TokenUsage.provider == provider)
+                if model:
+                    query = query.filter(TokenUsage.model == model)
+
+                usages = query.all()
+
+                return self._calculate_cost(usages, provider or "all")
+            except Exception as e:
+                logger.error(f"Error querying usage: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.error(f"Unexpected error in _query_usage: {e}")
+            return TokenUsageReport()
 
     def last_hour(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last hour (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(hours=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.error(f"Error in last_hour: {e}")
             return TokenUsageReport()
 
     def last_day(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(days=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.error(f"Error in last_day: {e}")
             return TokenUsageReport()
 
     def last_week(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(weeks=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.error(f"Error in last_week: {e}")
             return TokenUsageReport()
 
     def last_month(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(days=30)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.error(f"Error in last_month: {e}")
             return TokenUsageReport()
 
     def between(
         self,
@@ -428,76 +464,116 @@ class TokenUsageService:
         provider: Optional[str] = None,
         model: Optional[str] = None,
     ) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
+            )
 
+            if isinstance(start_date, str):
+                try:
+                    start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
+                except ValueError:
+                    logger.warning(
+                        f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
+                    )
+                    start = datetime.strptime(start_date, "%Y-%m-%d")
+            else:
+                start = start_date
 
+            if isinstance(end_date, str):
+                try:
+                    end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
+                except ValueError:
+                    logger.warning(
+                        f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
+                    )
+                    end = (
+                        datetime.strptime(end_date, "%Y-%m-%d")
+                        + timedelta(days=1)
+                        - timedelta(seconds=1)
+                    )
+            else:
+                end = end_date
 
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.error(f"Error in between: {e}")
+            return TokenUsageReport()
 
     def for_execution(self, execution_id: str) -> TokenUsageReport:
         try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(f"Getting cost analysis for execution_id={execution_id}")
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage).filter(
+                    TokenUsage.execution_id == execution_id
+                )
+                return self._calculate_cost(query.all())
+            except Exception as e:
+                logger.error(f"Error querying for_execution: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.error(f"Unexpected error in for_execution: {e}")
+            return TokenUsageReport()
 
     def last_execution(self) -> TokenUsageReport:
         try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug("Getting cost analysis for last execution")
+            session = get_session()()
+            try:
+                query = (
+                    session.query(TokenUsage).order_by(TokenUsage.created_at.desc()).first()
+                )
+                if query:
+                    return self.for_execution(query.execution_id)
+                return TokenUsageReport()
+            except Exception as e:
+                logger.error(f"Error querying last_execution: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.error(f"Unexpected error in last_execution: {e}")
             return TokenUsageReport()
 
     def all_time(self) -> TokenUsageReport:
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+
+            logger.warning("Getting cost analysis for all time. This may take a while...")
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage)
+                return self._calculate_cost(query.all())
+            except Exception as e:
+                logger.error(f"Error querying all_time usage: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.error(f"Unexpected error in all_time: {e}")
            return TokenUsageReport()
 
+    def wipe(self):
+        logger.warning("All your usage data is about to be wiped, are you sure you want to do this? You have 5 seconds to cancel this operation.")
+        for i in range(5, 0, -1):
+            logger.warning(str(i))
+            time.sleep(1)
         session = get_session()()
         try:
+            session.query(TokenUsage).delete()
+            session.commit()
+            logger.warning("All usage data has been deleted.")
+        except Exception as e:
+            logger.error(f"Error wiping data: {e}")
         finally:
             session.close()
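Two behavioral details in this rewrite are easy to miss among all the try/except wrapping. First, an unknown model no longer derails cost calculation: the pricing lookup falls back from an exact key, to a provider-qualified key, to a substring ("contains") match, and finally to the hard-coded gpt-4o rates. A standalone sketch of that lookup order (the toy pricing table and the `resolve_model_key` helper below are illustrative, not part of the library):

```python
# Illustrative sketch of the model-key resolution order in _calculate_cost.
# The MODEL_COSTS values are placeholders, not real per-token rates.
MODEL_COSTS = {"gpt-4o": 0.0000025, "anthropic/claude-3-5-sonnet": 0.000003}

def resolve_model_key(model: str, provider: str) -> str:
    if model in MODEL_COSTS:                      # 1. exact match
        return model
    if f"{provider}/{model}" in MODEL_COSTS:      # 2. provider-qualified match
        return f"{provider}/{model}"
    matched = [k for k in MODEL_COSTS if model in k]
    if matched:                                   # 3. substring ("contains") search
        return matched[0]
    return "gpt-4o"                               # 4. gpt-4o pricing fallback

print(resolve_model_key("claude-3-5-sonnet", "anthropic"))  # anthropic/claude-3-5-sonnet
print(resolve_model_key("my-custom-model", "openai"))       # gpt-4o
```

Second, `between()` now accepts plain strings as well as datetimes and widens date-only bounds so the range stays inclusive: a start of "2024-01-01" becomes midnight, while an end of "2024-01-31" becomes the last second of that day, exactly as the hunk above computes it:

```python
from datetime import datetime, timedelta

# Date-only end bound widened to 23:59:59, mirroring between() above.
end = datetime.strptime("2024-01-31", "%Y-%m-%d") + timedelta(days=1) - timedelta(seconds=1)
print(end)  # 2024-01-31 23:59:59
```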
{tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tokenator
-Version: 0.1.15
+Version: 0.1.16
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
@@ -21,14 +21,15 @@ Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
 Description-Content-Type: text/markdown
 
-# Tokenator : Track
+# Tokenator : Track, analyze, compare LLM token usage and costs
 
 Have you ever wondered :
 - How many tokens does your AI agent consume?
-- How much does it cost to
+- How much does it cost to run a complex AI workflow with multiple LLM providers?
+- Which LLM is more cost effective for my use case?
 - How much money/tokens did you spend today on developing with LLMs?
 
-Afraid not, tokenator is here! With tokenator's easy to use
+Afraid not, tokenator is here! With tokenator's easy to use functions, you can start tracking LLM usage in a matter of minutes.
 
 Get started with just 3 lines of code!
 
@@ -114,6 +115,10 @@ print(cost.last_hour().model_dump_json(indent=4))
 }
 ```
 
+## Cookbooks
+
+Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
 ## Features
 
 - Drop-in replacement for OpenAI, Anthropic client
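The README hunk above sits next to `print(cost.last_hour().model_dump_json(indent=4))`, which shows the reporting surface these changes feed: each helper returns a pydantic-style `TokenUsageReport`. A sketch of that call pattern; the import path is an assumption here, since the diff never shows how `cost` is constructed:

```python
# Assumption: `cost` is a module-level object backed by TokenUsageService,
# as the README snippet in this METADATA diff implies. Import path is hypothetical.
from tokenator import usage as cost

report = cost.last_hour(provider="openai")
print(report.model_dump_json(indent=4))  # the call shown in the README
```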
{tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/RECORD CHANGED

@@ -13,9 +13,9 @@ tokenator/openai/client_openai.py,sha256=pbdJ-aZPuJs-7OT1VEv0DW36cCYbRAVKhSQEprx
 tokenator/openai/stream_interceptors.py,sha256=ez1MnjRZW_rEalv2SIPAvrU9oMD6OJoD9vht-057fDM,5243
 tokenator/schemas.py,sha256=kBmShqgpQ3W-ILAP1NuCaFgqFplQM4OH0MmJteLqrwI,2371
 tokenator/state.py,sha256=xdqDC-rlEA88-VgqQqHnAOXQ5pNTpnHcgOtohDIImPY,262
-tokenator/usage.py,sha256=
+tokenator/usage.py,sha256=YnV4fZo0prUC4oPKNZjyN7misn1od6ANwXcLKCuN21Y,24982
 tokenator/utils.py,sha256=djoWmAhqH-O2Su3qIcuY-_3Vj1-qPwMcdzwq9IlwiDc,2435
-tokenator-0.1.
-tokenator-0.1.
-tokenator-0.1.
-tokenator-0.1.
+tokenator-0.1.16.dist-info/LICENSE,sha256=wdG-B6-ODk8RQ4jq5uXSn0w1UWTzCH_MMyvh7AwtGns,1074
+tokenator-0.1.16.dist-info/METADATA,sha256=B8sy9h8PaDG075-FYjSCz8rMOqoTdy3eOJsUQlxA9Lk,6257
+tokenator-0.1.16.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+tokenator-0.1.16.dist-info/RECORD,,
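For reference, the second and third RECORD columns are an unpadded urlsafe-base64 SHA-256 digest and the file size in bytes, per the wheel spec. A quick way to recompute an entry such as the new `tokenator/usage.py` line locally:

```python
import base64
import hashlib
import os

def record_entry(path: str) -> str:
    # RECORD hashes are unpadded urlsafe-base64 SHA-256 digests (wheel spec).
    with open(path, "rb") as f:
        digest = base64.urlsafe_b64encode(hashlib.sha256(f.read()).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{os.path.getsize(path)}"

print(record_entry("tokenator/usage.py"))  # e.g. tokenator/usage.py,sha256=YnV4fZo0...,24982
```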
{tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/LICENSE: file without changes
{tokenator-0.1.15.dist-info → tokenator-0.1.16.dist-info}/WHEEL: file without changes