tokenator 0.1.15__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenator/__init__.py +8 -1
- tokenator/base_wrapper.py +4 -1
- tokenator/gemini/__init__.py +5 -0
- tokenator/gemini/client_gemini.py +230 -0
- tokenator/gemini/stream_interceptors.py +77 -0
- tokenator/usage.py +464 -377
- tokenator/utils.py +7 -4
- {tokenator-0.1.15.dist-info → tokenator-0.2.0.dist-info}/METADATA +63 -6
- {tokenator-0.1.15.dist-info → tokenator-0.2.0.dist-info}/RECORD +11 -8
- {tokenator-0.1.15.dist-info → tokenator-0.2.0.dist-info}/WHEEL +1 -1
- {tokenator-0.1.15.dist-info → tokenator-0.2.0.dist-info}/LICENSE +0 -0
tokenator/usage.py
CHANGED
@@ -16,241 +16,329 @@ from . import state
 
 import requests
 import logging
+import time
 
 logger = logging.getLogger(__name__)
 
 
 class TokenUsageService:
     def __init__(self):
- … (4 lines not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                logger.info("Tokenator is disabled. Database access is unavailable.")
+            self.MODEL_COSTS = self._get_model_costs()
+        except Exception as e:
+            logger.warning(f"Error in __init__: {e}")
+            self.MODEL_COSTS = {}
 
     def _get_model_costs(self) -> Dict[str, TokenRate]:
- … (23 lines not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return {}
+            url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+            response = requests.get(url)
+            response.raise_for_status()
+            data = response.json()
+
+            model_costs = {}
+            for model, info in data.items():
+                if (
+                    "input_cost_per_token" not in info
+                    or "output_cost_per_token" not in info
+                ):
+                    continue
+
+                rate = TokenRate(
+                    prompt=info["input_cost_per_token"],
+                    completion=info["output_cost_per_token"],
+                    prompt_audio=info.get("input_cost_per_audio_token"),
+                    completion_audio=info.get("output_cost_per_audio_token"),
+                    prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
+                    prompt_cached_creation=info.get("cache_read_creation_token_cost")
+                    or 0,
+                )
+                model_costs[model] = rate
 
- … (1 line not recoverable from the rendered diff)
+            return model_costs
+        except Exception as e:
+            logger.warning(f"Error in _get_model_costs: {e}")
+            return {}
 
     def _calculate_cost(
         self, usages: list[TokenUsage], provider: Optional[str] = None
     ) -> TokenUsageReport:
- … (3 lines not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                logger.warning("Tokenator is disabled. Skipping cost calculation.")
+                return TokenUsageReport()
+
+            if not self.MODEL_COSTS:
+                logger.warning("No model costs available.")
+                return TokenUsageReport()
+
+            # Default GPT4O pricing updated with provided values
+            GPT4O_PRICING = TokenRate(
+                prompt=0.0000025,
+                completion=0.000010,
+                prompt_audio=0.0001,
+                completion_audio=0.0002,
+                prompt_cached_input=0.00000125,
+                prompt_cached_creation=0.00000125,
+            )
 
- … (1 line not recoverable from the rendered diff)
-            logger.…
-            return TokenUsageReport()
+            provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
+            logger.debug(f"usages: {len(usages)}")
 
- … (9 lines not recoverable from the rendered diff)
+            for usage in usages:
+                # Model key resolution logic (unchanged)
+                model_key = usage.model
+                if model_key in self.MODEL_COSTS:
+                    pass
+                elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
+                    model_key = f"{usage.provider}/{usage.model}"
+                else:
+                    matched_keys = [
+                        k for k in self.MODEL_COSTS.keys() if usage.model in k
+                    ]
+                    if matched_keys:
+                        model_key = matched_keys[0]
+                        logger.warning(
+                            f"Model {usage.model} matched with {model_key} in pricing data via contains search"
+                        )
+                    else:
+                        logger.warning(
+                            f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
+                        )
+                        self.MODEL_COSTS[model_key] = GPT4O_PRICING
 
- … (2 lines not recoverable from the rendered diff)
+                provider_key = usage.provider or "default"
+                provider_model_usages.setdefault(provider_key, {}).setdefault(
+                    model_key, []
+                ).append(usage)
 
- … (3 lines not recoverable from the rendered diff)
-            if model_key in self.MODEL_COSTS:
-                pass
-            elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
-                model_key = f"{usage.provider}/{usage.model}"
-            else:
-                matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
-                if matched_keys:
-                    model_key = matched_keys[0]
-                    logger.warning(
-                        f"Model {usage.model} matched with {model_key} in pricing data via contains search"
-                    )
-                else:
-                    logger.warning(
-                        f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
-                    )
-                    self.MODEL_COSTS[model_key] = GPT4O_PRICING
-
-            provider_key = usage.provider or "default"
-            provider_model_usages.setdefault(provider_key, {}).setdefault(
-                model_key, []
-            ).append(usage)
-
-        # Calculate totals for each level
-        providers_list = []
-        total_metrics = {
-            "total_cost": 0.0,
-            "total_tokens": 0,
-            "prompt_tokens": 0,
-            "completion_tokens": 0,
-        }
-
-        for provider, model_usages in provider_model_usages.items():
-            provider_metrics = {
+            # Calculate totals for each level
+            providers_list = []
+            total_metrics = {
                 "total_cost": 0.0,
                 "total_tokens": 0,
                 "prompt_tokens": 0,
                 "completion_tokens": 0,
-                "prompt_cached_input_tokens": 0,
-                "prompt_cached_creation_tokens": 0,
-                "prompt_audio_tokens": 0,
-                "completion_audio_tokens": 0,
-                "completion_reasoning_tokens": 0,
-                "completion_accepted_prediction_tokens": 0,
-                "completion_rejected_prediction_tokens": 0,
             }
-            models_list = []
-
-            for model_key, usages in model_usages.items():
-                model_rates = self.MODEL_COSTS[model_key]
-                model_cost = 0.0
-                model_total = 0
-                model_prompt = 0
-                model_completion = 0
-
-                for usage in usages:
-                    # Base token costs
-                    prompt_text_tokens = usage.prompt_tokens
-                    if usage.prompt_cached_input_tokens:
-                        prompt_text_tokens = (
-                            usage.prompt_tokens - usage.prompt_cached_input_tokens
-                        )
-                    if usage.prompt_audio_tokens:
-                        prompt_text_tokens = (
-                            usage.prompt_tokens - usage.prompt_audio_tokens
-                        )
 
- … (15 lines not recoverable from the rendered diff)
+            for provider, model_usages in provider_model_usages.items():
+                provider_metrics = {
+                    "total_cost": 0.0,
+                    "total_tokens": 0,
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "prompt_cached_input_tokens": 0,
+                    "prompt_cached_creation_tokens": 0,
+                    "prompt_audio_tokens": 0,
+                    "completion_audio_tokens": 0,
+                    "completion_reasoning_tokens": 0,
+                    "completion_accepted_prediction_tokens": 0,
+                    "completion_rejected_prediction_tokens": 0,
+                }
+                models_list = []
+
+                for model_key, usages in model_usages.items():
+                    model_rates = self.MODEL_COSTS[model_key]
+                    model_cost = 0.0
+                    model_total = 0
+                    model_prompt = 0
+                    model_completion = 0
+
+                    for usage in usages:
+                        # Base token costs
+                        prompt_text_tokens = usage.prompt_tokens
+                        if usage.prompt_cached_input_tokens:
+                            prompt_text_tokens = (
+                                usage.prompt_tokens - usage.prompt_cached_input_tokens
                             )
- … (3 lines not recoverable from the rendered diff)
+                        if usage.prompt_audio_tokens:
+                            prompt_text_tokens = (
+                                usage.prompt_tokens - usage.prompt_audio_tokens
                             )
 
- … (1 line not recoverable from the rendered diff)
-                    if…
- … (1 line not recoverable from the rendered diff)
-                        usage.completion_audio_tokens
-                        * model_rates.completion_audio
-                    )
-                    else:
-                        logger.warning(
-                            f"Audio completion tokens present for {model_key} but no audio rate defined"
+                        completion_text_tokens = usage.completion_tokens
+                        if usage.completion_audio_tokens:
+                            completion_text_tokens = (
+                                usage.completion_tokens - usage.completion_audio_tokens
                             )
 
- … (6 lines not recoverable from the rendered diff)
+                        prompt_cost = prompt_text_tokens * model_rates.prompt
+                        completion_cost = (
+                            completion_text_tokens * model_rates.completion
+                        )
+                        model_cost += prompt_cost + completion_cost
+
+                        # Audio token costs
+                        if usage.prompt_audio_tokens:
+                            if model_rates.prompt_audio:
+                                model_cost += (
+                                    usage.prompt_audio_tokens * model_rates.prompt_audio
+                                )
+                            else:
+                                logger.warning(
+                                    f"Audio prompt tokens present for {model_key} but no audio rate defined"
+                                )
+
+                        if usage.completion_audio_tokens:
+                            if model_rates.completion_audio:
+                                model_cost += (
+                                    usage.completion_audio_tokens
+                                    * model_rates.completion_audio
+                                )
+                            else:
+                                logger.warning(
+                                    f"Audio completion tokens present for {model_key} but no audio rate defined"
+                                )
+
+                        # Cached token costs
+                        if usage.prompt_cached_input_tokens:
+                            if model_rates.prompt_cached_input:
+                                model_cost += (
+                                    usage.prompt_cached_input_tokens
+                                    * model_rates.prompt_cached_input
+                                )
+                            else:
+                                logger.warning(
+                                    f"Cached input tokens present for {model_key} but no cache input rate defined"
+                                )
+
+                        if usage.prompt_cached_creation_tokens:
+                            if model_rates.prompt_cached_creation:
+                                model_cost += (
+                                    usage.prompt_cached_creation_tokens
+                                    * model_rates.prompt_cached_creation
+                                )
+                            else:
+                                logger.warning(
+                                    f"Cached creation tokens present for {model_key} but no cache creation rate defined"
+                                )
+
+                        model_total += usage.total_tokens
+                        model_prompt += usage.prompt_tokens
+                        model_completion += usage.completion_tokens
+
+                    models_list.append(
+                        ModelUsage(
+                            model=model_key,
+                            total_cost=round(model_cost, 6),
+                            total_tokens=model_total,
+                            prompt_tokens=model_prompt,
+                            completion_tokens=model_completion,
+                            prompt_tokens_details=PromptTokenDetails(
+                                cached_input_tokens=sum(
+                                    u.prompt_cached_input_tokens or 0 for u in usages
+                                ),
+                                cached_creation_tokens=sum(
+                                    u.prompt_cached_creation_tokens or 0 for u in usages
+                                ),
+                                audio_tokens=sum(
+                                    u.prompt_audio_tokens or 0 for u in usages
+                                ),
                             )
- … (3 lines not recoverable from the rendered diff)
+                            if any(
+                                u.prompt_cached_input_tokens
+                                or u.prompt_cached_creation_tokens
+                                or u.prompt_audio_tokens
+                                for u in usages
                             )
- … (6 lines not recoverable from the rendered diff)
+                            else None,
+                            completion_tokens_details=CompletionTokenDetails(
+                                audio_tokens=sum(
+                                    u.completion_audio_tokens or 0 for u in usages
+                                ),
+                                reasoning_tokens=sum(
+                                    u.completion_reasoning_tokens or 0 for u in usages
+                                ),
+                                accepted_prediction_tokens=sum(
+                                    u.completion_accepted_prediction_tokens or 0
+                                    for u in usages
+                                ),
+                                rejected_prediction_tokens=sum(
+                                    u.completion_rejected_prediction_tokens or 0
+                                    for u in usages
+                                ),
                             )
- … (3 lines not recoverable from the rendered diff)
+                            if any(
+                                getattr(u, attr, None)
+                                for u in usages
+                                for attr in [
+                                    "completion_audio_tokens",
+                                    "completion_reasoning_tokens",
+                                    "completion_accepted_prediction_tokens",
+                                    "completion_rejected_prediction_tokens",
+                                ]
                             )
+                            else None,
+                        )
+                    )
 
- … (11 lines not recoverable from the rendered diff)
+                    # Update provider metrics with all token types
+                    provider_metrics["total_cost"] += model_cost
+                    provider_metrics["total_tokens"] += model_total
+                    provider_metrics["prompt_tokens"] += model_prompt
+                    provider_metrics["completion_tokens"] += model_completion
+                    provider_metrics["prompt_cached_input_tokens"] += sum(
+                        u.prompt_cached_input_tokens or 0 for u in usages
+                    )
+                    provider_metrics["prompt_cached_creation_tokens"] += sum(
+                        u.prompt_cached_creation_tokens or 0 for u in usages
+                    )
+                    provider_metrics["prompt_audio_tokens"] += sum(
+                        u.prompt_audio_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_audio_tokens"] += sum(
+                        u.completion_audio_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_reasoning_tokens"] += sum(
+                        u.completion_reasoning_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_accepted_prediction_tokens"] += sum(
+                        u.completion_accepted_prediction_tokens or 0 for u in usages
+                    )
+                    provider_metrics["completion_rejected_prediction_tokens"] += sum(
+                        u.completion_rejected_prediction_tokens or 0 for u in usages
+                    )
+
+                providers_list.append(
+                    ProviderUsage(
+                        provider=provider,
+                        models=models_list,
+                        total_cost=round(provider_metrics["total_cost"], 6),
+                        total_tokens=provider_metrics["total_tokens"],
+                        prompt_tokens=provider_metrics["prompt_tokens"],
+                        completion_tokens=provider_metrics["completion_tokens"],
                         prompt_tokens_details=PromptTokenDetails(
-                            cached_input_tokens=…
- … (2 lines not recoverable from the rendered diff)
-                            cached_creation_tokens=…
- … (2 lines not recoverable from the rendered diff)
-                            audio_tokens=…
-                                u.prompt_audio_tokens or 0 for u in usages
-                            ),
-                        )
-                        if any(
-                            u.prompt_cached_input_tokens
-                            or u.prompt_cached_creation_tokens
-                            or u.prompt_audio_tokens
-                            for u in usages
+                            cached_input_tokens=provider_metrics[
+                                "prompt_cached_input_tokens"
+                            ],
+                            cached_creation_tokens=provider_metrics[
+                                "prompt_cached_creation_tokens"
+                            ],
+                            audio_tokens=provider_metrics["prompt_audio_tokens"],
                         )
+                        if provider_metrics["prompt_cached_input_tokens"]
+                        or provider_metrics["prompt_cached_creation_tokens"]
+                        or provider_metrics["prompt_audio_tokens"]
                         else None,
                         completion_tokens_details=CompletionTokenDetails(
-                            audio_tokens=…
- … (9 lines not recoverable from the rendered diff)
-                            rejected_prediction_tokens=sum(
-                                u.completion_rejected_prediction_tokens or 0
-                                for u in usages
-                            ),
+                            audio_tokens=provider_metrics["completion_audio_tokens"],
+                            reasoning_tokens=provider_metrics[
+                                "completion_reasoning_tokens"
+                            ],
+                            accepted_prediction_tokens=provider_metrics[
+                                "completion_accepted_prediction_tokens"
+                            ],
+                            rejected_prediction_tokens=provider_metrics[
+                                "completion_rejected_prediction_tokens"
+                            ],
                         )
                         if any(
- … (1 line not recoverable from the rendered diff)
-                            for…
-                            for attr in [
+                            provider_metrics[k]
+                            for k in [
                                 "completion_audio_tokens",
                                 "completion_reasoning_tokens",
                                 "completion_accepted_prediction_tokens",
@@ -261,89 +349,19 @@ class TokenUsageService:
                     )
                 )
 
- … (2 lines not recoverable from the rendered diff)
-                provider_metrics["total_tokens"] += model_total
-                provider_metrics["prompt_tokens"] += model_prompt
-                provider_metrics["completion_tokens"] += model_completion
-                provider_metrics["prompt_cached_input_tokens"] += sum(
-                    u.prompt_cached_input_tokens or 0 for u in usages
-                )
-                provider_metrics["prompt_cached_creation_tokens"] += sum(
-                    u.prompt_cached_creation_tokens or 0 for u in usages
-                )
-                provider_metrics["prompt_audio_tokens"] += sum(
-                    u.prompt_audio_tokens or 0 for u in usages
-                )
-                provider_metrics["completion_audio_tokens"] += sum(
-                    u.completion_audio_tokens or 0 for u in usages
-                )
-                provider_metrics["completion_reasoning_tokens"] += sum(
-                    u.completion_reasoning_tokens or 0 for u in usages
-                )
-                provider_metrics["completion_accepted_prediction_tokens"] += sum(
-                    u.completion_accepted_prediction_tokens or 0 for u in usages
-                )
-                provider_metrics["completion_rejected_prediction_tokens"] += sum(
-                    u.completion_rejected_prediction_tokens or 0 for u in usages
-                )
+            for key in total_metrics:
+                total_metrics[key] += provider_metrics[key]
 
- … (6 lines not recoverable from the rendered diff)
-                    prompt_tokens=provider_metrics["prompt_tokens"],
-                    completion_tokens=provider_metrics["completion_tokens"],
-                    prompt_tokens_details=PromptTokenDetails(
-                        cached_input_tokens=provider_metrics[
-                            "prompt_cached_input_tokens"
-                        ],
-                        cached_creation_tokens=provider_metrics[
-                            "prompt_cached_creation_tokens"
-                        ],
-                        audio_tokens=provider_metrics["prompt_audio_tokens"],
-                    )
-                    if provider_metrics["prompt_cached_input_tokens"]
-                    or provider_metrics["prompt_cached_creation_tokens"]
-                    or provider_metrics["prompt_audio_tokens"]
-                    else None,
-                    completion_tokens_details=CompletionTokenDetails(
-                        audio_tokens=provider_metrics["completion_audio_tokens"],
-                        reasoning_tokens=provider_metrics[
-                            "completion_reasoning_tokens"
-                        ],
-                        accepted_prediction_tokens=provider_metrics[
-                            "completion_accepted_prediction_tokens"
-                        ],
-                        rejected_prediction_tokens=provider_metrics[
-                            "completion_rejected_prediction_tokens"
-                        ],
-                    )
-                    if any(
-                        provider_metrics[k]
-                        for k in [
-                            "completion_audio_tokens",
-                            "completion_reasoning_tokens",
-                            "completion_accepted_prediction_tokens",
-                            "completion_rejected_prediction_tokens",
-                        ]
-                    )
-                    else None,
-                )
+            return TokenUsageReport(
+                providers=providers_list,
+                **{
+                    k: (round(v, 6) if k == "total_cost" else v)
+                    for k, v in total_metrics.items()
+                },
             )
- … (4 lines not recoverable from the rendered diff)
-        return TokenUsageReport(
-            providers=providers_list,
-            **{
-                k: (round(v, 6) if k == "total_cost" else v)
-                for k, v in total_metrics.items()
-            },
-        )
+        except Exception as e:
+            logger.warning(f"Error in _calculate_cost: {e}")
+            return TokenUsageReport()
 
     def _query_usage(
         self,
@@ -352,74 +370,97 @@ class TokenUsageService:
         provider: Optional[str] = None,
         model: Optional[str] = None,
     ) -> TokenUsageReport:
-        if not state.is_tokenator_enabled:
-            logger.warning("Tokenator is disabled. Skipping usage query.")
-            return TokenUsageReport()
-
-        session = get_session()()
         try:
- … (3 lines not recoverable from the rendered diff)
+            if not state.is_tokenator_enabled:
+                logger.warning("Tokenator is disabled. Skipping usage query.")
+                return TokenUsageReport()
 
- … (5 lines not recoverable from the rendered diff)
-            usages = query.all()
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage).filter(
+                    TokenUsage.created_at.between(start_date, end_date)
+                )
 
- … (3 lines not recoverable from the rendered diff)
+                if provider:
+                    query = query.filter(TokenUsage.provider.ilike(provider))
+                if model:
+                    query = query.filter(TokenUsage.model == model)
+
+                usages = query.all()
+
+                return self._calculate_cost(usages, provider or "all")
+            except Exception as e:
+                logger.warning(f"Error querying usage: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.warning(f"Unexpected error in _query_usage: {e}")
+            return TokenUsageReport()
 
     def last_hour(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
- … (1 line not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last hour (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(hours=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.warning(f"Error in last_hour: {e}")
             return TokenUsageReport()
-        logger.debug(
-            f"Getting cost analysis for last hour (provider={provider}, model={model})"
-        )
-        end = datetime.now()
-        start = end - timedelta(hours=1)
-        return self._query_usage(start, end, provider, model)
 
     def last_day(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
- … (1 line not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(days=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.warning(f"Error in last_day: {e}")
             return TokenUsageReport()
-        logger.debug(
-            f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
-        )
-        end = datetime.now()
-        start = end - timedelta(days=1)
-        return self._query_usage(start, end, provider, model)
 
     def last_week(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
- … (1 line not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(weeks=1)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.warning(f"Error in last_week: {e}")
             return TokenUsageReport()
-        logger.debug(
-            f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
-        )
-        end = datetime.now()
-        start = end - timedelta(weeks=1)
-        return self._query_usage(start, end, provider, model)
 
     def last_month(
         self, provider: Optional[str] = None, model: Optional[str] = None
     ) -> TokenUsageReport:
- … (1 line not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
+            )
+            end = datetime.now()
+            start = end - timedelta(days=30)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.warning(f"Error in last_month: {e}")
             return TokenUsageReport()
-        logger.debug(
-            f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
-        )
-        end = datetime.now()
-        start = end - timedelta(days=30)
-        return self._query_usage(start, end, provider, model)
 
     def between(
         self,
@@ -428,76 +469,122 @@ class TokenUsageService:
         provider: Optional[str] = None,
         model: Optional[str] = None,
     ) -> TokenUsageReport:
- … (5 lines not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(
+                f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
+            )
 
- … (10 lines not recoverable from the rendered diff)
+            if isinstance(start_date, str):
+                try:
+                    start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
+                except ValueError:
+                    logger.warning(
+                        f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
+                    )
+                    start = datetime.strptime(start_date, "%Y-%m-%d")
+            else:
+                start = start_date
 
- … (14 lines not recoverable from the rendered diff)
+            if isinstance(end_date, str):
+                try:
+                    end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
+                except ValueError:
+                    logger.warning(
+                        f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
+                    )
+                    end = (
+                        datetime.strptime(end_date, "%Y-%m-%d")
+                        + timedelta(days=1)
+                        - timedelta(seconds=1)
+                    )
+            else:
+                end = end_date
 
- … (1 line not recoverable from the rendered diff)
+            return self._query_usage(start, end, provider, model)
+        except Exception as e:
+            logger.warning(f"Error in between: {e}")
+            return TokenUsageReport()
 
     def for_execution(self, execution_id: str) -> TokenUsageReport:
-        if not state.is_tokenator_enabled:
-            return TokenUsageReport()
-        logger.debug(f"Getting cost analysis for execution_id={execution_id}")
-        session = get_session()()
         try:
- … (2 lines not recoverable from the rendered diff)
-            )
- … (3 lines not recoverable from the rendered diff)
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug(f"Getting cost analysis for execution_id={execution_id}")
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage).filter(
+                    TokenUsage.execution_id == execution_id
+                )
+                return self._calculate_cost(query.all())
+            except Exception as e:
+                logger.warning(f"Error querying for_execution: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.warning(f"Unexpected error in for_execution: {e}")
+            return TokenUsageReport()
 
     def last_execution(self) -> TokenUsageReport:
-        if not state.is_tokenator_enabled:
-            return TokenUsageReport()
-        logger.debug("Getting cost analysis for last execution")
-        session = get_session()()
         try:
- … (2 lines not recoverable from the rendered diff)
-            )
- … (2 lines not recoverable from the rendered diff)
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+            logger.debug("Getting cost analysis for last execution")
+            session = get_session()()
+            try:
+                query = (
+                    session.query(TokenUsage)
+                    .order_by(TokenUsage.created_at.desc())
+                    .first()
+                )
+                if query:
+                    return self.for_execution(query.execution_id)
+                return TokenUsageReport()
+            except Exception as e:
+                logger.warning(f"Error querying last_execution: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.warning(f"Unexpected error in last_execution: {e}")
             return TokenUsageReport()
-        finally:
-            session.close()
 
     def all_time(self) -> TokenUsageReport:
- … (1 line not recoverable from the rendered diff)
+        try:
+            if not state.is_tokenator_enabled:
+                return TokenUsageReport()
+
+            logger.warning(
+                "Getting cost analysis for all time. This may take a while..."
+            )
+            session = get_session()()
+            try:
+                query = session.query(TokenUsage)
+                return self._calculate_cost(query.all())
+            except Exception as e:
+                logger.warning(f"Error querying all_time usage: {e}")
+                return TokenUsageReport()
+            finally:
+                session.close()
+        except Exception as e:
+            logger.warning(f"Unexpected error in all_time: {e}")
             return TokenUsageReport()
 
- … (1 line not recoverable from the rendered diff)
+    def wipe(self):
+        logger.warning(
+            "All your usage data is about to be wiped, are you sure you want to do this? You have 5 seconds to cancel this operation."
+        )
+        for i in range(5, 0, -1):
+            logger.warning(str(i))
+            time.sleep(1)
         session = get_session()()
         try:
- … (2 lines not recoverable from the rendered diff)
+            session.query(TokenUsage).delete()
+            session.commit()
+            logger.warning("All usage data has been deleted.")
+        except Exception as e:
+            logger.warning(f"Error wiping data: {e}")
         finally:
            session.close()