tokenator 0.1.15__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {tokenator-0.1.15 → tokenator-0.1.16}/PKG-INFO +9 -4
  2. {tokenator-0.1.15 → tokenator-0.1.16}/README.md +8 -3
  3. {tokenator-0.1.15 → tokenator-0.1.16}/pyproject.toml +1 -1
  4. tokenator-0.1.16/src/tokenator/usage.py +579 -0
  5. tokenator-0.1.15/src/tokenator/usage.py +0 -503
  6. {tokenator-0.1.15 → tokenator-0.1.16}/LICENSE +0 -0
  7. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/__init__.py +0 -0
  8. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/anthropic/client_anthropic.py +0 -0
  9. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/anthropic/stream_interceptors.py +0 -0
  10. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/base_wrapper.py +0 -0
  11. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/create_migrations.py +0 -0
  12. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/migrations/env.py +0 -0
  13. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/migrations/script.py.mako +0 -0
  14. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +0 -0
  15. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
  16. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/migrations.py +0 -0
  17. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/models.py +0 -0
  18. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/openai/client_openai.py +0 -0
  19. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/openai/stream_interceptors.py +0 -0
  20. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/schemas.py +0 -0
  21. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/state.py +0 -0
  22. {tokenator-0.1.15 → tokenator-0.1.16}/src/tokenator/utils.py +0 -0
{tokenator-0.1.15 → tokenator-0.1.16}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: tokenator
- Version: 0.1.15
+ Version: 0.1.16
  Summary: Token usage tracking wrapper for LLMs
  License: MIT
  Author: Ujjwal Maheshwari
@@ -21,14 +21,15 @@ Requires-Dist: requests (>=2.32.3,<3.0.0)
  Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
  Description-Content-Type: text/markdown

- # Tokenator : Track and analyze LLM token usage and cost
+ # Tokenator : Track, analyze, compare LLM token usage and costs

  Have you ever wondered :
  - How many tokens does your AI agent consume?
- - How much does it cost to do run a complex AI workflow with multiple LLM providers?
+ - How much does it cost to run a complex AI workflow with multiple LLM providers?
+ - Which LLM is more cost effective for my use case?
  - How much money/tokens did you spend today on developing with LLMs?

- Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
+ Afraid not, tokenator is here! With tokenator's easy to use functions, you can start tracking LLM usage in a matter of minutes.

  Get started with just 3 lines of code!

@@ -114,6 +115,10 @@ print(cost.last_hour().model_dump_json(indent=4))
  }
  ```

+ ## Cookbooks
+
+ Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
  ## Features

  - Drop-in replacement for OpenAI, Anthropic client
{tokenator-0.1.15 → tokenator-0.1.16}/README.md

@@ -1,11 +1,12 @@
- # Tokenator : Track and analyze LLM token usage and cost
+ # Tokenator : Track, analyze, compare LLM token usage and costs

  Have you ever wondered :
  - How many tokens does your AI agent consume?
- - How much does it cost to do run a complex AI workflow with multiple LLM providers?
+ - How much does it cost to run a complex AI workflow with multiple LLM providers?
+ - Which LLM is more cost effective for my use case?
  - How much money/tokens did you spend today on developing with LLMs?

- Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
+ Afraid not, tokenator is here! With tokenator's easy to use functions, you can start tracking LLM usage in a matter of minutes.

  Get started with just 3 lines of code!

@@ -91,6 +92,10 @@ print(cost.last_hour().model_dump_json(indent=4))
  }
  ```

+ ## Cookbooks
+
+ Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
  ## Features

  - Drop-in replacement for OpenAI, Anthropic client
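For context, the quick start that the README's "3 lines of code" refers to looks roughly like the sketch below. The `TokenUsageService` class, its `last_hour()` method, and the `model_dump_json(indent=4)` call are confirmed by the `usage.py` diff further down and the hunk header above; the `tokenator_openai` wrapper name is an assumption inferred from the `src/tokenator/openai/client_openai.py` module in the file list, not something this diff confirms.

```python
from openai import OpenAI

# Hypothetical wrapper import: the name `tokenator_openai` is inferred from
# src/tokenator/openai/client_openai.py and is not confirmed by this diff.
from tokenator import tokenator_openai
from tokenator.usage import TokenUsageService

# Wrap a normal OpenAI client; tokenator records token usage as you use it.
client = tokenator_openai(OpenAI(api_key="your-api-key"))

client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "hello"}],
)

# Query recorded usage and cost, mirroring the README's own example line.
cost = TokenUsageService()
print(cost.last_hour().model_dump_json(indent=4))
```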
{tokenator-0.1.15 → tokenator-0.1.16}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "tokenator"
- version = "0.1.15"
+ version = "0.1.16"
  description = "Token usage tracking wrapper for LLMs"
  authors = ["Ujjwal Maheshwari <your.email@example.com>"]
  readme = "README.md"
tokenator-0.1.16/src/tokenator/usage.py (new file)

@@ -0,0 +1,579 @@
+ """Cost analysis functions for token usage."""
+
+ from datetime import datetime, timedelta
+ from typing import Dict, Optional, Union
+
+ from .schemas import get_session, TokenUsage
+ from .models import (
+     CompletionTokenDetails,
+     PromptTokenDetails,
+     TokenRate,
+     TokenUsageReport,
+     ModelUsage,
+     ProviderUsage,
+ )
+ from . import state
+
+ import requests
+ import logging
+ import time
+
+ logger = logging.getLogger(__name__)
+
+
+ class TokenUsageService:
+     def __init__(self):
+         try:
+             if not state.is_tokenator_enabled:
+                 logger.info("Tokenator is disabled. Database access is unavailable.")
+             self.MODEL_COSTS = self._get_model_costs()
+         except Exception as e:
+             logger.error(f"Error in __init__: {e}")
+             self.MODEL_COSTS = {}
+
+     def _get_model_costs(self) -> Dict[str, TokenRate]:
+         try:
+             if not state.is_tokenator_enabled:
+                 return {}
+             url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+             response = requests.get(url)
+             response.raise_for_status()
+             data = response.json()
+
+             model_costs = {}
+             for model, info in data.items():
+                 if (
+                     "input_cost_per_token" not in info
+                     or "output_cost_per_token" not in info
+                 ):
+                     continue
+
+                 rate = TokenRate(
+                     prompt=info["input_cost_per_token"],
+                     completion=info["output_cost_per_token"],
+                     prompt_audio=info.get("input_cost_per_audio_token"),
+                     completion_audio=info.get("output_cost_per_audio_token"),
+                     prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
+                     prompt_cached_creation=info.get("cache_read_creation_token_cost") or 0,
+                 )
+                 model_costs[model] = rate
+
+             return model_costs
+         except Exception as e:
+             logger.error(f"Error in _get_model_costs: {e}")
+             return {}
+
+     def _calculate_cost(
+         self, usages: list[TokenUsage], provider: Optional[str] = None
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 logger.warning("Tokenator is disabled. Skipping cost calculation.")
+                 return TokenUsageReport()
+
+             if not self.MODEL_COSTS:
+                 logger.warning("No model costs available.")
+                 return TokenUsageReport()
+
+             # Default GPT4O pricing updated with provided values
+             GPT4O_PRICING = TokenRate(
+                 prompt=0.0000025,
+                 completion=0.000010,
+                 prompt_audio=0.0001,
+                 completion_audio=0.0002,
+                 prompt_cached_input=0.00000125,
+                 prompt_cached_creation=0.00000125,
+             )
+
+             provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
+             logger.debug(f"usages: {len(usages)}")
+
+             for usage in usages:
+                 # Model key resolution logic (unchanged)
+                 model_key = usage.model
+                 if model_key in self.MODEL_COSTS:
+                     pass
+                 elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
+                     model_key = f"{usage.provider}/{usage.model}"
+                 else:
+                     matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
+                     if matched_keys:
+                         model_key = matched_keys[0]
+                         logger.warning(
+                             f"Model {usage.model} matched with {model_key} in pricing data via contains search"
+                         )
+                     else:
+                         logger.warning(
+                             f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
+                         )
+                         self.MODEL_COSTS[model_key] = GPT4O_PRICING
+
+                 provider_key = usage.provider or "default"
+                 provider_model_usages.setdefault(provider_key, {}).setdefault(
+                     model_key, []
+                 ).append(usage)
+
+             # Calculate totals for each level
+             providers_list = []
+             total_metrics = {
+                 "total_cost": 0.0,
+                 "total_tokens": 0,
+                 "prompt_tokens": 0,
+                 "completion_tokens": 0,
+             }
+
+             for provider, model_usages in provider_model_usages.items():
+                 provider_metrics = {
+                     "total_cost": 0.0,
+                     "total_tokens": 0,
+                     "prompt_tokens": 0,
+                     "completion_tokens": 0,
+                     "prompt_cached_input_tokens": 0,
+                     "prompt_cached_creation_tokens": 0,
+                     "prompt_audio_tokens": 0,
+                     "completion_audio_tokens": 0,
+                     "completion_reasoning_tokens": 0,
+                     "completion_accepted_prediction_tokens": 0,
+                     "completion_rejected_prediction_tokens": 0,
+                 }
+                 models_list = []
+
+                 for model_key, usages in model_usages.items():
+                     model_rates = self.MODEL_COSTS[model_key]
+                     model_cost = 0.0
+                     model_total = 0
+                     model_prompt = 0
+                     model_completion = 0
+
+                     for usage in usages:
+                         # Base token costs
+                         prompt_text_tokens = usage.prompt_tokens
+                         if usage.prompt_cached_input_tokens:
+                             prompt_text_tokens = (
+                                 usage.prompt_tokens - usage.prompt_cached_input_tokens
+                             )
+                         if usage.prompt_audio_tokens:
+                             prompt_text_tokens = (
+                                 usage.prompt_tokens - usage.prompt_audio_tokens
+                             )
+
+                         completion_text_tokens = usage.completion_tokens
+                         if usage.completion_audio_tokens:
+                             completion_text_tokens = (
+                                 usage.completion_tokens - usage.completion_audio_tokens
+                             )
+
+                         prompt_cost = prompt_text_tokens * model_rates.prompt
+                         completion_cost = completion_text_tokens * model_rates.completion
+                         model_cost += prompt_cost + completion_cost
+
+                         # Audio token costs
+                         if usage.prompt_audio_tokens:
+                             if model_rates.prompt_audio:
+                                 model_cost += (
+                                     usage.prompt_audio_tokens * model_rates.prompt_audio
+                                 )
+                             else:
+                                 logger.warning(
+                                     f"Audio prompt tokens present for {model_key} but no audio rate defined"
+                                 )
+
+                         if usage.completion_audio_tokens:
+                             if model_rates.completion_audio:
+                                 model_cost += (
+                                     usage.completion_audio_tokens
+                                     * model_rates.completion_audio
+                                 )
+                             else:
+                                 logger.warning(
+                                     f"Audio completion tokens present for {model_key} but no audio rate defined"
+                                 )
+
+                         # Cached token costs
+                         if usage.prompt_cached_input_tokens:
+                             if model_rates.prompt_cached_input:
+                                 model_cost += (
+                                     usage.prompt_cached_input_tokens
+                                     * model_rates.prompt_cached_input
+                                 )
+                             else:
+                                 logger.warning(
+                                     f"Cached input tokens present for {model_key} but no cache input rate defined"
+                                 )
+
+                         if usage.prompt_cached_creation_tokens:
+                             if model_rates.prompt_cached_creation:
+                                 model_cost += (
+                                     usage.prompt_cached_creation_tokens
+                                     * model_rates.prompt_cached_creation
+                                 )
+                             else:
+                                 logger.warning(
+                                     f"Cached creation tokens present for {model_key} but no cache creation rate defined"
+                                 )
+
+                         model_total += usage.total_tokens
+                         model_prompt += usage.prompt_tokens
+                         model_completion += usage.completion_tokens
+
+                     models_list.append(
+                         ModelUsage(
+                             model=model_key,
+                             total_cost=round(model_cost, 6),
+                             total_tokens=model_total,
+                             prompt_tokens=model_prompt,
+                             completion_tokens=model_completion,
+                             prompt_tokens_details=PromptTokenDetails(
+                                 cached_input_tokens=sum(
+                                     u.prompt_cached_input_tokens or 0 for u in usages
+                                 ),
+                                 cached_creation_tokens=sum(
+                                     u.prompt_cached_creation_tokens or 0 for u in usages
+                                 ),
+                                 audio_tokens=sum(
+                                     u.prompt_audio_tokens or 0 for u in usages
+                                 ),
+                             )
+                             if any(
+                                 u.prompt_cached_input_tokens
+                                 or u.prompt_cached_creation_tokens
+                                 or u.prompt_audio_tokens
+                                 for u in usages
+                             )
+                             else None,
+                             completion_tokens_details=CompletionTokenDetails(
+                                 audio_tokens=sum(
+                                     u.completion_audio_tokens or 0 for u in usages
+                                 ),
+                                 reasoning_tokens=sum(
+                                     u.completion_reasoning_tokens or 0 for u in usages
+                                 ),
+                                 accepted_prediction_tokens=sum(
+                                     u.completion_accepted_prediction_tokens or 0
+                                     for u in usages
+                                 ),
+                                 rejected_prediction_tokens=sum(
+                                     u.completion_rejected_prediction_tokens or 0
+                                     for u in usages
+                                 ),
+                             )
+                             if any(
+                                 getattr(u, attr, None)
+                                 for u in usages
+                                 for attr in [
+                                     "completion_audio_tokens",
+                                     "completion_reasoning_tokens",
+                                     "completion_accepted_prediction_tokens",
+                                     "completion_rejected_prediction_tokens",
+                                 ]
+                             )
+                             else None,
+                         )
+                     )
+
+                     # Update provider metrics with all token types
+                     provider_metrics["total_cost"] += model_cost
+                     provider_metrics["total_tokens"] += model_total
+                     provider_metrics["prompt_tokens"] += model_prompt
+                     provider_metrics["completion_tokens"] += model_completion
+                     provider_metrics["prompt_cached_input_tokens"] += sum(
+                         u.prompt_cached_input_tokens or 0 for u in usages
+                     )
+                     provider_metrics["prompt_cached_creation_tokens"] += sum(
+                         u.prompt_cached_creation_tokens or 0 for u in usages
+                     )
+                     provider_metrics["prompt_audio_tokens"] += sum(
+                         u.prompt_audio_tokens or 0 for u in usages
+                     )
+                     provider_metrics["completion_audio_tokens"] += sum(
+                         u.completion_audio_tokens or 0 for u in usages
+                     )
+                     provider_metrics["completion_reasoning_tokens"] += sum(
+                         u.completion_reasoning_tokens or 0 for u in usages
+                     )
+                     provider_metrics["completion_accepted_prediction_tokens"] += sum(
+                         u.completion_accepted_prediction_tokens or 0 for u in usages
+                     )
+                     provider_metrics["completion_rejected_prediction_tokens"] += sum(
+                         u.completion_rejected_prediction_tokens or 0 for u in usages
+                     )
+
+                 providers_list.append(
+                     ProviderUsage(
+                         provider=provider,
+                         models=models_list,
+                         total_cost=round(provider_metrics["total_cost"], 6),
+                         total_tokens=provider_metrics["total_tokens"],
+                         prompt_tokens=provider_metrics["prompt_tokens"],
+                         completion_tokens=provider_metrics["completion_tokens"],
+                         prompt_tokens_details=PromptTokenDetails(
+                             cached_input_tokens=provider_metrics[
+                                 "prompt_cached_input_tokens"
+                             ],
+                             cached_creation_tokens=provider_metrics[
+                                 "prompt_cached_creation_tokens"
+                             ],
+                             audio_tokens=provider_metrics["prompt_audio_tokens"],
+                         )
+                         if provider_metrics["prompt_cached_input_tokens"]
+                         or provider_metrics["prompt_cached_creation_tokens"]
+                         or provider_metrics["prompt_audio_tokens"]
+                         else None,
+                         completion_tokens_details=CompletionTokenDetails(
+                             audio_tokens=provider_metrics["completion_audio_tokens"],
+                             reasoning_tokens=provider_metrics[
+                                 "completion_reasoning_tokens"
+                             ],
+                             accepted_prediction_tokens=provider_metrics[
+                                 "completion_accepted_prediction_tokens"
+                             ],
+                             rejected_prediction_tokens=provider_metrics[
+                                 "completion_rejected_prediction_tokens"
+                             ],
+                         )
+                         if any(
+                             provider_metrics[k]
+                             for k in [
+                                 "completion_audio_tokens",
+                                 "completion_reasoning_tokens",
+                                 "completion_accepted_prediction_tokens",
+                                 "completion_rejected_prediction_tokens",
+                             ]
+                         )
+                         else None,
+                     )
+                 )
+
+                 for key in total_metrics:
+                     total_metrics[key] += provider_metrics[key]
+
+             return TokenUsageReport(
+                 providers=providers_list,
+                 **{
+                     k: (round(v, 6) if k == "total_cost" else v)
+                     for k, v in total_metrics.items()
+                 },
+             )
+         except Exception as e:
+             logger.error(f"Error in _calculate_cost: {e}")
+             return TokenUsageReport()
+
+     def _query_usage(
+         self,
+         start_date: datetime,
+         end_date: datetime,
+         provider: Optional[str] = None,
+         model: Optional[str] = None,
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 logger.warning("Tokenator is disabled. Skipping usage query.")
+                 return TokenUsageReport()
+
+             session = get_session()()
+             try:
+                 query = session.query(TokenUsage).filter(
+                     TokenUsage.created_at.between(start_date, end_date)
+                 )
+
+                 if provider:
+                     query = query.filter(TokenUsage.provider == provider)
+                 if model:
+                     query = query.filter(TokenUsage.model == model)
+
+                 usages = query.all()
+
+                 return self._calculate_cost(usages, provider or "all")
+             except Exception as e:
+                 logger.error(f"Error querying usage: {e}")
+                 return TokenUsageReport()
+             finally:
+                 session.close()
+         except Exception as e:
+             logger.error(f"Unexpected error in _query_usage: {e}")
+             return TokenUsageReport()
+
+     def last_hour(
+         self, provider: Optional[str] = None, model: Optional[str] = None
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(
+                 f"Getting cost analysis for last hour (provider={provider}, model={model})"
+             )
+             end = datetime.now()
+             start = end - timedelta(hours=1)
+             return self._query_usage(start, end, provider, model)
+         except Exception as e:
+             logger.error(f"Error in last_hour: {e}")
+             return TokenUsageReport()
+
+     def last_day(
+         self, provider: Optional[str] = None, model: Optional[str] = None
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(
+                 f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
+             )
+             end = datetime.now()
+             start = end - timedelta(days=1)
+             return self._query_usage(start, end, provider, model)
+         except Exception as e:
+             logger.error(f"Error in last_day: {e}")
+             return TokenUsageReport()
+
+     def last_week(
+         self, provider: Optional[str] = None, model: Optional[str] = None
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(
+                 f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
+             )
+             end = datetime.now()
+             start = end - timedelta(weeks=1)
+             return self._query_usage(start, end, provider, model)
+         except Exception as e:
+             logger.error(f"Error in last_week: {e}")
+             return TokenUsageReport()
+
+     def last_month(
+         self, provider: Optional[str] = None, model: Optional[str] = None
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(
+                 f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
+             )
+             end = datetime.now()
+             start = end - timedelta(days=30)
+             return self._query_usage(start, end, provider, model)
+         except Exception as e:
+             logger.error(f"Error in last_month: {e}")
+             return TokenUsageReport()
+
+     def between(
+         self,
+         start_date: Union[datetime, str],
+         end_date: Union[datetime, str],
+         provider: Optional[str] = None,
+         model: Optional[str] = None,
+     ) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(
+                 f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
+             )
+
+             if isinstance(start_date, str):
+                 try:
+                     start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
+                 except ValueError:
+                     logger.warning(
+                         f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
+                     )
+                     start = datetime.strptime(start_date, "%Y-%m-%d")
+             else:
+                 start = start_date
+
+             if isinstance(end_date, str):
+                 try:
+                     end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
+                 except ValueError:
+                     logger.warning(
+                         f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
+                     )
+                     end = (
+                         datetime.strptime(end_date, "%Y-%m-%d")
+                         + timedelta(days=1)
+                         - timedelta(seconds=1)
+                     )
+             else:
+                 end = end_date
+
+             return self._query_usage(start, end, provider, model)
+         except Exception as e:
+             logger.error(f"Error in between: {e}")
+             return TokenUsageReport()
+
+     def for_execution(self, execution_id: str) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug(f"Getting cost analysis for execution_id={execution_id}")
+             session = get_session()()
+             try:
+                 query = session.query(TokenUsage).filter(
+                     TokenUsage.execution_id == execution_id
+                 )
+                 return self._calculate_cost(query.all())
+             except Exception as e:
+                 logger.error(f"Error querying for_execution: {e}")
+                 return TokenUsageReport()
+             finally:
+                 session.close()
+         except Exception as e:
+             logger.error(f"Unexpected error in for_execution: {e}")
+             return TokenUsageReport()
+
+     def last_execution(self) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+             logger.debug("Getting cost analysis for last execution")
+             session = get_session()()
+             try:
+                 query = (
+                     session.query(TokenUsage).order_by(TokenUsage.created_at.desc()).first()
+                 )
+                 if query:
+                     return self.for_execution(query.execution_id)
+                 return TokenUsageReport()
+             except Exception as e:
+                 logger.error(f"Error querying last_execution: {e}")
+                 return TokenUsageReport()
+             finally:
+                 session.close()
+         except Exception as e:
+             logger.error(f"Unexpected error in last_execution: {e}")
+             return TokenUsageReport()
+
+     def all_time(self) -> TokenUsageReport:
+         try:
+             if not state.is_tokenator_enabled:
+                 return TokenUsageReport()
+
+             logger.warning("Getting cost analysis for all time. This may take a while...")
+             session = get_session()()
+             try:
+                 query = session.query(TokenUsage)
+                 return self._calculate_cost(query.all())
+             except Exception as e:
+                 logger.error(f"Error querying all_time usage: {e}")
+                 return TokenUsageReport()
+             finally:
+                 session.close()
+         except Exception as e:
+             logger.error(f"Unexpected error in all_time: {e}")
+             return TokenUsageReport()
+
+     def wipe(self):
+         logger.warning("All your usage data is about to be wiped, are you sure you want to do this? You have 5 seconds to cancel this operation.")
+         for i in range(5, 0, -1):
+             logger.warning(str(i))
+             time.sleep(1)
+         session = get_session()()
+         try:
+             session.query(TokenUsage).delete()
+             session.commit()
+             logger.warning("All usage data has been deleted.")
+         except Exception as e:
+             logger.error(f"Error wiping data: {e}")
+         finally:
+             session.close()
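The new `usage.py` above hardens every public entry point: each method now catches and logs exceptions and falls back to an empty `TokenUsageReport`, the price-list fetch gains `response.raise_for_status()`, and a `wipe()` method is added. A minimal sketch of how these methods might be called; the report fields `total_cost` and `total_tokens` follow from the `total_metrics` dict above, while the `provider="openai"` string is an assumption about what the OpenAI wrapper records:

```python
from tokenator.usage import TokenUsageService

svc = TokenUsageService()  # fetches per-token prices from the LiteLLM price list

# Rolling windows relative to datetime.now().
print(svc.last_hour().total_cost)
print(svc.last_day().total_tokens)

# `between` accepts datetimes or strings; per the parsing above, a
# date-only end_date string is expanded to 23:59:59 of that day.
report = svc.between("2025-01-01", "2025-01-31", provider="openai")
print(report.model_dump_json(indent=4))

# Per-execution rollup via the most recent recorded execution_id.
print(svc.last_execution().model_dump_json(indent=4))

# wipe() irreversibly deletes all recorded usage after a logged
# 5-second countdown; left commented out here on purpose.
# svc.wipe()
```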
tokenator-0.1.15/src/tokenator/usage.py (removed)

@@ -1,503 +0,0 @@
- """Cost analysis functions for token usage."""
-
- from datetime import datetime, timedelta
- from typing import Dict, Optional, Union
-
- from .schemas import get_session, TokenUsage
- from .models import (
-     CompletionTokenDetails,
-     PromptTokenDetails,
-     TokenRate,
-     TokenUsageReport,
-     ModelUsage,
-     ProviderUsage,
- )
- from . import state
-
- import requests
- import logging
-
- logger = logging.getLogger(__name__)
-
-
- class TokenUsageService:
-     def __init__(self):
-         if not state.is_tokenator_enabled:
-             logger.info("Tokenator is disabled. Database access is unavailable.")
-
-         self.MODEL_COSTS = self._get_model_costs()
-
-     def _get_model_costs(self) -> Dict[str, TokenRate]:
-         if not state.is_tokenator_enabled:
-             return {}
-         url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
-         response = requests.get(url)
-         data = response.json()
-
-         model_costs = {}
-         for model, info in data.items():
-             if (
-                 "input_cost_per_token" not in info
-                 or "output_cost_per_token" not in info
-             ):
-                 continue
-
-             rate = TokenRate(
-                 prompt=info["input_cost_per_token"],
-                 completion=info["output_cost_per_token"],
-                 prompt_audio=info.get("input_cost_per_audio_token"),
-                 completion_audio=info.get("output_cost_per_audio_token"),
-                 prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
-                 prompt_cached_creation=info.get("cache_read_creation_token_cost") or 0,
-             )
-             model_costs[model] = rate
-
-         return model_costs
-
-     def _calculate_cost(
-         self, usages: list[TokenUsage], provider: Optional[str] = None
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             logger.warning("Tokenator is disabled. Skipping cost calculation.")
-             return TokenUsageReport()
-
-         if not self.MODEL_COSTS:
-             logger.warning("No model costs available.")
-             return TokenUsageReport()
-
-         # Default GPT4O pricing updated with provided values
-         GPT4O_PRICING = TokenRate(
-             prompt=0.0000025,
-             completion=0.000010,
-             prompt_audio=0.0001,
-             completion_audio=0.0002,
-             prompt_cached_input=0.00000125,
-             prompt_cached_creation=0.00000125,
-         )
-
-         provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
-         logger.debug(f"usages: {len(usages)}")
-
-         for usage in usages:
-             # Model key resolution logic (unchanged)
-             model_key = usage.model
-             if model_key in self.MODEL_COSTS:
-                 pass
-             elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
-                 model_key = f"{usage.provider}/{usage.model}"
-             else:
-                 matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
-                 if matched_keys:
-                     model_key = matched_keys[0]
-                     logger.warning(
-                         f"Model {usage.model} matched with {model_key} in pricing data via contains search"
-                     )
-                 else:
-                     logger.warning(
-                         f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
-                     )
-                     self.MODEL_COSTS[model_key] = GPT4O_PRICING
-
-             provider_key = usage.provider or "default"
-             provider_model_usages.setdefault(provider_key, {}).setdefault(
-                 model_key, []
-             ).append(usage)
-
-         # Calculate totals for each level
-         providers_list = []
-         total_metrics = {
-             "total_cost": 0.0,
-             "total_tokens": 0,
-             "prompt_tokens": 0,
-             "completion_tokens": 0,
-         }
-
-         for provider, model_usages in provider_model_usages.items():
-             provider_metrics = {
-                 "total_cost": 0.0,
-                 "total_tokens": 0,
-                 "prompt_tokens": 0,
-                 "completion_tokens": 0,
-                 "prompt_cached_input_tokens": 0,
-                 "prompt_cached_creation_tokens": 0,
-                 "prompt_audio_tokens": 0,
-                 "completion_audio_tokens": 0,
-                 "completion_reasoning_tokens": 0,
-                 "completion_accepted_prediction_tokens": 0,
-                 "completion_rejected_prediction_tokens": 0,
-             }
-             models_list = []
-
-             for model_key, usages in model_usages.items():
-                 model_rates = self.MODEL_COSTS[model_key]
-                 model_cost = 0.0
-                 model_total = 0
-                 model_prompt = 0
-                 model_completion = 0
-
-                 for usage in usages:
-                     # Base token costs
-                     prompt_text_tokens = usage.prompt_tokens
-                     if usage.prompt_cached_input_tokens:
-                         prompt_text_tokens = (
-                             usage.prompt_tokens - usage.prompt_cached_input_tokens
-                         )
-                     if usage.prompt_audio_tokens:
-                         prompt_text_tokens = (
-                             usage.prompt_tokens - usage.prompt_audio_tokens
-                         )
-
-                     completion_text_tokens = usage.completion_tokens
-                     if usage.completion_audio_tokens:
-                         completion_text_tokens = (
-                             usage.completion_tokens - usage.completion_audio_tokens
-                         )
-
-                     prompt_cost = prompt_text_tokens * model_rates.prompt
-                     completion_cost = completion_text_tokens * model_rates.completion
-                     model_cost += prompt_cost + completion_cost
-
-                     # Audio token costs
-                     if usage.prompt_audio_tokens:
-                         if model_rates.prompt_audio:
-                             model_cost += (
-                                 usage.prompt_audio_tokens * model_rates.prompt_audio
-                             )
-                         else:
-                             logger.warning(
-                                 f"Audio prompt tokens present for {model_key} but no audio rate defined"
-                             )
-
-                     if usage.completion_audio_tokens:
-                         if model_rates.completion_audio:
-                             model_cost += (
-                                 usage.completion_audio_tokens
-                                 * model_rates.completion_audio
-                             )
-                         else:
-                             logger.warning(
-                                 f"Audio completion tokens present for {model_key} but no audio rate defined"
-                             )
-
-                     # Cached token costs
-                     if usage.prompt_cached_input_tokens:
-                         if model_rates.prompt_cached_input:
-                             model_cost += (
-                                 usage.prompt_cached_input_tokens
-                                 * model_rates.prompt_cached_input
-                             )
-                         else:
-                             logger.warning(
-                                 f"Cached input tokens present for {model_key} but no cache input rate defined"
-                             )
-
-                     if usage.prompt_cached_creation_tokens:
-                         if model_rates.prompt_cached_creation:
-                             model_cost += (
-                                 usage.prompt_cached_creation_tokens
-                                 * model_rates.prompt_cached_creation
-                             )
-                         else:
-                             logger.warning(
-                                 f"Cached creation tokens present for {model_key} but no cache creation rate defined"
-                             )
-
-                     model_total += usage.total_tokens
-                     model_prompt += usage.prompt_tokens
-                     model_completion += usage.completion_tokens
-
-                 models_list.append(
-                     ModelUsage(
-                         model=model_key,
-                         total_cost=round(model_cost, 6),
-                         total_tokens=model_total,
-                         prompt_tokens=model_prompt,
-                         completion_tokens=model_completion,
-                         prompt_tokens_details=PromptTokenDetails(
-                             cached_input_tokens=sum(
-                                 u.prompt_cached_input_tokens or 0 for u in usages
-                             ),
-                             cached_creation_tokens=sum(
-                                 u.prompt_cached_creation_tokens or 0 for u in usages
-                             ),
-                             audio_tokens=sum(
-                                 u.prompt_audio_tokens or 0 for u in usages
-                             ),
-                         )
-                         if any(
-                             u.prompt_cached_input_tokens
-                             or u.prompt_cached_creation_tokens
-                             or u.prompt_audio_tokens
-                             for u in usages
-                         )
-                         else None,
-                         completion_tokens_details=CompletionTokenDetails(
-                             audio_tokens=sum(
-                                 u.completion_audio_tokens or 0 for u in usages
-                             ),
-                             reasoning_tokens=sum(
-                                 u.completion_reasoning_tokens or 0 for u in usages
-                             ),
-                             accepted_prediction_tokens=sum(
-                                 u.completion_accepted_prediction_tokens or 0
-                                 for u in usages
-                             ),
-                             rejected_prediction_tokens=sum(
-                                 u.completion_rejected_prediction_tokens or 0
-                                 for u in usages
-                             ),
-                         )
-                         if any(
-                             getattr(u, attr, None)
-                             for u in usages
-                             for attr in [
-                                 "completion_audio_tokens",
-                                 "completion_reasoning_tokens",
-                                 "completion_accepted_prediction_tokens",
-                                 "completion_rejected_prediction_tokens",
-                             ]
-                         )
-                         else None,
-                     )
-                 )
-
-                 # Update provider metrics with all token types
-                 provider_metrics["total_cost"] += model_cost
-                 provider_metrics["total_tokens"] += model_total
-                 provider_metrics["prompt_tokens"] += model_prompt
-                 provider_metrics["completion_tokens"] += model_completion
-                 provider_metrics["prompt_cached_input_tokens"] += sum(
-                     u.prompt_cached_input_tokens or 0 for u in usages
-                 )
-                 provider_metrics["prompt_cached_creation_tokens"] += sum(
-                     u.prompt_cached_creation_tokens or 0 for u in usages
-                 )
-                 provider_metrics["prompt_audio_tokens"] += sum(
-                     u.prompt_audio_tokens or 0 for u in usages
-                 )
-                 provider_metrics["completion_audio_tokens"] += sum(
-                     u.completion_audio_tokens or 0 for u in usages
-                 )
-                 provider_metrics["completion_reasoning_tokens"] += sum(
-                     u.completion_reasoning_tokens or 0 for u in usages
-                 )
-                 provider_metrics["completion_accepted_prediction_tokens"] += sum(
-                     u.completion_accepted_prediction_tokens or 0 for u in usages
-                 )
-                 provider_metrics["completion_rejected_prediction_tokens"] += sum(
-                     u.completion_rejected_prediction_tokens or 0 for u in usages
-                 )
-
-             providers_list.append(
-                 ProviderUsage(
-                     provider=provider,
-                     models=models_list,
-                     total_cost=round(provider_metrics["total_cost"], 6),
-                     total_tokens=provider_metrics["total_tokens"],
-                     prompt_tokens=provider_metrics["prompt_tokens"],
-                     completion_tokens=provider_metrics["completion_tokens"],
-                     prompt_tokens_details=PromptTokenDetails(
-                         cached_input_tokens=provider_metrics[
-                             "prompt_cached_input_tokens"
-                         ],
-                         cached_creation_tokens=provider_metrics[
-                             "prompt_cached_creation_tokens"
-                         ],
-                         audio_tokens=provider_metrics["prompt_audio_tokens"],
-                     )
-                     if provider_metrics["prompt_cached_input_tokens"]
-                     or provider_metrics["prompt_cached_creation_tokens"]
-                     or provider_metrics["prompt_audio_tokens"]
-                     else None,
-                     completion_tokens_details=CompletionTokenDetails(
-                         audio_tokens=provider_metrics["completion_audio_tokens"],
-                         reasoning_tokens=provider_metrics[
-                             "completion_reasoning_tokens"
-                         ],
-                         accepted_prediction_tokens=provider_metrics[
-                             "completion_accepted_prediction_tokens"
-                         ],
-                         rejected_prediction_tokens=provider_metrics[
-                             "completion_rejected_prediction_tokens"
-                         ],
-                     )
-                     if any(
-                         provider_metrics[k]
-                         for k in [
-                             "completion_audio_tokens",
-                             "completion_reasoning_tokens",
-                             "completion_accepted_prediction_tokens",
-                             "completion_rejected_prediction_tokens",
-                         ]
-                     )
-                     else None,
-                 )
-             )
-
-             for key in total_metrics:
-                 total_metrics[key] += provider_metrics[key]
-
-         return TokenUsageReport(
-             providers=providers_list,
-             **{
-                 k: (round(v, 6) if k == "total_cost" else v)
-                 for k, v in total_metrics.items()
-             },
-         )
-
-     def _query_usage(
-         self,
-         start_date: datetime,
-         end_date: datetime,
-         provider: Optional[str] = None,
-         model: Optional[str] = None,
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             logger.warning("Tokenator is disabled. Skipping usage query.")
-             return TokenUsageReport()
-
-         session = get_session()()
-         try:
-             query = session.query(TokenUsage).filter(
-                 TokenUsage.created_at.between(start_date, end_date)
-             )
-
-             if provider:
-                 query = query.filter(TokenUsage.provider == provider)
-             if model:
-                 query = query.filter(TokenUsage.model == model)
-
-             usages = query.all()
-
-             return self._calculate_cost(usages, provider or "all")
-         finally:
-             session.close()
-
-     def last_hour(
-         self, provider: Optional[str] = None, model: Optional[str] = None
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(
-             f"Getting cost analysis for last hour (provider={provider}, model={model})"
-         )
-         end = datetime.now()
-         start = end - timedelta(hours=1)
-         return self._query_usage(start, end, provider, model)
-
-     def last_day(
-         self, provider: Optional[str] = None, model: Optional[str] = None
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(
-             f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
-         )
-         end = datetime.now()
-         start = end - timedelta(days=1)
-         return self._query_usage(start, end, provider, model)
-
-     def last_week(
-         self, provider: Optional[str] = None, model: Optional[str] = None
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(
-             f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
-         )
-         end = datetime.now()
-         start = end - timedelta(weeks=1)
-         return self._query_usage(start, end, provider, model)
-
-     def last_month(
-         self, provider: Optional[str] = None, model: Optional[str] = None
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(
-             f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
-         )
-         end = datetime.now()
-         start = end - timedelta(days=30)
-         return self._query_usage(start, end, provider, model)
-
-     def between(
-         self,
-         start_date: Union[datetime, str],
-         end_date: Union[datetime, str],
-         provider: Optional[str] = None,
-         model: Optional[str] = None,
-     ) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(
-             f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
-         )
-
-         if isinstance(start_date, str):
-             try:
-                 start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
-             except ValueError:
-                 logger.warning(
-                     f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
-                 )
-                 start = datetime.strptime(start_date, "%Y-%m-%d")
-         else:
-             start = start_date
-
-         if isinstance(end_date, str):
-             try:
-                 end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
-             except ValueError:
-                 logger.warning(
-                     f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
-                 )
-                 end = (
-                     datetime.strptime(end_date, "%Y-%m-%d")
-                     + timedelta(days=1)
-                     - timedelta(seconds=1)
-                 )
-         else:
-             end = end_date
-
-         return self._query_usage(start, end, provider, model)
-
-     def for_execution(self, execution_id: str) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug(f"Getting cost analysis for execution_id={execution_id}")
-         session = get_session()()
-         try:
-             query = session.query(TokenUsage).filter(
-                 TokenUsage.execution_id == execution_id
-             )
-             return self._calculate_cost(query.all())
-         finally:
-             session.close()
-
-     def last_execution(self) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-         logger.debug("Getting cost analysis for last execution")
-         session = get_session()()
-         try:
-             query = (
-                 session.query(TokenUsage).order_by(TokenUsage.created_at.desc()).first()
-             )
-             if query:
-                 return self.for_execution(query.execution_id)
-             return TokenUsageReport()
-         finally:
-             session.close()
-
-     def all_time(self) -> TokenUsageReport:
-         if not state.is_tokenator_enabled:
-             return TokenUsageReport()
-
-         logger.warning("Getting cost analysis for all time. This may take a while...")
-         session = get_session()()
-         try:
-             query = session.query(TokenUsage)
-             return self._calculate_cost(query.all())
-         finally:
-             session.close()
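Comparing the two versions, the practical change is failure behavior: in 0.1.15 an error during the price-list fetch propagates out of `TokenUsageService.__init__`, while in 0.1.16 it is caught, logged, and the service degrades to empty reports. A sketch of the difference, under the assumption that the network is unavailable at construction time:

```python
# Behavior sketch: constructing the service with the network unavailable.
from tokenator.usage import TokenUsageService

# 0.1.15: requests.get() raising (e.g. ConnectionError) propagates out of
# __init__, so merely creating the service can crash the caller.
#
# 0.1.16: the exception is caught and logged, MODEL_COSTS falls back to {},
# and every query method short-circuits to an empty TokenUsageReport.
svc = TokenUsageService()
print(svc.last_hour())  # TokenUsageReport() rather than an exception
```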