tokenator 0.1.14__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tokenator/anthropic/client_anthropic.py CHANGED
@@ -6,7 +6,7 @@ import logging
 from anthropic import Anthropic, AsyncAnthropic
 from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
 
-from ..models import Usage, TokenUsageStats
+from ..models import PromptTokenDetails, TokenMetrics, TokenUsageStats
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     AnthropicAsyncStreamInterceptor,
@@ -28,28 +28,46 @@ class BaseAnthropicWrapper(BaseWrapper):
             if isinstance(response, Message):
                 if not hasattr(response, "usage"):
                     return None
-                usage = Usage(
-                    prompt_tokens=response.usage.input_tokens,
+                usage = TokenMetrics(
+                    prompt_tokens=response.usage.input_tokens
+                    + (getattr(response.usage, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=response.usage.output_tokens,
                     total_tokens=response.usage.input_tokens
                     + response.usage.output_tokens,
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            response.usage, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            response.usage, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(model=response.model, usage=usage)
             elif isinstance(response, dict):
                 usage_dict = response.get("usage")
                 if not usage_dict:
                     return None
-                usage = Usage(
-                    prompt_tokens=usage_dict.get("input_tokens", 0),
+                usage = TokenMetrics(
+                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                    + (getattr(usage_dict, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=usage_dict.get("output_tokens", 0),
                     total_tokens=usage_dict.get("input_tokens", 0)
                     + usage_dict.get("output_tokens", 0),
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            usage_dict, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            usage_dict, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(
                     model=response.get("model", "unknown"), usage=usage
                 )
         except Exception as e:
-            logger.warning("Failed to process usage stats: %s", str(e))
+            logger.warning("Failed to process usage stats: %s", str(e), exc_info=True)
             return None
         return None
 
@@ -74,7 +92,7 @@ def _create_usage_callback(execution_id, log_usage_fn):
             model=chunks[0].message.model
             if isinstance(chunks[0], RawMessageStartEvent)
             else "",
-            usage=Usage(),
+            usage=TokenMetrics(),
         )
 
         for chunk in chunks:
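
For orientation, a minimal sketch of what the cache-aware extraction above computes. The `SimpleNamespace` stands in for anthropic's usage object; all numbers are invented:

```python
from types import SimpleNamespace

# Stand-in for anthropic's Message.usage (field names as in the diff above).
usage = SimpleNamespace(
    input_tokens=100,
    output_tokens=40,
    cache_read_input_tokens=80,
    cache_creation_input_tokens=20,
)

# Cache-creation tokens are folded into the prompt count, as in the diff.
prompt_tokens = usage.input_tokens + (
    getattr(usage, "cache_creation_input_tokens", 0) or 0
)
assert prompt_tokens == 120

# Caveat for the dict branch above: getattr() reads attributes, not keys, so
# on a plain dict it always returns the default; .get() would read the value.
assert getattr({"cache_creation_input_tokens": 20}, "cache_creation_input_tokens", 0) == 0
```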
tokenator/base_wrapper.py CHANGED
@@ -58,9 +58,33 @@ class BaseWrapper:
             execution_id=execution_id,
             provider=self.provider,
             model=token_usage_stats.model,
+            total_cost=0,  # This needs to be calculated based on your rates
             prompt_tokens=token_usage_stats.usage.prompt_tokens,
             completion_tokens=token_usage_stats.usage.completion_tokens,
             total_tokens=token_usage_stats.usage.total_tokens,
+            # Prompt details
+            prompt_cached_input_tokens=token_usage_stats.usage.prompt_tokens_details.cached_input_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_cached_creation_tokens=token_usage_stats.usage.prompt_tokens_details.cached_creation_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_audio_tokens=token_usage_stats.usage.prompt_tokens_details.audio_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            # Completion details
+            completion_audio_tokens=token_usage_stats.usage.completion_tokens_details.audio_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_reasoning_tokens=token_usage_stats.usage.completion_tokens_details.reasoning_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_accepted_prediction_tokens=token_usage_stats.usage.completion_tokens_details.accepted_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_rejected_prediction_tokens=token_usage_stats.usage.completion_tokens_details.rejected_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
         )
         session.add(token_usage)
         logger.debug(
@@ -82,11 +106,13 @@ class BaseWrapper:
         if not execution_id:
             execution_id = str(uuid.uuid4())
 
+        logger.debug("Starting token usage logging for execution_id: %s", execution_id)
         session = self.Session()
         try:
             try:
                 self._log_usage_impl(token_usage_stats, session, execution_id)
                 session.commit()
+                logger.debug("Successfully committed token usage for execution_id: %s", execution_id)
             except Exception as e:
                 logger.error("Failed to log token usage: %s", str(e))
                 session.rollback()
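
Two things stand out in this hunk: `total_cost` is persisted as `0` at write time (the cost is derived at report time in `tokenator/usage.py`, further down this diff), and every detail column is guarded because the details models are optional on `TokenMetrics`. A minimal sketch of that guard pattern, using stripped-down stand-ins for the models defined later in this diff:

```python
from typing import Optional
from pydantic import BaseModel

class PromptTokenDetails(BaseModel):   # stand-in, trimmed to one field
    cached_input_tokens: Optional[int] = None

class TokenMetrics(BaseModel):         # stand-in, trimmed
    prompt_tokens: int = 0
    prompt_tokens_details: Optional[PromptTokenDetails] = None

usage = TokenMetrics(prompt_tokens=10)  # details omitted entirely
cached = (
    usage.prompt_tokens_details.cached_input_tokens
    if usage.prompt_tokens_details
    else None
)
assert cached is None  # the column is then written as NULL
```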
tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py ADDED
@@ -0,0 +1,64 @@
+"""Adding detailed input and output token schema
+
+Revision ID: f028b8155fed
+Revises: f6f1f2437513
+Create Date: 2025-01-19 15:41:12.715623
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "f028b8155fed"
+down_revision: Union[str, None] = "f6f1f2437513"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("token_usage", sa.Column("total_cost", sa.Integer(), nullable=False))
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_input_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_creation_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage", sa.Column("prompt_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage", sa.Column("completion_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_reasoning_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_accepted_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_rejected_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("token_usage", "completion_rejected_prediction_tokens")
+    op.drop_column("token_usage", "completion_accepted_prediction_tokens")
+    op.drop_column("token_usage", "completion_reasoning_tokens")
+    op.drop_column("token_usage", "completion_audio_tokens")
+    op.drop_column("token_usage", "prompt_audio_tokens")
+    op.drop_column("token_usage", "prompt_cached_creation_tokens")
+    op.drop_column("token_usage", "prompt_cached_input_tokens")
+    op.drop_column("token_usage", "total_cost")
+    # ### end Alembic commands ###
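
A sketch of applying this revision with Alembic's command API, equivalent to `alembic upgrade f028b8155fed`; the `script_location` and database URL are assumptions, since tokenator's own migration runner (`tokenator/migrations.py`) is not shown in this diff. Note also that `total_cost` is added with `nullable=False` and no `server_default`, which SQLite in particular rejects when adding a NOT NULL column, so upgrading an existing database may need a default or a batch operation:

```python
from alembic import command
from alembic.config import Config

cfg = Config()
cfg.set_main_option("script_location", "tokenator/migrations")  # assumed layout
cfg.set_main_option("sqlalchemy.url", "sqlite:///usage.db")     # assumed DB URL

# Run the upgrade() above against the configured database.
command.upgrade(cfg, "f028b8155fed")  # or "head"
```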
tokenator/models.py CHANGED
@@ -1,10 +1,35 @@
 from pydantic import BaseModel, Field
-from typing import List
+from typing import List, Optional
 
 
 class TokenRate(BaseModel):
     prompt: float = Field(..., description="Cost per prompt token")
     completion: float = Field(..., description="Cost per completion token")
+    prompt_audio: Optional[float] = Field(
+        None, description="Cost per audio prompt token"
+    )
+    completion_audio: Optional[float] = Field(
+        None, description="Cost per audio completion token"
+    )
+    prompt_cached_input: Optional[float] = Field(
+        None, description="Cost per cached prompt input token"
+    )
+    prompt_cached_creation: Optional[float] = Field(
+        None, description="Cost per cached prompt creation token"
+    )
+
+
+class PromptTokenDetails(BaseModel):
+    cached_input_tokens: Optional[int] = None
+    cached_creation_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+
+
+class CompletionTokenDetails(BaseModel):
+    reasoning_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+    accepted_prediction_tokens: Optional[int] = None
+    rejected_prediction_tokens: Optional[int] = None
 
 
 class TokenMetrics(BaseModel):
@@ -12,6 +37,8 @@ class TokenMetrics(BaseModel):
     total_tokens: int = Field(default=0, description="Total tokens used")
     prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
     completion_tokens: int = Field(default=0, description="Number of completion tokens")
+    prompt_tokens_details: Optional[PromptTokenDetails] = None
+    completion_tokens_details: Optional[CompletionTokenDetails] = None
 
 
 class ModelUsage(TokenMetrics):
@@ -31,12 +58,6 @@ class TokenUsageReport(TokenMetrics):
     )
 
 
-class Usage(BaseModel):
-    prompt_tokens: int = 0
-    completion_tokens: int = 0
-    total_tokens: int = 0
-
-
 class TokenUsageStats(BaseModel):
     model: str
-    usage: Usage
+    usage: TokenMetrics
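
With `Usage` removed, streaming and non-streaming paths now share `TokenMetrics`. A short construction example against the models as defined above (all numbers invented):

```python
from tokenator.models import (
    CompletionTokenDetails,
    PromptTokenDetails,
    TokenMetrics,
    TokenUsageStats,
)

stats = TokenUsageStats(
    model="gpt-4o",
    usage=TokenMetrics(
        prompt_tokens=1200,
        completion_tokens=300,
        total_tokens=1500,
        prompt_tokens_details=PromptTokenDetails(cached_input_tokens=1000),
        completion_tokens_details=CompletionTokenDetails(reasoning_tokens=120),
    ),
)
assert stats.usage.prompt_tokens_details.cached_input_tokens == 1000
```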
tokenator/openai/client_openai.py CHANGED
@@ -6,7 +6,12 @@ import logging
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 
-from ..models import Usage, TokenUsageStats
+from ..models import (
+    TokenMetrics,
+    TokenUsageStats,
+    PromptTokenDetails,
+    CompletionTokenDetails,
+)
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     OpenAIAsyncStreamInterceptor,
@@ -30,18 +35,49 @@ class BaseOpenAIWrapper(BaseWrapper):
         if isinstance(response, ChatCompletion):
             if response.usage is None:
                 return None
-            usage = Usage(
+            usage = TokenMetrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
+                prompt_tokens_details=PromptTokenDetails(
+                    cached_input_tokens=getattr(
+                        response.usage.prompt_tokens_details, "cached_tokens", None
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.prompt_tokens_details, "audio_tokens", None
+                    ),
+                ),
+                completion_tokens_details=CompletionTokenDetails(
+                    reasoning_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "reasoning_tokens",
+                        None,
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "audio_tokens",
+                        None,
+                    ),
+                    accepted_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "accepted_prediction_tokens",
+                        None,
+                    ),
+                    rejected_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "rejected_prediction_tokens",
+                        None,
+                    ),
+                ),
             )
+
             return TokenUsageStats(model=response.model, usage=usage)
 
         elif isinstance(response, dict):
             usage_dict = response.get("usage")
             if not usage_dict:
                 return None
-            usage = Usage(
+            usage = TokenMetrics(
                 prompt_tokens=usage_dict.get("prompt_tokens", 0),
                 completion_tokens=usage_dict.get("completion_tokens", 0),
                 total_tokens=usage_dict.get("total_tokens", 0),
@@ -62,6 +98,10 @@ class BaseOpenAIWrapper(BaseWrapper):
     def completions(self):
         return self
 
+    @property
+    def beta(self):
+        return self
+
 
 def _create_usage_callback(execution_id, log_usage_fn):
     """Creates a callback function for processing usage statistics from stream chunks."""
@@ -75,10 +115,12 @@ def _create_usage_callback(execution_id, log_usage_fn):
             logger.debug("Tokenator is disabled - skipping stream usage logging")
             return
 
+        logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
        # Build usage_data from the first chunk's model
         usage_data = TokenUsageStats(
             model=chunks[0].model,
-            usage=Usage(),
+            usage=TokenMetrics(),
         )
         # Sum up usage from all chunks
         has_usage = False
@@ -116,6 +158,26 @@ class OpenAIWrapper(BaseOpenAIWrapper):
 
         return response
 
+    def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, Iterator[ChatCompletion]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAISyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 class AsyncOpenAIWrapper(BaseOpenAIWrapper):
     async def create(
@@ -141,6 +203,26 @@ class AsyncOpenAIWrapper(BaseOpenAIWrapper):
             self._log_usage(usage_data, execution_id=execution_id)
         return response
 
+    async def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = await self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAIAsyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = await self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 @overload
 def tokenator_openai(
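
The new `beta` property returns the wrapper itself, so the attribute chain `client.beta.chat.completions.parse(...)` lands on the `parse` methods above, which delegate to the real `client.beta.chat.completions.parse` and log usage on the way out. A usage sketch; the `Answer` schema is invented, and the import assumes `tokenator_openai` is exported from the package root as in earlier releases:

```python
from openai import OpenAI
from pydantic import BaseModel

from tokenator import tokenator_openai  # assumed export location

class Answer(BaseModel):  # hypothetical structured-output schema
    text: str

client = tokenator_openai(OpenAI(api_key="sk-..."))

# Routed through OpenAIWrapper.parse(), so token usage is logged as usual.
resp = client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say hi"}],
    response_format=Answer,
)
print(resp.choices[0].message.parsed)
```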
tokenator/schemas.py CHANGED
@@ -40,28 +40,28 @@ class TokenUsage(Base):
     updated_at = Column(
         DateTime, nullable=False, default=datetime.now, onupdate=datetime.now
     )
+
+    # Core metrics (mandatory)
+    total_cost = Column(Integer, nullable=False)
     prompt_tokens = Column(Integer, nullable=False)
     completion_tokens = Column(Integer, nullable=False)
     total_tokens = Column(Integer, nullable=False)
 
-    # Create indexes
+    # Prompt token details (optional)
+    prompt_cached_input_tokens = Column(Integer, nullable=True)
+    prompt_cached_creation_tokens = Column(Integer, nullable=True)
+    prompt_audio_tokens = Column(Integer, nullable=True)
+
+    # Completion token details (optional)
+    completion_audio_tokens = Column(Integer, nullable=True)
+    completion_reasoning_tokens = Column(Integer, nullable=True)
+    completion_accepted_prediction_tokens = Column(Integer, nullable=True)
+    completion_rejected_prediction_tokens = Column(Integer, nullable=True)
+
+    # Keep existing indexes
     __table_args__ = (
         Index("idx_created_at", "created_at"),
         Index("idx_execution_id", "execution_id"),
         Index("idx_provider", "provider"),
         Index("idx_model", "model"),
     )
-
-    def to_dict(self):
-        """Convert model instance to dictionary."""
-        return {
-            "id": self.id,
-            "execution_id": self.execution_id,
-            "provider": self.provider,
-            "model": self.model,
-            "created_at": self.created_at,
-            "updated_at": self.updated_at,
-            "prompt_tokens": self.prompt_tokens,
-            "completion_tokens": self.completion_tokens,
-            "total_tokens": self.total_tokens,
-        }
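
The new detail columns are nullable, so rows written by older releases read back as `None`. A read-side sketch; the exact call convention of `get_session` is not shown in this diff, so the session construction here is an assumption:

```python
from tokenator.schemas import TokenUsage, get_session

session = get_session()()  # assumption: get_session() returns a sessionmaker

for row in session.query(TokenUsage).filter(TokenUsage.provider == "openai"):
    # Detail columns are NULL (None) for pre-0.1.15 rows.
    print(row.model, row.prompt_tokens, row.prompt_cached_input_tokens)
```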
tokenator/usage.py CHANGED
@@ -4,7 +4,14 @@ from datetime import datetime, timedelta
 from typing import Dict, Optional, Union
 
 from .schemas import get_session, TokenUsage
-from .models import TokenRate, TokenUsageReport, ModelUsage, ProviderUsage
+from .models import (
+    CompletionTokenDetails,
+    PromptTokenDetails,
+    TokenRate,
+    TokenUsageReport,
+    ModelUsage,
+    ProviderUsage,
+)
 from . import state
 
 import requests
@@ -27,14 +34,25 @@ class TokenUsageService:
         response = requests.get(url)
         data = response.json()
 
-        return {
-            model: TokenRate(
+        model_costs = {}
+        for model, info in data.items():
+            if (
+                "input_cost_per_token" not in info
+                or "output_cost_per_token" not in info
+            ):
+                continue
+
+            rate = TokenRate(
                 prompt=info["input_cost_per_token"],
                 completion=info["output_cost_per_token"],
+                prompt_audio=info.get("input_cost_per_audio_token"),
+                completion_audio=info.get("output_cost_per_audio_token"),
+                prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
+                prompt_cached_creation=info.get("cache_read_creation_token_cost") or 0,
             )
-            for model, info in data.items()
-            if "input_cost_per_token" in info and "output_cost_per_token" in info
-        }
+            model_costs[model] = rate
+
+        return model_costs
 
     def _calculate_cost(
         self, usages: list[TokenUsage], provider: Optional[str] = None
@@ -47,23 +65,26 @@ class TokenUsageService:
             logger.warning("No model costs available.")
             return TokenUsageReport()
 
-        GPT4O_PRICING = self.MODEL_COSTS.get(
-            "gpt-4o", TokenRate(prompt=0.0000025, completion=0.000010)
+        # Default GPT4O pricing updated with provided values
+        GPT4O_PRICING = TokenRate(
+            prompt=0.0000025,
+            completion=0.000010,
+            prompt_audio=0.0001,
+            completion_audio=0.0002,
+            prompt_cached_input=0.00000125,
+            prompt_cached_creation=0.00000125,
         )
 
-        # Existing calculation logic...
         provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
         logger.debug(f"usages: {len(usages)}")
 
         for usage in usages:
-            # 1st priority - direct match
+            # Model key resolution logic (unchanged)
             model_key = usage.model
             if model_key in self.MODEL_COSTS:
                 pass
-            # 2nd priority - provider/model format
             elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
                 model_key = f"{usage.provider}/{usage.model}"
-            # 3rd priority - contains search
             else:
                 matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
                 if matched_keys:
@@ -72,10 +93,8 @@ class TokenUsageService:
                         f"Model {usage.model} matched with {model_key} in pricing data via contains search"
                     )
                 else:
-                    # Fallback to GPT4O pricing
                     logger.warning(
-                        f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback "
-                        f"(prompt: ${GPT4O_PRICING.prompt}/token, completion: ${GPT4O_PRICING.completion}/token)"
+                        f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
                     )
                     self.MODEL_COSTS[model_key] = GPT4O_PRICING
 
@@ -99,18 +118,93 @@ class TokenUsageService:
                 "total_tokens": 0,
                 "prompt_tokens": 0,
                 "completion_tokens": 0,
+                "prompt_cached_input_tokens": 0,
+                "prompt_cached_creation_tokens": 0,
+                "prompt_audio_tokens": 0,
+                "completion_audio_tokens": 0,
+                "completion_reasoning_tokens": 0,
+                "completion_accepted_prediction_tokens": 0,
+                "completion_rejected_prediction_tokens": 0,
             }
             models_list = []
 
             for model_key, usages in model_usages.items():
-                model_cost = sum(
-                    usage.prompt_tokens * self.MODEL_COSTS[model_key].prompt
-                    + usage.completion_tokens * self.MODEL_COSTS[model_key].completion
-                    for usage in usages
-                )
-                model_total = sum(usage.total_tokens for usage in usages)
-                model_prompt = sum(usage.prompt_tokens for usage in usages)
-                model_completion = sum(usage.completion_tokens for usage in usages)
+                model_rates = self.MODEL_COSTS[model_key]
+                model_cost = 0.0
+                model_total = 0
+                model_prompt = 0
+                model_completion = 0
+
+                for usage in usages:
+                    # Base token costs
+                    prompt_text_tokens = usage.prompt_tokens
+                    if usage.prompt_cached_input_tokens:
+                        prompt_text_tokens = (
+                            usage.prompt_tokens - usage.prompt_cached_input_tokens
+                        )
+                    if usage.prompt_audio_tokens:
+                        prompt_text_tokens = (
+                            usage.prompt_tokens - usage.prompt_audio_tokens
+                        )
+
+                    completion_text_tokens = usage.completion_tokens
+                    if usage.completion_audio_tokens:
+                        completion_text_tokens = (
+                            usage.completion_tokens - usage.completion_audio_tokens
+                        )
+
+                    prompt_cost = prompt_text_tokens * model_rates.prompt
+                    completion_cost = completion_text_tokens * model_rates.completion
+                    model_cost += prompt_cost + completion_cost
+
+                    # Audio token costs
+                    if usage.prompt_audio_tokens:
+                        if model_rates.prompt_audio:
+                            model_cost += (
+                                usage.prompt_audio_tokens * model_rates.prompt_audio
+                            )
+                        else:
+                            logger.warning(
+                                f"Audio prompt tokens present for {model_key} but no audio rate defined"
+                            )
+
+                    if usage.completion_audio_tokens:
+                        if model_rates.completion_audio:
+                            model_cost += (
+                                usage.completion_audio_tokens
+                                * model_rates.completion_audio
+                            )
+                        else:
+                            logger.warning(
+                                f"Audio completion tokens present for {model_key} but no audio rate defined"
+                            )
+
+                    # Cached token costs
+                    if usage.prompt_cached_input_tokens:
+                        if model_rates.prompt_cached_input:
+                            model_cost += (
+                                usage.prompt_cached_input_tokens
+                                * model_rates.prompt_cached_input
+                            )
+                        else:
+                            logger.warning(
+                                f"Cached input tokens present for {model_key} but no cache input rate defined"
+                            )
+
+                    if usage.prompt_cached_creation_tokens:
+                        if model_rates.prompt_cached_creation:
+                            model_cost += (
+                                usage.prompt_cached_creation_tokens
+                                * model_rates.prompt_cached_creation
+                            )
+                        else:
+                            logger.warning(
+                                f"Cached creation tokens present for {model_key} but no cache creation rate defined"
+                            )
+
+                    model_total += usage.total_tokens
+                    model_prompt += usage.prompt_tokens
+                    model_completion += usage.completion_tokens
 
                 models_list.append(
                     ModelUsage(
@@ -119,22 +213,124 @@ class TokenUsageService:
                         total_tokens=model_total,
                         prompt_tokens=model_prompt,
                         completion_tokens=model_completion,
+                        prompt_tokens_details=PromptTokenDetails(
+                            cached_input_tokens=sum(
+                                u.prompt_cached_input_tokens or 0 for u in usages
+                            ),
+                            cached_creation_tokens=sum(
+                                u.prompt_cached_creation_tokens or 0 for u in usages
+                            ),
+                            audio_tokens=sum(
+                                u.prompt_audio_tokens or 0 for u in usages
+                            ),
+                        )
+                        if any(
+                            u.prompt_cached_input_tokens
+                            or u.prompt_cached_creation_tokens
+                            or u.prompt_audio_tokens
+                            for u in usages
+                        )
+                        else None,
+                        completion_tokens_details=CompletionTokenDetails(
+                            audio_tokens=sum(
+                                u.completion_audio_tokens or 0 for u in usages
+                            ),
+                            reasoning_tokens=sum(
+                                u.completion_reasoning_tokens or 0 for u in usages
+                            ),
+                            accepted_prediction_tokens=sum(
+                                u.completion_accepted_prediction_tokens or 0
+                                for u in usages
+                            ),
+                            rejected_prediction_tokens=sum(
+                                u.completion_rejected_prediction_tokens or 0
+                                for u in usages
+                            ),
+                        )
+                        if any(
+                            getattr(u, attr, None)
+                            for u in usages
+                            for attr in [
+                                "completion_audio_tokens",
+                                "completion_reasoning_tokens",
+                                "completion_accepted_prediction_tokens",
+                                "completion_rejected_prediction_tokens",
+                            ]
+                        )
+                        else None,
                     )
                 )
 
+                # Update provider metrics with all token types
                 provider_metrics["total_cost"] += model_cost
                 provider_metrics["total_tokens"] += model_total
                 provider_metrics["prompt_tokens"] += model_prompt
                 provider_metrics["completion_tokens"] += model_completion
+                provider_metrics["prompt_cached_input_tokens"] += sum(
+                    u.prompt_cached_input_tokens or 0 for u in usages
+                )
+                provider_metrics["prompt_cached_creation_tokens"] += sum(
+                    u.prompt_cached_creation_tokens or 0 for u in usages
+                )
+                provider_metrics["prompt_audio_tokens"] += sum(
+                    u.prompt_audio_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_audio_tokens"] += sum(
+                    u.completion_audio_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_reasoning_tokens"] += sum(
+                    u.completion_reasoning_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_accepted_prediction_tokens"] += sum(
+                    u.completion_accepted_prediction_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_rejected_prediction_tokens"] += sum(
+                    u.completion_rejected_prediction_tokens or 0 for u in usages
+                )
 
             providers_list.append(
                 ProviderUsage(
                     provider=provider,
                     models=models_list,
-                    **{
-                        k: (round(v, 6) if k == "total_cost" else v)
-                        for k, v in provider_metrics.items()
-                    },
+                    total_cost=round(provider_metrics["total_cost"], 6),
+                    total_tokens=provider_metrics["total_tokens"],
+                    prompt_tokens=provider_metrics["prompt_tokens"],
+                    completion_tokens=provider_metrics["completion_tokens"],
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=provider_metrics[
+                            "prompt_cached_input_tokens"
+                        ],
+                        cached_creation_tokens=provider_metrics[
+                            "prompt_cached_creation_tokens"
+                        ],
+                        audio_tokens=provider_metrics["prompt_audio_tokens"],
+                    )
+                    if provider_metrics["prompt_cached_input_tokens"]
+                    or provider_metrics["prompt_cached_creation_tokens"]
+                    or provider_metrics["prompt_audio_tokens"]
+                    else None,
+                    completion_tokens_details=CompletionTokenDetails(
+                        audio_tokens=provider_metrics["completion_audio_tokens"],
+                        reasoning_tokens=provider_metrics[
+                            "completion_reasoning_tokens"
+                        ],
+                        accepted_prediction_tokens=provider_metrics[
+                            "completion_accepted_prediction_tokens"
+                        ],
+                        rejected_prediction_tokens=provider_metrics[
+                            "completion_rejected_prediction_tokens"
+                        ],
+                    )
+                    if any(
+                        provider_metrics[k]
+                        for k in [
+                            "completion_audio_tokens",
+                            "completion_reasoning_tokens",
+                            "completion_accepted_prediction_tokens",
+                            "completion_rejected_prediction_tokens",
+                        ]
+                    )
+                    else None,
                )
            )
 
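A worked instance of the per-usage arithmetic above, with invented numbers and the gpt-4o fallback rates. One reading note: the two `if` blocks that derive `prompt_text_tokens` each start from `usage.prompt_tokens`, so when a record carries both cached and audio prompt tokens, only the audio subtraction survives.

```python
prompt_tokens = 1_000        # of which 600 were cache reads
cached_input = 600
completion_tokens = 200

rate_prompt = 0.0000025      # $/token, the gpt-4o fallback above
rate_completion = 0.000010
rate_cached_input = 0.00000125

prompt_text = prompt_tokens - cached_input   # 400 uncached prompt tokens
cost = (
    prompt_text * rate_prompt                # 400 * 2.5e-6  = 0.0010
    + completion_tokens * rate_completion    # 200 * 1.0e-5  = 0.0020
    + cached_input * rate_cached_input       # 600 * 1.25e-6 = 0.00075
)
assert round(cost, 6) == 0.00375
```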
tokenator/utils.py CHANGED
@@ -5,8 +5,21 @@ import platform
 import logging
 from pathlib import Path
 
+
 logger = logging.getLogger(__name__)
 
+def is_notebook() -> bool:
+    try:
+        from IPython import get_ipython  # type: ignore
+        shell = get_ipython().__class__.__name__
+        if shell == 'ZMQInteractiveShell':
+            return True  # Jupyter notebook or qtconsole
+        elif shell == 'TerminalInteractiveShell':
+            return False  # Terminal running IPython
+        else:
+            return False  # Other type (?)
+    except NameError:
+        return False
 
 def is_colab() -> bool:
     """Check if running in Google Colab."""
@@ -21,7 +34,7 @@ def is_colab():
 def get_default_db_path() -> str:
     """Get the platform-specific default database path."""
     try:
-        if is_colab():
+        if is_colab() or is_notebook():
             # Use in-memory database for Colab
             return "usage.db"
 
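This change is also why the metadata below gains a hard `Requires-Dist: ipython`: `from IPython import get_ipython` raises `ImportError` when IPython is absent, which the `except NameError` would not catch. With IPython installed, the check resolves as sketched here:

```python
from IPython import get_ipython  # importable thanks to the new dependency

shell = get_ipython().__class__.__name__
# Jupyter / qtconsole -> 'ZMQInteractiveShell'      -> is_notebook() is True
# IPython terminal    -> 'TerminalInteractiveShell' -> False
# plain `python`      -> get_ipython() returns None -> 'NoneType' -> False
print(shell)
```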
tokenator-0.1.14.dist-info/METADATA → tokenator-0.1.15.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tokenator
-Version: 0.1.14
+Version: 0.1.15
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: alembic (>=1.13.0,<2.0.0)
 Requires-Dist: anthropic (>=0.43.0,<0.44.0)
+Requires-Dist: ipython
 Requires-Dist: openai (>=1.59.0,<2.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
@@ -22,7 +23,7 @@ Description-Content-Type: text/markdown
 
 # Tokenator : Track and analyze LLM token usage and cost
 
-Have you ever wondered about :
+Have you ever wondered :
 - How many tokens does your AI agent consume?
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money/tokens did you spend today on developing with LLMs?
tokenator-0.1.15.dist-info/RECORD ADDED
@@ -0,0 +1,21 @@
+tokenator/__init__.py,sha256=AEPE73UGB_TeNLhro3eY0hU8yy6T-_6AyDls8vWApnE,465
+tokenator/anthropic/client_anthropic.py,sha256=2oxTLb5-sPK_KL-OumCjE4wPVI8U_eFyRonn9XjGXJw,7196
+tokenator/anthropic/stream_interceptors.py,sha256=4VHC_-WkG3Pa10YizmFLrHcbz0Tm2MR_YB5-uohKp5A,5221
+tokenator/base_wrapper.py,sha256=EQ49xGduEp05-gj1xyZDasrck4RpComaoKslHxQTwuw,4956
+tokenator/create_migrations.py,sha256=k9IHiGK21dLTA8MYNsuhO0-kUVIcMSViMFYtY4WU2Rw,730
+tokenator/migrations/env.py,sha256=JoF5MJ4ae0wJW5kdBHuFlG3ZqeCCDvbMcU8fNA_a6hM,1396
+tokenator/migrations/script.py.mako,sha256=nJL-tbLQE0Qy4P9S4r4ntNAcikPtoFUlvXe6xvm9ot8,635
+tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py,sha256=WIZN5HdNRXlRdfpUJpJFaPD4G1s-SgRdTMQl4WDB-hA,2189
+tokenator/migrations/versions/f6f1f2437513_initial_migration.py,sha256=4cveHkwSxs-hxOPCm81YfvGZTkJJ2ClAFmyL98-1VCo,1910
+tokenator/migrations.py,sha256=YAf9gZmDzAq36PWWXPtdUQoJFYPXtIDzflC79H6gcJg,1114
+tokenator/models.py,sha256=p4uoFqJYGMlygotxip_HZcfM16Jm4LoyFLFTsM1Z8a4,2132
+tokenator/openai/client_openai.py,sha256=pbdJ-aZPuJs-7OT1VEv0DW36cCYbRAVKhSQEprxVIdY,9686
+tokenator/openai/stream_interceptors.py,sha256=ez1MnjRZW_rEalv2SIPAvrU9oMD6OJoD9vht-057fDM,5243
+tokenator/schemas.py,sha256=kBmShqgpQ3W-ILAP1NuCaFgqFplQM4OH0MmJteLqrwI,2371
+tokenator/state.py,sha256=xdqDC-rlEA88-VgqQqHnAOXQ5pNTpnHcgOtohDIImPY,262
+tokenator/usage.py,sha256=QaudrO6uwnMNRn9aCYVPj9yiQHmbdoAVZ9-G4Q1B0fw,20511
+tokenator/utils.py,sha256=djoWmAhqH-O2Su3qIcuY-_3Vj1-qPwMcdzwq9IlwiDc,2435
+tokenator-0.1.15.dist-info/LICENSE,sha256=wdG-B6-ODk8RQ4jq5uXSn0w1UWTzCH_MMyvh7AwtGns,1074
+tokenator-0.1.15.dist-info/METADATA,sha256=dtws3Qwm2iZLCYZv0meqQP80Q49821HdyZgUmDeqDcg,6035
+tokenator-0.1.15.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+tokenator-0.1.15.dist-info/RECORD,,

tokenator-0.1.14.dist-info/RECORD REMOVED
@@ -1,20 +0,0 @@
-tokenator/__init__.py,sha256=AEPE73UGB_TeNLhro3eY0hU8yy6T-_6AyDls8vWApnE,465
-tokenator/anthropic/client_anthropic.py,sha256=uWUrRId7vJlMG6hVKLUzaA3PoOT6mJwTqSRIhAidRFY,6163
-tokenator/anthropic/stream_interceptors.py,sha256=4VHC_-WkG3Pa10YizmFLrHcbz0Tm2MR_YB5-uohKp5A,5221
-tokenator/base_wrapper.py,sha256=UoS3cOuPa3HpuXPTawybvAtwufgZwzzKBj0BhyB-z6w,3160
-tokenator/create_migrations.py,sha256=k9IHiGK21dLTA8MYNsuhO0-kUVIcMSViMFYtY4WU2Rw,730
-tokenator/migrations/env.py,sha256=JoF5MJ4ae0wJW5kdBHuFlG3ZqeCCDvbMcU8fNA_a6hM,1396
-tokenator/migrations/script.py.mako,sha256=nJL-tbLQE0Qy4P9S4r4ntNAcikPtoFUlvXe6xvm9ot8,635
-tokenator/migrations/versions/f6f1f2437513_initial_migration.py,sha256=4cveHkwSxs-hxOPCm81YfvGZTkJJ2ClAFmyL98-1VCo,1910
-tokenator/migrations.py,sha256=YAf9gZmDzAq36PWWXPtdUQoJFYPXtIDzflC79H6gcJg,1114
-tokenator/models.py,sha256=AlNC5NVrycLg0LhDJIww9HXQ3lwM8CoKvRSqXU6iw-k,1225
-tokenator/openai/client_openai.py,sha256=LhD1IbpzPXRK9eSqtcfUfoM9vBsyw6OHA0_a7N_tS9U,6230
-tokenator/openai/stream_interceptors.py,sha256=ez1MnjRZW_rEalv2SIPAvrU9oMD6OJoD9vht-057fDM,5243
-tokenator/schemas.py,sha256=zIgfmSsFJV9ziJdKrpV8p2P1f-BVWUVIpWoqCLpzhEU,2225
-tokenator/state.py,sha256=xdqDC-rlEA88-VgqQqHnAOXQ5pNTpnHcgOtohDIImPY,262
-tokenator/usage.py,sha256=ghnZ7pQuIxeI38O63xDAbEm6jOSmkYE7MChHBGPxbyM,11229
-tokenator/utils.py,sha256=xg9l2GV1yJL1BlxKL1r8CboABWDslf3G5rGQEJSjFrE,1973
-tokenator-0.1.14.dist-info/LICENSE,sha256=wdG-B6-ODk8RQ4jq5uXSn0w1UWTzCH_MMyvh7AwtGns,1074
-tokenator-0.1.14.dist-info/METADATA,sha256=L93LfqCfqvhES92COaQZpX5w9_c2aDaX8pj2wT74Sxw,6018
-tokenator-0.1.14.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-tokenator-0.1.14.dist-info/RECORD,,