tokenator 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

@@ -6,7 +6,7 @@ import logging
 from anthropic import Anthropic, AsyncAnthropic
 from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
 
-from ..models import Usage, TokenUsageStats
+from ..models import PromptTokenDetails, TokenMetrics, TokenUsageStats
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     AnthropicAsyncStreamInterceptor,
@@ -28,28 +28,46 @@ class BaseAnthropicWrapper(BaseWrapper):
             if isinstance(response, Message):
                 if not hasattr(response, "usage"):
                     return None
-                usage = Usage(
-                    prompt_tokens=response.usage.input_tokens,
+                usage = TokenMetrics(
+                    prompt_tokens=response.usage.input_tokens
+                    + (getattr(response.usage, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=response.usage.output_tokens,
                     total_tokens=response.usage.input_tokens
                     + response.usage.output_tokens,
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            response.usage, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            response.usage, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(model=response.model, usage=usage)
             elif isinstance(response, dict):
                 usage_dict = response.get("usage")
                 if not usage_dict:
                     return None
-                usage = Usage(
-                    prompt_tokens=usage_dict.get("input_tokens", 0),
+                usage = TokenMetrics(
+                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                    + (getattr(usage_dict, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=usage_dict.get("output_tokens", 0),
                     total_tokens=usage_dict.get("input_tokens", 0)
                     + usage_dict.get("output_tokens", 0),
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            usage_dict, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            usage_dict, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(
                     model=response.get("model", "unknown"), usage=usage
                 )
         except Exception as e:
-            logger.warning("Failed to process usage stats: %s", str(e))
+            logger.warning("Failed to process usage stats: %s", str(e), exc_info=True)
             return None
         return None
 
@@ -74,7 +92,7 @@ def _create_usage_callback(execution_id, log_usage_fn):
             model=chunks[0].message.model
             if isinstance(chunks[0], RawMessageStartEvent)
            else "",
-            usage=Usage(),
+            usage=TokenMetrics(),
        )
 
        for chunk in chunks:
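
A note on the dict branch of `_process_response_usage` above: `getattr(usage_dict, "cache_creation_input_tokens", 0)` performs an attribute lookup on a plain dict, and dict keys are not attributes, so the call always returns the fallback and cache counts from dict-shaped responses are silently dropped. If the intent is to read those keys, a `.get()` form would do it; this is a sketch only, not what the released wheel contains:

    # Sketch: key lookups rather than attribute lookups on a plain dict.
    cached_creation = usage_dict.get("cache_creation_input_tokens") or 0
    cached_input = usage_dict.get("cache_read_input_tokens")
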
tokenator/base_wrapper.py CHANGED
@@ -58,9 +58,33 @@ class BaseWrapper:
             execution_id=execution_id,
             provider=self.provider,
             model=token_usage_stats.model,
+            total_cost=0,  # This needs to be calculated based on your rates
             prompt_tokens=token_usage_stats.usage.prompt_tokens,
             completion_tokens=token_usage_stats.usage.completion_tokens,
             total_tokens=token_usage_stats.usage.total_tokens,
+            # Prompt details
+            prompt_cached_input_tokens=token_usage_stats.usage.prompt_tokens_details.cached_input_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_cached_creation_tokens=token_usage_stats.usage.prompt_tokens_details.cached_creation_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_audio_tokens=token_usage_stats.usage.prompt_tokens_details.audio_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            # Completion details
+            completion_audio_tokens=token_usage_stats.usage.completion_tokens_details.audio_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_reasoning_tokens=token_usage_stats.usage.completion_tokens_details.reasoning_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_accepted_prediction_tokens=token_usage_stats.usage.completion_tokens_details.accepted_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_rejected_prediction_tokens=token_usage_stats.usage.completion_tokens_details.rejected_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
         )
         session.add(token_usage)
         logger.debug(
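
`total_cost` is persisted as a hard-coded `0` here; the inline comment defers the actual calculation to the integrator. A minimal sketch of what that could look like with the per-token rates from the new `TokenRate` model (the helper and the scaling choice are hypothetical, not part of the package; the column is an `Integer`, so some fixed-point unit has to be chosen):

    from tokenator.models import TokenMetrics, TokenRate

    # Hypothetical helper: store cost in micro-dollars so it fits an Integer column.
    def compute_total_cost(usage: TokenMetrics, rate: TokenRate) -> int:
        dollars = (
            usage.prompt_tokens * rate.prompt
            + usage.completion_tokens * rate.completion
        )
        return int(dollars * 1_000_000)
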
@@ -82,11 +106,13 @@ class BaseWrapper:
         if not execution_id:
             execution_id = str(uuid.uuid4())
 
+        logger.debug("Starting token usage logging for execution_id: %s", execution_id)
         session = self.Session()
         try:
             try:
                 self._log_usage_impl(token_usage_stats, session, execution_id)
                 session.commit()
+                logger.debug("Successfully committed token usage for execution_id: %s", execution_id)
             except Exception as e:
                 logger.error("Failed to log token usage: %s", str(e))
                 session.rollback()
@@ -0,0 +1,64 @@
+"""Adding detailed input and output token schema
+
+Revision ID: f028b8155fed
+Revises: f6f1f2437513
+Create Date: 2025-01-19 15:41:12.715623
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "f028b8155fed"
+down_revision: Union[str, None] = "f6f1f2437513"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("token_usage", sa.Column("total_cost", sa.Integer(), nullable=False))
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_input_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_creation_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage", sa.Column("prompt_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage", sa.Column("completion_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_reasoning_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_accepted_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_rejected_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("token_usage", "completion_rejected_prediction_tokens")
+    op.drop_column("token_usage", "completion_accepted_prediction_tokens")
+    op.drop_column("token_usage", "completion_reasoning_tokens")
+    op.drop_column("token_usage", "completion_audio_tokens")
+    op.drop_column("token_usage", "prompt_audio_tokens")
+    op.drop_column("token_usage", "prompt_cached_creation_tokens")
+    op.drop_column("token_usage", "prompt_cached_input_tokens")
+    op.drop_column("token_usage", "total_cost")
+    # ### end Alembic commands ###
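
One caveat when applying this migration to an existing database: `total_cost` is added with `nullable=False` and no server default, and SQLite, which tokenator uses for its local usage store, refuses to add a NOT NULL column without a default value. A sketch of the usual workaround, shown as an assumption about intent rather than what the shipped migration does:

    # Sketch: a server_default lets the NOT NULL column be added safely.
    op.add_column(
        "token_usage",
        sa.Column("total_cost", sa.Integer(), nullable=False, server_default="0"),
    )
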
tokenator/models.py CHANGED
@@ -1,10 +1,35 @@
 from pydantic import BaseModel, Field
-from typing import List
+from typing import List, Optional
 
 
 class TokenRate(BaseModel):
     prompt: float = Field(..., description="Cost per prompt token")
     completion: float = Field(..., description="Cost per completion token")
+    prompt_audio: Optional[float] = Field(
+        None, description="Cost per audio prompt token"
+    )
+    completion_audio: Optional[float] = Field(
+        None, description="Cost per audio completion token"
+    )
+    prompt_cached_input: Optional[float] = Field(
+        None, description="Cost per cached prompt input token"
+    )
+    prompt_cached_creation: Optional[float] = Field(
+        None, description="Cost per cached prompt creation token"
+    )
+
+
+class PromptTokenDetails(BaseModel):
+    cached_input_tokens: Optional[int] = None
+    cached_creation_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+
+
+class CompletionTokenDetails(BaseModel):
+    reasoning_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+    accepted_prediction_tokens: Optional[int] = None
+    rejected_prediction_tokens: Optional[int] = None
 
 
 class TokenMetrics(BaseModel):
@@ -12,6 +37,8 @@ class TokenMetrics(BaseModel):
     total_tokens: int = Field(default=0, description="Total tokens used")
     prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
     completion_tokens: int = Field(default=0, description="Number of completion tokens")
+    prompt_tokens_details: Optional[PromptTokenDetails] = None
+    completion_tokens_details: Optional[CompletionTokenDetails] = None
 
 
 class ModelUsage(TokenMetrics):
@@ -31,12 +58,6 @@ class TokenUsageReport(TokenMetrics):
     )
 
 
-class Usage(BaseModel):
-    prompt_tokens: int = 0
-    completion_tokens: int = 0
-    total_tokens: int = 0
-
-
 class TokenUsageStats(BaseModel):
     model: str
-    usage: Usage
+    usage: TokenMetrics
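
For orientation, a minimal sketch of how the reshaped models compose: `TokenMetrics` absorbs the role of the deleted `Usage` class and optionally carries the new detail objects (the counts and model name below are made up):

    from tokenator.models import (
        PromptTokenDetails,
        TokenMetrics,
        TokenUsageStats,
    )

    stats = TokenUsageStats(
        model="claude-3-5-sonnet-20241022",
        usage=TokenMetrics(
            prompt_tokens=120,  # includes cache-creation tokens, per the wrapper
            completion_tokens=40,
            total_tokens=160,
            prompt_tokens_details=PromptTokenDetails(
                cached_input_tokens=100,
                cached_creation_tokens=20,
            ),
        ),
    )
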
@@ -6,7 +6,12 @@ import logging
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 
-from ..models import Usage, TokenUsageStats
+from ..models import (
+    TokenMetrics,
+    TokenUsageStats,
+    PromptTokenDetails,
+    CompletionTokenDetails,
+)
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     OpenAIAsyncStreamInterceptor,
@@ -30,18 +35,49 @@ class BaseOpenAIWrapper(BaseWrapper):
         if isinstance(response, ChatCompletion):
             if response.usage is None:
                 return None
-            usage = Usage(
+            usage = TokenMetrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
+                prompt_tokens_details=PromptTokenDetails(
+                    cached_input_tokens=getattr(
+                        response.usage.prompt_tokens_details, "cached_tokens", None
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.prompt_tokens_details, "audio_tokens", None
+                    ),
+                ),
+                completion_tokens_details=CompletionTokenDetails(
+                    reasoning_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "reasoning_tokens",
+                        None,
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "audio_tokens",
+                        None,
+                    ),
+                    accepted_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "accepted_prediction_tokens",
+                        None,
+                    ),
+                    rejected_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "rejected_prediction_tokens",
+                        None,
+                    ),
+                ),
             )
+
             return TokenUsageStats(model=response.model, usage=usage)
 
         elif isinstance(response, dict):
             usage_dict = response.get("usage")
             if not usage_dict:
                 return None
-            usage = Usage(
+            usage = TokenMetrics(
                 prompt_tokens=usage_dict.get("prompt_tokens", 0),
                 completion_tokens=usage_dict.get("completion_tokens", 0),
                 total_tokens=usage_dict.get("total_tokens", 0),
@@ -62,6 +98,10 @@ class BaseOpenAIWrapper(BaseWrapper):
     def completions(self):
         return self
 
+    @property
+    def beta(self):
+        return self
+
 
 def _create_usage_callback(execution_id, log_usage_fn):
     """Creates a callback function for processing usage statistics from stream chunks."""
@@ -75,10 +115,12 @@ def _create_usage_callback(execution_id, log_usage_fn):
             logger.debug("Tokenator is disabled - skipping stream usage logging")
             return
 
+        logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
         # Build usage_data from the first chunk's model
         usage_data = TokenUsageStats(
             model=chunks[0].model,
-            usage=Usage(),
+            usage=TokenMetrics(),
         )
         # Sum up usage from all chunks
         has_usage = False
@@ -116,6 +158,26 @@ class OpenAIWrapper(BaseOpenAIWrapper):
 
         return response
 
+    def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, Iterator[ChatCompletion]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAISyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 class AsyncOpenAIWrapper(BaseOpenAIWrapper):
     async def create(
@@ -141,6 +203,26 @@ class AsyncOpenAIWrapper(BaseOpenAIWrapper):
         self._log_usage(usage_data, execution_id=execution_id)
         return response
 
+    async def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = await self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAIAsyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = await self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 @overload
 def tokenator_openai(
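
A sketch of the new `parse()` passthrough in use, assuming the `tokenator_openai` factory above is exported at the package root and an API key is set in the environment; the Pydantic schema and model name are illustrative:

    from openai import OpenAI
    from pydantic import BaseModel

    from tokenator import tokenator_openai

    class Answer(BaseModel):
        text: str

    client = tokenator_openai(OpenAI())
    completion = client.parse(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hi"}],
        response_format=Answer,
    )
    print(completion.choices[0].message.parsed)  # structured output; usage is logged
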
tokenator/schemas.py CHANGED
@@ -40,28 +40,28 @@ class TokenUsage(Base):
     updated_at = Column(
         DateTime, nullable=False, default=datetime.now, onupdate=datetime.now
     )
+
+    # Core metrics (mandatory)
+    total_cost = Column(Integer, nullable=False)
     prompt_tokens = Column(Integer, nullable=False)
     completion_tokens = Column(Integer, nullable=False)
     total_tokens = Column(Integer, nullable=False)
 
-    # Create indexes
+    # Prompt token details (optional)
+    prompt_cached_input_tokens = Column(Integer, nullable=True)
+    prompt_cached_creation_tokens = Column(Integer, nullable=True)
+    prompt_audio_tokens = Column(Integer, nullable=True)
+
+    # Completion token details (optional)
+    completion_audio_tokens = Column(Integer, nullable=True)
+    completion_reasoning_tokens = Column(Integer, nullable=True)
+    completion_accepted_prediction_tokens = Column(Integer, nullable=True)
+    completion_rejected_prediction_tokens = Column(Integer, nullable=True)
+
+    # Keep existing indexes
     __table_args__ = (
         Index("idx_created_at", "created_at"),
         Index("idx_execution_id", "execution_id"),
         Index("idx_provider", "provider"),
         Index("idx_model", "model"),
     )
-
-    def to_dict(self):
-        """Convert model instance to dictionary."""
-        return {
-            "id": self.id,
-            "execution_id": self.execution_id,
-            "provider": self.provider,
-            "model": self.model,
-            "created_at": self.created_at,
-            "updated_at": self.updated_at,
-            "prompt_tokens": self.prompt_tokens,
-            "completion_tokens": self.completion_tokens,
-            "total_tokens": self.total_tokens,
-        }
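
With `to_dict` removed, reading rows back is plain SQLAlchemy attribute access. A sketch, assuming you already have a Session bound to tokenator's database:

    from tokenator.schemas import TokenUsage

    # session: an open SQLAlchemy Session (setup omitted)
    for row in session.query(TokenUsage).filter(TokenUsage.provider == "openai"):
        print(row.model, row.total_tokens, row.completion_reasoning_tokens)
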