tokenator 0.1.14__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenator/anthropic/client_anthropic.py +25 -7
- tokenator/base_wrapper.py +26 -0
- tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +64 -0
- tokenator/models.py +29 -8
- tokenator/openai/client_openai.py +86 -4
- tokenator/schemas.py +15 -15
- tokenator/usage.py +223 -27
- tokenator/utils.py +14 -1
- {tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/METADATA +3 -2
- tokenator-0.1.15.dist-info/RECORD +21 -0
- tokenator-0.1.14.dist-info/RECORD +0 -20
- {tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/LICENSE +0 -0
- {tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/WHEEL +0 -0

tokenator/anthropic/client_anthropic.py CHANGED

@@ -6,7 +6,7 @@ import logging
 from anthropic import Anthropic, AsyncAnthropic
 from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
 
-from ..models import
+from ..models import PromptTokenDetails, TokenMetrics, TokenUsageStats
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     AnthropicAsyncStreamInterceptor,
@@ -28,28 +28,46 @@ class BaseAnthropicWrapper(BaseWrapper):
             if isinstance(response, Message):
                 if not hasattr(response, "usage"):
                     return None
-                usage =
-                    prompt_tokens=response.usage.input_tokens
+                usage = TokenMetrics(
+                    prompt_tokens=response.usage.input_tokens
+                    + (getattr(response.usage, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=response.usage.output_tokens,
                     total_tokens=response.usage.input_tokens
                     + response.usage.output_tokens,
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            response.usage, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            response.usage, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(model=response.model, usage=usage)
             elif isinstance(response, dict):
                 usage_dict = response.get("usage")
                 if not usage_dict:
                     return None
-                usage =
-                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                usage = TokenMetrics(
+                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                    + (getattr(usage_dict, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=usage_dict.get("output_tokens", 0),
                     total_tokens=usage_dict.get("input_tokens", 0)
                     + usage_dict.get("output_tokens", 0),
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            usage_dict, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            usage_dict, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(
                     model=response.get("model", "unknown"), usage=usage
                 )
         except Exception as e:
-            logger.warning("Failed to process usage stats: %s", str(e))
+            logger.warning("Failed to process usage stats: %s", str(e), exc_info=True)
             return None
         return None
 
@@ -74,7 +92,7 @@ def _create_usage_callback(execution_id, log_usage_fn):
             model=chunks[0].message.model
             if isinstance(chunks[0], RawMessageStartEvent)
             else "",
-            usage=
+            usage=TokenMetrics(),
         )
 
         for chunk in chunks:
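
The net effect of the client_anthropic.py change is that Anthropic's prompt-caching counters now flow into the prompt total and into the new PromptTokenDetails object. A minimal sketch of the arithmetic, mirroring the TokenMetrics construction above (the usage numbers are invented):

```python
# Invented Anthropic-style usage payload, only to illustrate the accounting above.
usage = {
    "input_tokens": 1200,
    "cache_creation_input_tokens": 800,  # tokens written to the prompt cache
    "cache_read_input_tokens": 0,        # tokens served from the cache
    "output_tokens": 300,
}

# Same arithmetic as in _process_response_usage above:
prompt_tokens = usage["input_tokens"] + (usage.get("cache_creation_input_tokens", 0) or 0)
total_tokens = usage["input_tokens"] + usage["output_tokens"]

print(prompt_tokens)  # 2000: cache-creation tokens are folded into the prompt count
print(total_tokens)   # 1500: the total still uses the raw input_tokens figure
```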

tokenator/base_wrapper.py CHANGED

@@ -58,9 +58,33 @@ class BaseWrapper:
             execution_id=execution_id,
             provider=self.provider,
             model=token_usage_stats.model,
+            total_cost=0,  # This needs to be calculated based on your rates
             prompt_tokens=token_usage_stats.usage.prompt_tokens,
             completion_tokens=token_usage_stats.usage.completion_tokens,
             total_tokens=token_usage_stats.usage.total_tokens,
+            # Prompt details
+            prompt_cached_input_tokens=token_usage_stats.usage.prompt_tokens_details.cached_input_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_cached_creation_tokens=token_usage_stats.usage.prompt_tokens_details.cached_creation_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            prompt_audio_tokens=token_usage_stats.usage.prompt_tokens_details.audio_tokens
+            if token_usage_stats.usage.prompt_tokens_details
+            else None,
+            # Completion details
+            completion_audio_tokens=token_usage_stats.usage.completion_tokens_details.audio_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_reasoning_tokens=token_usage_stats.usage.completion_tokens_details.reasoning_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_accepted_prediction_tokens=token_usage_stats.usage.completion_tokens_details.accepted_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
+            completion_rejected_prediction_tokens=token_usage_stats.usage.completion_tokens_details.rejected_prediction_tokens
+            if token_usage_stats.usage.completion_tokens_details
+            else None,
         )
         session.add(token_usage)
         logger.debug(
@@ -82,11 +106,13 @@ class BaseWrapper:
         if not execution_id:
             execution_id = str(uuid.uuid4())
 
+        logger.debug("Starting token usage logging for execution_id: %s", execution_id)
         session = self.Session()
         try:
             try:
                 self._log_usage_impl(token_usage_stats, session, execution_id)
                 session.commit()
+                logger.debug("Successfully committed token usage for execution_id: %s", execution_id)
             except Exception as e:
                 logger.error("Failed to log token usage: %s", str(e))
                 session.rollback()

tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py ADDED

@@ -0,0 +1,64 @@
+"""Adding detailed input and output token schema
+
+Revision ID: f028b8155fed
+Revises: f6f1f2437513
+Create Date: 2025-01-19 15:41:12.715623
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "f028b8155fed"
+down_revision: Union[str, None] = "f6f1f2437513"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("token_usage", sa.Column("total_cost", sa.Integer(), nullable=False))
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_input_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_creation_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage", sa.Column("prompt_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage", sa.Column("completion_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_reasoning_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_accepted_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_rejected_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("token_usage", "completion_rejected_prediction_tokens")
+    op.drop_column("token_usage", "completion_accepted_prediction_tokens")
+    op.drop_column("token_usage", "completion_reasoning_tokens")
+    op.drop_column("token_usage", "completion_audio_tokens")
+    op.drop_column("token_usage", "prompt_audio_tokens")
+    op.drop_column("token_usage", "prompt_cached_creation_tokens")
+    op.drop_column("token_usage", "prompt_cached_input_tokens")
+    op.drop_column("token_usage", "total_cost")
+    # ### end Alembic commands ###
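
The revision above only adds the new total_cost column and seven nullable detail columns, and downgrade() removes them again. If the database ever needs to be migrated by hand, a sketch using Alembic's command API (the "alembic.ini" path is an assumption for illustration; this is equivalent to `alembic upgrade f028b8155fed` on the CLI):

```python
# Sketch: apply the new revision programmatically with Alembic.
# The config path below is assumed; point it at the project's Alembic configuration.
from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")
command.upgrade(cfg, "f028b8155fed")  # or "head" to apply every pending revision
```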

tokenator/models.py CHANGED

@@ -1,10 +1,35 @@
 from pydantic import BaseModel, Field
-from typing import List
+from typing import List, Optional
 
 
 class TokenRate(BaseModel):
     prompt: float = Field(..., description="Cost per prompt token")
     completion: float = Field(..., description="Cost per completion token")
+    prompt_audio: Optional[float] = Field(
+        None, description="Cost per audio prompt token"
+    )
+    completion_audio: Optional[float] = Field(
+        None, description="Cost per audio completion token"
+    )
+    prompt_cached_input: Optional[float] = Field(
+        None, description="Cost per cached prompt input token"
+    )
+    prompt_cached_creation: Optional[float] = Field(
+        None, description="Cost per cached prompt creation token"
+    )
+
+
+class PromptTokenDetails(BaseModel):
+    cached_input_tokens: Optional[int] = None
+    cached_creation_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+
+
+class CompletionTokenDetails(BaseModel):
+    reasoning_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+    accepted_prediction_tokens: Optional[int] = None
+    rejected_prediction_tokens: Optional[int] = None
 
 
 class TokenMetrics(BaseModel):
@@ -12,6 +37,8 @@ class TokenMetrics(BaseModel):
     total_tokens: int = Field(default=0, description="Total tokens used")
     prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
     completion_tokens: int = Field(default=0, description="Number of completion tokens")
+    prompt_tokens_details: Optional[PromptTokenDetails] = None
+    completion_tokens_details: Optional[CompletionTokenDetails] = None
 
 
 class ModelUsage(TokenMetrics):
@@ -31,12 +58,6 @@ class TokenUsageReport(TokenMetrics):
     )
 
 
-class Usage(BaseModel):
-    prompt_tokens: int = 0
-    completion_tokens: int = 0
-    total_tokens: int = 0
-
-
 class TokenUsageStats(BaseModel):
     model: str
-    usage:
+    usage: TokenMetrics
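
With these model changes, a TokenMetrics value can carry per-category breakdowns while the three original counters stay mandatory. A small construction example using only the classes defined above (the numbers are invented):

```python
from tokenator.models import (
    CompletionTokenDetails,
    PromptTokenDetails,
    TokenMetrics,
    TokenUsageStats,
)

# Invented values, just to show the shape of the new optional detail fields.
metrics = TokenMetrics(
    prompt_tokens=1500,
    completion_tokens=400,
    total_tokens=1900,
    prompt_tokens_details=PromptTokenDetails(cached_input_tokens=1000),
    completion_tokens_details=CompletionTokenDetails(reasoning_tokens=120),
)
stats = TokenUsageStats(model="gpt-4o", usage=metrics)
print(stats.usage.prompt_tokens_details.cached_input_tokens)  # 1000
```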

tokenator/openai/client_openai.py CHANGED

@@ -6,7 +6,12 @@ import logging
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 
-from ..models import
+from ..models import (
+    TokenMetrics,
+    TokenUsageStats,
+    PromptTokenDetails,
+    CompletionTokenDetails,
+)
 from ..base_wrapper import BaseWrapper, ResponseType
 from .stream_interceptors import (
     OpenAIAsyncStreamInterceptor,
@@ -30,18 +35,49 @@ class BaseOpenAIWrapper(BaseWrapper):
             if isinstance(response, ChatCompletion):
                 if response.usage is None:
                     return None
-                usage =
+                usage = TokenMetrics(
                     prompt_tokens=response.usage.prompt_tokens,
                     completion_tokens=response.usage.completion_tokens,
                     total_tokens=response.usage.total_tokens,
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            response.usage.prompt_tokens_details, "cached_tokens", None
+                        ),
+                        audio_tokens=getattr(
+                            response.usage.prompt_tokens_details, "audio_tokens", None
+                        ),
+                    ),
+                    completion_tokens_details=CompletionTokenDetails(
+                        reasoning_tokens=getattr(
+                            response.usage.completion_tokens_details,
+                            "reasoning_tokens",
+                            None,
+                        ),
+                        audio_tokens=getattr(
+                            response.usage.completion_tokens_details,
+                            "audio_tokens",
+                            None,
+                        ),
+                        accepted_prediction_tokens=getattr(
+                            response.usage.completion_tokens_details,
+                            "accepted_prediction_tokens",
+                            None,
+                        ),
+                        rejected_prediction_tokens=getattr(
+                            response.usage.completion_tokens_details,
+                            "rejected_prediction_tokens",
+                            None,
+                        ),
+                    ),
                 )
+
                 return TokenUsageStats(model=response.model, usage=usage)
 
             elif isinstance(response, dict):
                 usage_dict = response.get("usage")
                 if not usage_dict:
                     return None
-                usage =
+                usage = TokenMetrics(
                     prompt_tokens=usage_dict.get("prompt_tokens", 0),
                     completion_tokens=usage_dict.get("completion_tokens", 0),
                     total_tokens=usage_dict.get("total_tokens", 0),
@@ -62,6 +98,10 @@ class BaseOpenAIWrapper(BaseWrapper):
     def completions(self):
         return self
 
+    @property
+    def beta(self):
+        return self
+
 
 def _create_usage_callback(execution_id, log_usage_fn):
     """Creates a callback function for processing usage statistics from stream chunks."""
@@ -75,10 +115,12 @@ def _create_usage_callback(execution_id, log_usage_fn):
             logger.debug("Tokenator is disabled - skipping stream usage logging")
             return
 
+        logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
         # Build usage_data from the first chunk's model
         usage_data = TokenUsageStats(
             model=chunks[0].model,
-            usage=
+            usage=TokenMetrics(),
         )
         # Sum up usage from all chunks
         has_usage = False
@@ -116,6 +158,26 @@ class OpenAIWrapper(BaseOpenAIWrapper):
 
         return response
 
+    def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, Iterator[ChatCompletion]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAISyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 class AsyncOpenAIWrapper(BaseOpenAIWrapper):
     async def create(
@@ -141,6 +203,26 @@ class AsyncOpenAIWrapper(BaseOpenAIWrapper):
             self._log_usage(usage_data, execution_id=execution_id)
         return response
 
+    async def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = await self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAIAsyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = await self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 @overload
 def tokenator_openai(
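
Because beta (like the existing chat and completions passthroughs) simply returns the wrapper itself, structured-output calls made as beta.chat.completions.parse() on a wrapped client are now routed through the new parse() methods and logged like regular create() calls. A usage sketch; the import path, model name, and Pydantic schema here are assumptions for illustration:

```python
from openai import OpenAI
from pydantic import BaseModel

# Import path assumed; tokenator_openai is the wrapper factory shown at the end of the diff.
from tokenator import tokenator_openai


class Weather(BaseModel):
    city: str
    temperature_c: float


client = tokenator_openai(OpenAI())  # wrapped client that logs token usage locally

# Routed through OpenAIWrapper.parse() via the beta -> chat -> completions passthroughs.
completion = client.beta.chat.completions.parse(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Give the weather in Paris as JSON."}],
    response_format=Weather,
)
print(completion.choices[0].message.parsed)
```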

tokenator/schemas.py CHANGED

@@ -40,28 +40,28 @@ class TokenUsage(Base):
     updated_at = Column(
         DateTime, nullable=False, default=datetime.now, onupdate=datetime.now
     )
+
+    # Core metrics (mandatory)
+    total_cost = Column(Integer, nullable=False)
     prompt_tokens = Column(Integer, nullable=False)
     completion_tokens = Column(Integer, nullable=False)
     total_tokens = Column(Integer, nullable=False)
 
-    #
+    # Prompt token details (optional)
+    prompt_cached_input_tokens = Column(Integer, nullable=True)
+    prompt_cached_creation_tokens = Column(Integer, nullable=True)
+    prompt_audio_tokens = Column(Integer, nullable=True)
+
+    # Completion token details (optional)
+    completion_audio_tokens = Column(Integer, nullable=True)
+    completion_reasoning_tokens = Column(Integer, nullable=True)
+    completion_accepted_prediction_tokens = Column(Integer, nullable=True)
+    completion_rejected_prediction_tokens = Column(Integer, nullable=True)
+
+    # Keep existing indexes
     __table_args__ = (
         Index("idx_created_at", "created_at"),
         Index("idx_execution_id", "execution_id"),
         Index("idx_provider", "provider"),
         Index("idx_model", "model"),
     )
-
-    def to_dict(self):
-        """Convert model instance to dictionary."""
-        return {
-            "id": self.id,
-            "execution_id": self.execution_id,
-            "provider": self.provider,
-            "model": self.model,
-            "created_at": self.created_at,
-            "updated_at": self.updated_at,
-            "prompt_tokens": self.prompt_tokens,
-            "completion_tokens": self.completion_tokens,
-            "total_tokens": self.total_tokens,
-        }

tokenator/usage.py CHANGED

@@ -4,7 +4,14 @@ from datetime import datetime, timedelta
 from typing import Dict, Optional, Union
 
 from .schemas import get_session, TokenUsage
-from .models import
+from .models import (
+    CompletionTokenDetails,
+    PromptTokenDetails,
+    TokenRate,
+    TokenUsageReport,
+    ModelUsage,
+    ProviderUsage,
+)
 from . import state
 
 import requests
@@ -27,14 +34,25 @@ class TokenUsageService:
         response = requests.get(url)
         data = response.json()
 
-
-
+        model_costs = {}
+        for model, info in data.items():
+            if (
+                "input_cost_per_token" not in info
+                or "output_cost_per_token" not in info
+            ):
+                continue
+
+            rate = TokenRate(
                 prompt=info["input_cost_per_token"],
                 completion=info["output_cost_per_token"],
+                prompt_audio=info.get("input_cost_per_audio_token"),
+                completion_audio=info.get("output_cost_per_audio_token"),
+                prompt_cached_input=info.get("cache_read_input_token_cost") or 0,
+                prompt_cached_creation=info.get("cache_read_creation_token_cost") or 0,
            )
-
-
-
+            model_costs[model] = rate
+
+        return model_costs
 
     def _calculate_cost(
         self, usages: list[TokenUsage], provider: Optional[str] = None
@@ -47,23 +65,26 @@ class TokenUsageService:
             logger.warning("No model costs available.")
             return TokenUsageReport()
 
-
-
+        # Default GPT4O pricing updated with provided values
+        GPT4O_PRICING = TokenRate(
+            prompt=0.0000025,
+            completion=0.000010,
+            prompt_audio=0.0001,
+            completion_audio=0.0002,
+            prompt_cached_input=0.00000125,
+            prompt_cached_creation=0.00000125,
         )
 
-        # Existing calculation logic...
        provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
        logger.debug(f"usages: {len(usages)}")
 
        for usage in usages:
-            #
+            # Model key resolution logic (unchanged)
            model_key = usage.model
            if model_key in self.MODEL_COSTS:
                pass
-            # 2nd priority - provider/model format
            elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
                model_key = f"{usage.provider}/{usage.model}"
-            # 3rd priority - contains search
            else:
                matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
                if matched_keys:
@@ -72,10 +93,8 @@ class TokenUsageService:
                        f"Model {usage.model} matched with {model_key} in pricing data via contains search"
                    )
                else:
-                    # Fallback to GPT4O pricing
                    logger.warning(
-                        f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback
-                        f"(prompt: ${GPT4O_PRICING.prompt}/token, completion: ${GPT4O_PRICING.completion}/token)"
+                        f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback"
                    )
                    self.MODEL_COSTS[model_key] = GPT4O_PRICING
 
@@ -99,18 +118,93 @@ class TokenUsageService:
                "total_tokens": 0,
                "prompt_tokens": 0,
                "completion_tokens": 0,
+                "prompt_cached_input_tokens": 0,
+                "prompt_cached_creation_tokens": 0,
+                "prompt_audio_tokens": 0,
+                "completion_audio_tokens": 0,
+                "completion_reasoning_tokens": 0,
+                "completion_accepted_prediction_tokens": 0,
+                "completion_rejected_prediction_tokens": 0,
            }
            models_list = []
 
            for model_key, usages in model_usages.items():
-
-
-
-
-
-
-
-
+                model_rates = self.MODEL_COSTS[model_key]
+                model_cost = 0.0
+                model_total = 0
+                model_prompt = 0
+                model_completion = 0
+
+                for usage in usages:
+                    # Base token costs
+                    prompt_text_tokens = usage.prompt_tokens
+                    if usage.prompt_cached_input_tokens:
+                        prompt_text_tokens = (
+                            usage.prompt_tokens - usage.prompt_cached_input_tokens
+                        )
+                    if usage.prompt_audio_tokens:
+                        prompt_text_tokens = (
+                            usage.prompt_tokens - usage.prompt_audio_tokens
+                        )
+
+                    completion_text_tokens = usage.completion_tokens
+                    if usage.completion_audio_tokens:
+                        completion_text_tokens = (
+                            usage.completion_tokens - usage.completion_audio_tokens
+                        )
+
+                    prompt_cost = prompt_text_tokens * model_rates.prompt
+                    completion_cost = completion_text_tokens * model_rates.completion
+                    model_cost += prompt_cost + completion_cost
+
+                    # Audio token costs
+                    if usage.prompt_audio_tokens:
+                        if model_rates.prompt_audio:
+                            model_cost += (
+                                usage.prompt_audio_tokens * model_rates.prompt_audio
+                            )
+                        else:
+                            logger.warning(
+                                f"Audio prompt tokens present for {model_key} but no audio rate defined"
+                            )
+
+                    if usage.completion_audio_tokens:
+                        if model_rates.completion_audio:
+                            model_cost += (
+                                usage.completion_audio_tokens
+                                * model_rates.completion_audio
+                            )
+                        else:
+                            logger.warning(
+                                f"Audio completion tokens present for {model_key} but no audio rate defined"
+                            )
+
+                    # Cached token costs
+                    if usage.prompt_cached_input_tokens:
+                        if model_rates.prompt_cached_input:
+                            model_cost += (
+                                usage.prompt_cached_input_tokens
+                                * model_rates.prompt_cached_input
+                            )
+                        else:
+                            logger.warning(
+                                f"Cached input tokens present for {model_key} but no cache input rate defined"
+                            )
+
+                    if usage.prompt_cached_creation_tokens:
+                        if model_rates.prompt_cached_creation:
+                            model_cost += (
+                                usage.prompt_cached_creation_tokens
+                                * model_rates.prompt_cached_creation
+                            )
+                        else:
+                            logger.warning(
+                                f"Cached creation tokens present for {model_key} but no cache creation rate defined"
+                            )
+
+                    model_total += usage.total_tokens
+                    model_prompt += usage.prompt_tokens
+                    model_completion += usage.completion_tokens
 
                models_list.append(
                    ModelUsage(
@@ -119,22 +213,124 @@ class TokenUsageService:
                        total_tokens=model_total,
                        prompt_tokens=model_prompt,
                        completion_tokens=model_completion,
+                        prompt_tokens_details=PromptTokenDetails(
+                            cached_input_tokens=sum(
+                                u.prompt_cached_input_tokens or 0 for u in usages
+                            ),
+                            cached_creation_tokens=sum(
+                                u.prompt_cached_creation_tokens or 0 for u in usages
+                            ),
+                            audio_tokens=sum(
+                                u.prompt_audio_tokens or 0 for u in usages
+                            ),
+                        )
+                        if any(
+                            u.prompt_cached_input_tokens
+                            or u.prompt_cached_creation_tokens
+                            or u.prompt_audio_tokens
+                            for u in usages
+                        )
+                        else None,
+                        completion_tokens_details=CompletionTokenDetails(
+                            audio_tokens=sum(
+                                u.completion_audio_tokens or 0 for u in usages
+                            ),
+                            reasoning_tokens=sum(
+                                u.completion_reasoning_tokens or 0 for u in usages
+                            ),
+                            accepted_prediction_tokens=sum(
+                                u.completion_accepted_prediction_tokens or 0
+                                for u in usages
+                            ),
+                            rejected_prediction_tokens=sum(
+                                u.completion_rejected_prediction_tokens or 0
+                                for u in usages
+                            ),
+                        )
+                        if any(
+                            getattr(u, attr, None)
+                            for u in usages
+                            for attr in [
+                                "completion_audio_tokens",
+                                "completion_reasoning_tokens",
+                                "completion_accepted_prediction_tokens",
+                                "completion_rejected_prediction_tokens",
+                            ]
+                        )
+                        else None,
                    )
                )
 
+                # Update provider metrics with all token types
                provider_metrics["total_cost"] += model_cost
                provider_metrics["total_tokens"] += model_total
                provider_metrics["prompt_tokens"] += model_prompt
                provider_metrics["completion_tokens"] += model_completion
+                provider_metrics["prompt_cached_input_tokens"] += sum(
+                    u.prompt_cached_input_tokens or 0 for u in usages
+                )
+                provider_metrics["prompt_cached_creation_tokens"] += sum(
+                    u.prompt_cached_creation_tokens or 0 for u in usages
+                )
+                provider_metrics["prompt_audio_tokens"] += sum(
+                    u.prompt_audio_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_audio_tokens"] += sum(
+                    u.completion_audio_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_reasoning_tokens"] += sum(
+                    u.completion_reasoning_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_accepted_prediction_tokens"] += sum(
+                    u.completion_accepted_prediction_tokens or 0 for u in usages
+                )
+                provider_metrics["completion_rejected_prediction_tokens"] += sum(
+                    u.completion_rejected_prediction_tokens or 0 for u in usages
+                )
 
            providers_list.append(
                ProviderUsage(
                    provider=provider,
                    models=models_list,
-
-
-
-
+                    total_cost=round(provider_metrics["total_cost"], 6),
+                    total_tokens=provider_metrics["total_tokens"],
+                    prompt_tokens=provider_metrics["prompt_tokens"],
+                    completion_tokens=provider_metrics["completion_tokens"],
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=provider_metrics[
+                            "prompt_cached_input_tokens"
+                        ],
+                        cached_creation_tokens=provider_metrics[
+                            "prompt_cached_creation_tokens"
+                        ],
+                        audio_tokens=provider_metrics["prompt_audio_tokens"],
+                    )
+                    if provider_metrics["prompt_cached_input_tokens"]
+                    or provider_metrics["prompt_cached_creation_tokens"]
+                    or provider_metrics["prompt_audio_tokens"]
+                    else None,
+                    completion_tokens_details=CompletionTokenDetails(
+                        audio_tokens=provider_metrics["completion_audio_tokens"],
+                        reasoning_tokens=provider_metrics[
+                            "completion_reasoning_tokens"
+                        ],
+                        accepted_prediction_tokens=provider_metrics[
+                            "completion_accepted_prediction_tokens"
+                        ],
+                        rejected_prediction_tokens=provider_metrics[
+                            "completion_rejected_prediction_tokens"
+                        ],
+                    )
+                    if any(
+                        provider_metrics[k]
+                        for k in [
+                            "completion_audio_tokens",
+                            "completion_reasoning_tokens",
+                            "completion_accepted_prediction_tokens",
+                            "completion_rejected_prediction_tokens",
+                        ]
+                    )
+                    else None,
                )
            )
 
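
In the new per-usage loop, cached and audio prompt tokens are priced at their own rates and only the remaining text tokens are billed at the base prompt rate. A worked example using the fallback gpt-4o rates from the diff (the request numbers are invented):

```python
# Rates copied from the GPT4O_PRICING fallback above.
PROMPT_RATE = 0.0000025          # $ per text prompt token
COMPLETION_RATE = 0.000010       # $ per completion token
CACHED_INPUT_RATE = 0.00000125   # $ per cached prompt input token

# Invented request: 10,000 prompt tokens, 6,000 of them served from the cache.
prompt_tokens = 10_000
cached_input_tokens = 6_000
completion_tokens = 500

text_prompt_tokens = prompt_tokens - cached_input_tokens  # 4,000, as in the subtraction above
cost = (
    text_prompt_tokens * PROMPT_RATE           # 0.01
    + cached_input_tokens * CACHED_INPUT_RATE  # 0.0075
    + completion_tokens * COMPLETION_RATE      # 0.005
)
print(round(cost, 6))  # 0.0225
```

The example covers the cached-input case only; in the diff, the audio adjustment recomputes prompt_text_tokens from usage.prompt_tokens rather than from the already cache-adjusted value.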

tokenator/utils.py CHANGED

@@ -5,8 +5,21 @@ import platform
 import logging
 from pathlib import Path
 
+
 logger = logging.getLogger(__name__)
 
+def is_notebook() -> bool:
+    try:
+        from IPython import get_ipython  # type: ignore
+        shell = get_ipython().__class__.__name__
+        if shell == 'ZMQInteractiveShell':
+            return True  # Jupyter notebook or qtconsole
+        elif shell == 'TerminalInteractiveShell':
+            return False  # Terminal running IPython
+        else:
+            return False  # Other type (?)
+    except NameError:
+        return False
 
 def is_colab() -> bool:
     """Check if running in Google Colab."""
@@ -21,7 +34,7 @@ def is_colab() -> bool:
 def get_default_db_path() -> str:
     """Get the platform-specific default database path."""
     try:
-        if is_colab():
+        if is_colab() or is_notebook():
             # Use in-memory database for Colab
             return "usage.db"
 
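
is_notebook() relies on the common IPython shell-class check, and get_default_db_path() now returns the local usage.db fallback for notebook sessions as well as Colab. A quick check of the new helpers (safe to run anywhere; outside IPython both checks simply report False):

```python
from tokenator.utils import get_default_db_path, is_colab, is_notebook

print(is_notebook())          # True only inside a Jupyter/qtconsole kernel
print(is_colab())             # True only inside Google Colab
print(get_default_db_path())  # "usage.db" in notebooks/Colab, platform-specific path otherwise
```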

{tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tokenator
-Version: 0.1.14
+Version: 0.1.15
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
@@ -15,6 +15,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: alembic (>=1.13.0,<2.0.0)
 Requires-Dist: anthropic (>=0.43.0,<0.44.0)
+Requires-Dist: ipython
 Requires-Dist: openai (>=1.59.0,<2.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
@@ -22,7 +23,7 @@ Description-Content-Type: text/markdown
 
 # Tokenator : Track and analyze LLM token usage and cost
 
-Have you ever wondered
+Have you ever wondered :
 - How many tokens does your AI agent consume?
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money/tokens did you spend today on developing with LLMs?

tokenator-0.1.15.dist-info/RECORD ADDED

@@ -0,0 +1,21 @@
+tokenator/__init__.py,sha256=AEPE73UGB_TeNLhro3eY0hU8yy6T-_6AyDls8vWApnE,465
+tokenator/anthropic/client_anthropic.py,sha256=2oxTLb5-sPK_KL-OumCjE4wPVI8U_eFyRonn9XjGXJw,7196
+tokenator/anthropic/stream_interceptors.py,sha256=4VHC_-WkG3Pa10YizmFLrHcbz0Tm2MR_YB5-uohKp5A,5221
+tokenator/base_wrapper.py,sha256=EQ49xGduEp05-gj1xyZDasrck4RpComaoKslHxQTwuw,4956
+tokenator/create_migrations.py,sha256=k9IHiGK21dLTA8MYNsuhO0-kUVIcMSViMFYtY4WU2Rw,730
+tokenator/migrations/env.py,sha256=JoF5MJ4ae0wJW5kdBHuFlG3ZqeCCDvbMcU8fNA_a6hM,1396
+tokenator/migrations/script.py.mako,sha256=nJL-tbLQE0Qy4P9S4r4ntNAcikPtoFUlvXe6xvm9ot8,635
+tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py,sha256=WIZN5HdNRXlRdfpUJpJFaPD4G1s-SgRdTMQl4WDB-hA,2189
+tokenator/migrations/versions/f6f1f2437513_initial_migration.py,sha256=4cveHkwSxs-hxOPCm81YfvGZTkJJ2ClAFmyL98-1VCo,1910
+tokenator/migrations.py,sha256=YAf9gZmDzAq36PWWXPtdUQoJFYPXtIDzflC79H6gcJg,1114
+tokenator/models.py,sha256=p4uoFqJYGMlygotxip_HZcfM16Jm4LoyFLFTsM1Z8a4,2132
+tokenator/openai/client_openai.py,sha256=pbdJ-aZPuJs-7OT1VEv0DW36cCYbRAVKhSQEprxVIdY,9686
+tokenator/openai/stream_interceptors.py,sha256=ez1MnjRZW_rEalv2SIPAvrU9oMD6OJoD9vht-057fDM,5243
+tokenator/schemas.py,sha256=kBmShqgpQ3W-ILAP1NuCaFgqFplQM4OH0MmJteLqrwI,2371
+tokenator/state.py,sha256=xdqDC-rlEA88-VgqQqHnAOXQ5pNTpnHcgOtohDIImPY,262
+tokenator/usage.py,sha256=QaudrO6uwnMNRn9aCYVPj9yiQHmbdoAVZ9-G4Q1B0fw,20511
+tokenator/utils.py,sha256=djoWmAhqH-O2Su3qIcuY-_3Vj1-qPwMcdzwq9IlwiDc,2435
+tokenator-0.1.15.dist-info/LICENSE,sha256=wdG-B6-ODk8RQ4jq5uXSn0w1UWTzCH_MMyvh7AwtGns,1074
+tokenator-0.1.15.dist-info/METADATA,sha256=dtws3Qwm2iZLCYZv0meqQP80Q49821HdyZgUmDeqDcg,6035
+tokenator-0.1.15.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+tokenator-0.1.15.dist-info/RECORD,,

tokenator-0.1.14.dist-info/RECORD DELETED

@@ -1,20 +0,0 @@
-tokenator/__init__.py,sha256=AEPE73UGB_TeNLhro3eY0hU8yy6T-_6AyDls8vWApnE,465
-tokenator/anthropic/client_anthropic.py,sha256=uWUrRId7vJlMG6hVKLUzaA3PoOT6mJwTqSRIhAidRFY,6163
-tokenator/anthropic/stream_interceptors.py,sha256=4VHC_-WkG3Pa10YizmFLrHcbz0Tm2MR_YB5-uohKp5A,5221
-tokenator/base_wrapper.py,sha256=UoS3cOuPa3HpuXPTawybvAtwufgZwzzKBj0BhyB-z6w,3160
-tokenator/create_migrations.py,sha256=k9IHiGK21dLTA8MYNsuhO0-kUVIcMSViMFYtY4WU2Rw,730
-tokenator/migrations/env.py,sha256=JoF5MJ4ae0wJW5kdBHuFlG3ZqeCCDvbMcU8fNA_a6hM,1396
-tokenator/migrations/script.py.mako,sha256=nJL-tbLQE0Qy4P9S4r4ntNAcikPtoFUlvXe6xvm9ot8,635
-tokenator/migrations/versions/f6f1f2437513_initial_migration.py,sha256=4cveHkwSxs-hxOPCm81YfvGZTkJJ2ClAFmyL98-1VCo,1910
-tokenator/migrations.py,sha256=YAf9gZmDzAq36PWWXPtdUQoJFYPXtIDzflC79H6gcJg,1114
-tokenator/models.py,sha256=AlNC5NVrycLg0LhDJIww9HXQ3lwM8CoKvRSqXU6iw-k,1225
-tokenator/openai/client_openai.py,sha256=LhD1IbpzPXRK9eSqtcfUfoM9vBsyw6OHA0_a7N_tS9U,6230
-tokenator/openai/stream_interceptors.py,sha256=ez1MnjRZW_rEalv2SIPAvrU9oMD6OJoD9vht-057fDM,5243
-tokenator/schemas.py,sha256=zIgfmSsFJV9ziJdKrpV8p2P1f-BVWUVIpWoqCLpzhEU,2225
-tokenator/state.py,sha256=xdqDC-rlEA88-VgqQqHnAOXQ5pNTpnHcgOtohDIImPY,262
-tokenator/usage.py,sha256=ghnZ7pQuIxeI38O63xDAbEm6jOSmkYE7MChHBGPxbyM,11229
-tokenator/utils.py,sha256=xg9l2GV1yJL1BlxKL1r8CboABWDslf3G5rGQEJSjFrE,1973
-tokenator-0.1.14.dist-info/LICENSE,sha256=wdG-B6-ODk8RQ4jq5uXSn0w1UWTzCH_MMyvh7AwtGns,1074
-tokenator-0.1.14.dist-info/METADATA,sha256=L93LfqCfqvhES92COaQZpX5w9_c2aDaX8pj2wT74Sxw,6018
-tokenator-0.1.14.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
-tokenator-0.1.14.dist-info/RECORD,,

{tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/LICENSE: File without changes
{tokenator-0.1.14.dist-info → tokenator-0.1.15.dist-info}/WHEEL: File without changes