tokenator 0.1.13__tar.gz → 0.1.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tokenator-0.1.13 → tokenator-0.1.15}/PKG-INFO +5 -4
- {tokenator-0.1.13 → tokenator-0.1.15}/README.md +1 -1
- {tokenator-0.1.13 → tokenator-0.1.15}/pyproject.toml +4 -3
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/__init__.py +2 -7
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/anthropic/client_anthropic.py +51 -15
- tokenator-0.1.15/src/tokenator/base_wrapper.py +120 -0
- tokenator-0.1.15/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +64 -0
- tokenator-0.1.15/src/tokenator/models.py +63 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/openai/client_openai.py +97 -5
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/schemas.py +21 -19
- tokenator-0.1.15/src/tokenator/state.py +12 -0
- tokenator-0.1.15/src/tokenator/usage.py +503 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/utils.py +14 -1
- tokenator-0.1.13/src/tokenator/base_wrapper.py +0 -74
- tokenator-0.1.13/src/tokenator/models.py +0 -42
- tokenator-0.1.13/src/tokenator/usage.py +0 -269
- {tokenator-0.1.13 → tokenator-0.1.15}/LICENSE +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/anthropic/stream_interceptors.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/create_migrations.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/env.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/script.py.mako +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/openai/stream_interceptors.py +0 -0
{tokenator-0.1.13 → tokenator-0.1.15}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: tokenator
-Version: 0.1.13
+Version: 0.1.15
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
@@ -14,15 +14,16 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: alembic (>=1.13.0,<2.0.0)
-Requires-Dist: anthropic (>=0.
-Requires-Dist:
+Requires-Dist: anthropic (>=0.43.0,<0.44.0)
+Requires-Dist: ipython
+Requires-Dist: openai (>=1.59.0,<2.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
 Description-Content-Type: text/markdown
 
 # Tokenator : Track and analyze LLM token usage and cost
 
-Have you ever wondered
+Have you ever wondered :
 - How many tokens does your AI agent consume?
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money/tokens did you spend today on developing with LLMs?
```
{tokenator-0.1.13 → tokenator-0.1.15}/README.md

```diff
@@ -1,6 +1,6 @@
 # Tokenator : Track and analyze LLM token usage and cost
 
-Have you ever wondered
+Have you ever wondered :
 - How many tokens does your AI agent consume?
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money/tokens did you spend today on developing with LLMs?
```
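The README pitches usage tracking for wrapped LLM clients. For orientation, a minimal quick-start might look like the sketch below; it is assembled from the public names visible in this diff (`tokenator_openai`, the optional `execution_id` argument), not copied from the README, so treat the model name and call details as illustrative.

```python
# Hedged quick-start sketch (not taken from the README): wrap an OpenAI client
# with tokenator_openai and make a normal chat completion call. Token usage is
# logged to tokenator's local database as a side effect.
from openai import OpenAI
from tokenator import tokenator_openai

client = tokenator_openai(OpenAI())  # proxies the underlying client

response = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "Hello!"}],
    execution_id="quickstart-demo",  # optional tag; a UUID is generated if omitted
)
print(response.choices[0].message.content)
```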
{tokenator-0.1.13 → tokenator-0.1.15}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tokenator"
-version = "0.1.13"
+version = "0.1.15"
 description = "Token usage tracking wrapper for LLMs"
 authors = ["Ujjwal Maheshwari <your.email@example.com>"]
 readme = "README.md"
@@ -9,11 +9,12 @@ packages = [{include = "tokenator", from = "src"}]
 
 [tool.poetry.dependencies]
 python = "^3.9"
-openai = "^1.
+openai = "^1.59.0"
 sqlalchemy = "^2.0.0"
 requests = "^2.32.3"
 alembic = "^1.13.0"
-anthropic = "^0.
+anthropic = "^0.43.0"
+ipython = "*"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.0"
```
{tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/__init__.py

```diff
@@ -5,14 +5,9 @@ from .openai.client_openai import tokenator_openai
 from .anthropic.client_anthropic import tokenator_anthropic
 from . import usage
 from .utils import get_default_db_path
-from .
+from .usage import TokenUsageService
 
-
+usage = TokenUsageService()  # noqa: F811
 __all__ = ["tokenator_openai", "tokenator_anthropic", "usage", "get_default_db_path"]
 
 logger = logging.getLogger(__name__)
-
-try:
-    check_and_run_migrations()
-except Exception as e:
-    logger.warning(f"Failed to run migrations, but continuing anyway: {e}")
```
{tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/anthropic/client_anthropic.py

```diff
@@ -6,9 +6,13 @@ import logging
 from anthropic import Anthropic, AsyncAnthropic
 from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
 
-from ..models import
+from ..models import PromptTokenDetails, TokenMetrics, TokenUsageStats
 from ..base_wrapper import BaseWrapper, ResponseType
-from .stream_interceptors import
+from .stream_interceptors import (
+    AnthropicAsyncStreamInterceptor,
+    AnthropicSyncStreamInterceptor,
+)
+from ..state import is_tokenator_enabled
 
 logger = logging.getLogger(__name__)
 
@@ -24,28 +28,46 @@ class BaseAnthropicWrapper(BaseWrapper):
             if isinstance(response, Message):
                 if not hasattr(response, "usage"):
                     return None
-                usage =
-                    prompt_tokens=response.usage.input_tokens
+                usage = TokenMetrics(
+                    prompt_tokens=response.usage.input_tokens
+                    + (getattr(response.usage, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=response.usage.output_tokens,
                     total_tokens=response.usage.input_tokens
                     + response.usage.output_tokens,
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            response.usage, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            response.usage, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(model=response.model, usage=usage)
             elif isinstance(response, dict):
                 usage_dict = response.get("usage")
                 if not usage_dict:
                     return None
-                usage =
-                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                usage = TokenMetrics(
+                    prompt_tokens=usage_dict.get("input_tokens", 0)
+                    + (getattr(usage_dict, "cache_creation_input_tokens", 0) or 0),
                     completion_tokens=usage_dict.get("output_tokens", 0),
                     total_tokens=usage_dict.get("input_tokens", 0)
                     + usage_dict.get("output_tokens", 0),
+                    prompt_tokens_details=PromptTokenDetails(
+                        cached_input_tokens=getattr(
+                            usage_dict, "cache_read_input_tokens", None
+                        ),
+                        cached_creation_tokens=getattr(
+                            usage_dict, "cache_creation_input_tokens", None
+                        ),
+                    ),
                 )
                 return TokenUsageStats(
                     model=response.get("model", "unknown"), usage=usage
                 )
         except Exception as e:
-            logger.warning("Failed to process usage stats: %s", str(e))
+            logger.warning("Failed to process usage stats: %s", str(e), exc_info=True)
             return None
         return None
 
@@ -56,15 +78,23 @@ class BaseAnthropicWrapper(BaseWrapper):
 
 def _create_usage_callback(execution_id, log_usage_fn):
     """Creates a callback function for processing usage statistics from stream chunks."""
+
     def usage_callback(chunks):
         if not chunks:
             return
-
+
+        # Skip if tokenator is disabled
+        if not is_tokenator_enabled:
+            logger.debug("Tokenator is disabled - skipping stream usage logging")
+            return
+
         usage_data = TokenUsageStats(
-            model=chunks[0].message.model
-
+            model=chunks[0].message.model
+            if isinstance(chunks[0], RawMessageStartEvent)
+            else "",
+            usage=TokenMetrics(),
         )
-
+
         for chunk in chunks:
             if isinstance(chunk, RawMessageStartEvent):
                 usage_data.model = chunk.message.model
@@ -72,8 +102,10 @@ def _create_usage_callback(execution_id, log_usage_fn):
                 usage_data.usage.completion_tokens += chunk.message.usage.output_tokens
             elif isinstance(chunk, RawMessageDeltaEvent):
                 usage_data.usage.completion_tokens += chunk.usage.output_tokens
-
-        usage_data.usage.total_tokens =
+
+        usage_data.usage.total_tokens = (
+            usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
+        )
         log_usage_fn(usage_data, execution_id=execution_id)
 
     return usage_callback
@@ -84,7 +116,9 @@ class AnthropicWrapper(BaseAnthropicWrapper):
         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
     ) -> Union[Message, Iterator[Message]]:
         """Create a message completion and log token usage."""
-        logger.debug(
+        logger.debug(
+            "Creating message completion with args: %s, kwargs: %s", args, kwargs
+        )
 
         if kwargs.get("stream", False):
             base_stream = self.client.messages.create(*args, **kwargs)
@@ -105,7 +139,9 @@ class AsyncAnthropicWrapper(BaseAnthropicWrapper):
         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
     ) -> Union[Message, AsyncIterator[Message]]:
         """Create a message completion and log token usage."""
-        logger.debug(
+        logger.debug(
+            "Creating message completion with args: %s, kwargs: %s", args, kwargs
+        )
 
         if kwargs.get("stream", False):
             base_stream = await self.client.messages.create(*args, **kwargs)
```
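From the caller's side the wrapper behaves like a plain Anthropic client. The sketch below is inferred from the `create()` signature and the `tokenator_anthropic` factory in this diff; the model name and message are purely illustrative.

```python
# Hedged sketch: wrapping an Anthropic client. The only change to the call
# surface visible in this diff is the optional execution_id keyword; cache
# creation/read tokens are folded into prompt_tokens_details when present.
from anthropic import Anthropic
from tokenator import tokenator_anthropic

client = tokenator_anthropic(Anthropic())

message = client.messages.create(
    model="claude-3-5-sonnet-latest",  # illustrative model name
    max_tokens=256,
    messages=[{"role": "user", "content": "Explain prompt caching in one line."}],
    execution_id="anthropic-demo",  # optional; a UUID is generated if omitted
)
print(message.content)
```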
tokenator-0.1.15/src/tokenator/base_wrapper.py (new file)

```diff
@@ -0,0 +1,120 @@
+"""Base wrapper class for token usage tracking."""
+
+from pathlib import Path
+from typing import Any, Optional, TypeVar
+import logging
+import uuid
+
+from .models import TokenUsageStats
+from .schemas import get_session, TokenUsage
+from . import state
+
+from .migrations import check_and_run_migrations
+
+logger = logging.getLogger(__name__)
+
+ResponseType = TypeVar("ResponseType")
+
+
+class BaseWrapper:
+    def __init__(self, client: Any, db_path: Optional[str] = None):
+        """Initialize the base wrapper."""
+        state.is_tokenator_enabled = True
+        try:
+            self.client = client
+
+            if db_path:
+                Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+                logger.info("Created database directory at: %s", Path(db_path).parent)
+                state.db_path = db_path  # Store db_path in state
+
+            else:
+                state.db_path = None  # Use default path
+
+            self.Session = get_session()
+
+            logger.debug(
+                "Initializing %s with db_path: %s", self.__class__.__name__, db_path
+            )
+
+            check_and_run_migrations(db_path)
+        except Exception as e:
+            state.is_tokenator_enabled = False
+            logger.warning(
+                f"Tokenator initialization failed. Usage tracking will be disabled. Error: {e}"
+            )
+
+    def _log_usage_impl(
+        self, token_usage_stats: TokenUsageStats, session, execution_id: str
+    ) -> None:
+        """Implementation of token usage logging."""
+        logger.debug(
+            "Logging usage for model %s: %s",
+            token_usage_stats.model,
+            token_usage_stats.usage.model_dump(),
+        )
+        try:
+            token_usage = TokenUsage(
+                execution_id=execution_id,
+                provider=self.provider,
+                model=token_usage_stats.model,
+                total_cost=0,  # This needs to be calculated based on your rates
+                prompt_tokens=token_usage_stats.usage.prompt_tokens,
+                completion_tokens=token_usage_stats.usage.completion_tokens,
+                total_tokens=token_usage_stats.usage.total_tokens,
+                # Prompt details
+                prompt_cached_input_tokens=token_usage_stats.usage.prompt_tokens_details.cached_input_tokens
+                if token_usage_stats.usage.prompt_tokens_details
+                else None,
+                prompt_cached_creation_tokens=token_usage_stats.usage.prompt_tokens_details.cached_creation_tokens
+                if token_usage_stats.usage.prompt_tokens_details
+                else None,
+                prompt_audio_tokens=token_usage_stats.usage.prompt_tokens_details.audio_tokens
+                if token_usage_stats.usage.prompt_tokens_details
+                else None,
+                # Completion details
+                completion_audio_tokens=token_usage_stats.usage.completion_tokens_details.audio_tokens
+                if token_usage_stats.usage.completion_tokens_details
+                else None,
+                completion_reasoning_tokens=token_usage_stats.usage.completion_tokens_details.reasoning_tokens
+                if token_usage_stats.usage.completion_tokens_details
+                else None,
+                completion_accepted_prediction_tokens=token_usage_stats.usage.completion_tokens_details.accepted_prediction_tokens
+                if token_usage_stats.usage.completion_tokens_details
+                else None,
+                completion_rejected_prediction_tokens=token_usage_stats.usage.completion_tokens_details.rejected_prediction_tokens
+                if token_usage_stats.usage.completion_tokens_details
+                else None,
+            )
+            session.add(token_usage)
+            logger.debug(
+                "Logged token usage: model=%s, total_tokens=%d",
+                token_usage_stats.model,
+                token_usage_stats.usage.total_tokens,
+            )
+        except Exception as e:
+            logger.error("Failed to log token usage: %s", str(e))
+
+    def _log_usage(
+        self, token_usage_stats: TokenUsageStats, execution_id: Optional[str] = None
+    ):
+        """Log token usage to database."""
+        if not state.is_tokenator_enabled:
+            logger.debug("Tokenator is disabled - skipping usage logging")
+            return
+
+        if not execution_id:
+            execution_id = str(uuid.uuid4())
+
+        logger.debug("Starting token usage logging for execution_id: %s", execution_id)
+        session = self.Session()
+        try:
+            try:
+                self._log_usage_impl(token_usage_stats, session, execution_id)
+                session.commit()
+                logger.debug("Successfully committed token usage for execution_id: %s", execution_id)
+            except Exception as e:
+                logger.error("Failed to log token usage: %s", str(e))
+                session.rollback()
+        finally:
+            session.close()
```
tokenator-0.1.15/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py (new file)

```diff
@@ -0,0 +1,64 @@
+"""Adding detailed input and output token schema
+
+Revision ID: f028b8155fed
+Revises: f6f1f2437513
+Create Date: 2025-01-19 15:41:12.715623
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = "f028b8155fed"
+down_revision: Union[str, None] = "f6f1f2437513"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column("token_usage", sa.Column("total_cost", sa.Integer(), nullable=False))
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_input_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("prompt_cached_creation_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage", sa.Column("prompt_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage", sa.Column("completion_audio_tokens", sa.Integer(), nullable=True)
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_reasoning_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_accepted_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    op.add_column(
+        "token_usage",
+        sa.Column("completion_rejected_prediction_tokens", sa.Integer(), nullable=True),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column("token_usage", "completion_rejected_prediction_tokens")
+    op.drop_column("token_usage", "completion_accepted_prediction_tokens")
+    op.drop_column("token_usage", "completion_reasoning_tokens")
+    op.drop_column("token_usage", "completion_audio_tokens")
+    op.drop_column("token_usage", "prompt_audio_tokens")
+    op.drop_column("token_usage", "prompt_cached_creation_tokens")
+    op.drop_column("token_usage", "prompt_cached_input_tokens")
+    op.drop_column("token_usage", "total_cost")
+    # ### end Alembic commands ###
```
tokenator-0.1.15/src/tokenator/models.py (new file)

```diff
@@ -0,0 +1,63 @@
+from pydantic import BaseModel, Field
+from typing import List, Optional
+
+
+class TokenRate(BaseModel):
+    prompt: float = Field(..., description="Cost per prompt token")
+    completion: float = Field(..., description="Cost per completion token")
+    prompt_audio: Optional[float] = Field(
+        None, description="Cost per audio prompt token"
+    )
+    completion_audio: Optional[float] = Field(
+        None, description="Cost per audio completion token"
+    )
+    prompt_cached_input: Optional[float] = Field(
+        None, description="Cost per cached prompt input token"
+    )
+    prompt_cached_creation: Optional[float] = Field(
+        None, description="Cost per cached prompt creation token"
+    )
+
+
+class PromptTokenDetails(BaseModel):
+    cached_input_tokens: Optional[int] = None
+    cached_creation_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+
+
+class CompletionTokenDetails(BaseModel):
+    reasoning_tokens: Optional[int] = None
+    audio_tokens: Optional[int] = None
+    accepted_prediction_tokens: Optional[int] = None
+    rejected_prediction_tokens: Optional[int] = None
+
+
+class TokenMetrics(BaseModel):
+    total_cost: float = Field(default=0, description="Total cost in USD")
+    total_tokens: int = Field(default=0, description="Total tokens used")
+    prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
+    completion_tokens: int = Field(default=0, description="Number of completion tokens")
+    prompt_tokens_details: Optional[PromptTokenDetails] = None
+    completion_tokens_details: Optional[CompletionTokenDetails] = None
+
+
+class ModelUsage(TokenMetrics):
+    model: str = Field(..., description="Model name")
+
+
+class ProviderUsage(TokenMetrics):
+    provider: str = Field(..., description="Provider name")
+    models: List[ModelUsage] = Field(
+        default_factory=list, description="Usage breakdown by model"
+    )
+
+
+class TokenUsageReport(TokenMetrics):
+    providers: List[ProviderUsage] = Field(
+        default_factory=list, description="Usage breakdown by provider"
+    )
+
+
+class TokenUsageStats(BaseModel):
+    model: str
+    usage: TokenMetrics
```
{tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/openai/client_openai.py

```diff
@@ -6,9 +6,18 @@ import logging
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 
-from ..models import
+from ..models import (
+    TokenMetrics,
+    TokenUsageStats,
+    PromptTokenDetails,
+    CompletionTokenDetails,
+)
 from ..base_wrapper import BaseWrapper, ResponseType
-from .stream_interceptors import
+from .stream_interceptors import (
+    OpenAIAsyncStreamInterceptor,
+    OpenAISyncStreamInterceptor,
+)
+from ..state import is_tokenator_enabled
 
 logger = logging.getLogger(__name__)
 
@@ -26,18 +35,49 @@ class BaseOpenAIWrapper(BaseWrapper):
         if isinstance(response, ChatCompletion):
             if response.usage is None:
                 return None
-            usage =
+            usage = TokenMetrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
+                prompt_tokens_details=PromptTokenDetails(
+                    cached_input_tokens=getattr(
+                        response.usage.prompt_tokens_details, "cached_tokens", None
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.prompt_tokens_details, "audio_tokens", None
+                    ),
+                ),
+                completion_tokens_details=CompletionTokenDetails(
+                    reasoning_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "reasoning_tokens",
+                        None,
+                    ),
+                    audio_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "audio_tokens",
+                        None,
+                    ),
+                    accepted_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "accepted_prediction_tokens",
+                        None,
+                    ),
+                    rejected_prediction_tokens=getattr(
+                        response.usage.completion_tokens_details,
+                        "rejected_prediction_tokens",
+                        None,
+                    ),
+                ),
             )
+
             return TokenUsageStats(model=response.model, usage=usage)
 
         elif isinstance(response, dict):
             usage_dict = response.get("usage")
             if not usage_dict:
                 return None
-            usage =
+            usage = TokenMetrics(
                 prompt_tokens=usage_dict.get("prompt_tokens", 0),
                 completion_tokens=usage_dict.get("completion_tokens", 0),
                 total_tokens=usage_dict.get("total_tokens", 0),
@@ -58,6 +98,10 @@ class BaseOpenAIWrapper(BaseWrapper):
     def completions(self):
         return self
 
+    @property
+    def beta(self):
+        return self
+
 
 def _create_usage_callback(execution_id, log_usage_fn):
     """Creates a callback function for processing usage statistics from stream chunks."""
@@ -65,10 +109,18 @@ def _create_usage_callback(execution_id, log_usage_fn):
     def usage_callback(chunks):
         if not chunks:
             return
+
+        # Skip if tokenator is disabled
+        if not is_tokenator_enabled:
+            logger.debug("Tokenator is disabled - skipping stream usage logging")
+            return
+
+        logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
         # Build usage_data from the first chunk's model
         usage_data = TokenUsageStats(
             model=chunks[0].model,
-            usage=
+            usage=TokenMetrics(),
         )
         # Sum up usage from all chunks
         has_usage = False
@@ -106,6 +158,26 @@ class OpenAIWrapper(BaseOpenAIWrapper):
 
         return response
 
+    def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, Iterator[ChatCompletion]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAISyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 class AsyncOpenAIWrapper(BaseOpenAIWrapper):
     async def create(
@@ -131,6 +203,26 @@ class AsyncOpenAIWrapper(BaseOpenAIWrapper):
             self._log_usage(usage_data, execution_id=execution_id)
         return response
 
+    async def parse(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
+        """Create a chat completion parse and log token usage."""
+        logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = await self.client.beta.chat.completions.parse(*args, **kwargs)
+            return OpenAIAsyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = await self.client.beta.chat.completions.parse(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
 
 @overload
 def tokenator_openai(
```
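The new `parse()` methods, together with the `beta` passthrough property, let structured-output calls flow through the wrapper while usage is still logged. Below is a hedged sketch of how that is likely used; the `Event` model, model name, and prompt are invented, and routing via `client.chat.completions.parse` assumes the existing `chat`/`completions` passthrough properties.

```python
# Hedged sketch of using the new parse() support; Event and the model name are
# illustrative. The wrapper forwards the call to the underlying client's
# beta.chat.completions.parse and logs the resulting token usage.
from pydantic import BaseModel

from openai import OpenAI
from tokenator import tokenator_openai


class Event(BaseModel):
    name: str
    date: str


client = tokenator_openai(OpenAI())

completion = client.chat.completions.parse(
    model="gpt-4o-2024-08-06",  # illustrative model name
    messages=[{"role": "user", "content": "The launch party is on June 5th."}],
    response_format=Event,
    execution_id="parse-demo",  # optional tag for the logged usage row
)
print(completion.choices[0].message.parsed)
```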
|