tokenator 0.1.13__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {tokenator-0.1.13 → tokenator-0.1.15}/PKG-INFO +5 -4
  2. {tokenator-0.1.13 → tokenator-0.1.15}/README.md +1 -1
  3. {tokenator-0.1.13 → tokenator-0.1.15}/pyproject.toml +4 -3
  4. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/__init__.py +2 -7
  5. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/anthropic/client_anthropic.py +51 -15
  6. tokenator-0.1.15/src/tokenator/base_wrapper.py +120 -0
  7. tokenator-0.1.15/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +64 -0
  8. tokenator-0.1.15/src/tokenator/models.py +63 -0
  9. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/openai/client_openai.py +97 -5
  10. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/schemas.py +21 -19
  11. tokenator-0.1.15/src/tokenator/state.py +12 -0
  12. tokenator-0.1.15/src/tokenator/usage.py +503 -0
  13. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/utils.py +14 -1
  14. tokenator-0.1.13/src/tokenator/base_wrapper.py +0 -74
  15. tokenator-0.1.13/src/tokenator/models.py +0 -42
  16. tokenator-0.1.13/src/tokenator/usage.py +0 -269
  17. {tokenator-0.1.13 → tokenator-0.1.15}/LICENSE +0 -0
  18. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/anthropic/stream_interceptors.py +0 -0
  19. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/create_migrations.py +0 -0
  20. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/env.py +0 -0
  21. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/script.py.mako +0 -0
  22. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
  23. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/migrations.py +0 -0
  24. {tokenator-0.1.13 → tokenator-0.1.15}/src/tokenator/openai/stream_interceptors.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tokenator
3
- Version: 0.1.13
3
+ Version: 0.1.15
4
4
  Summary: Token usage tracking wrapper for LLMs
5
5
  License: MIT
6
6
  Author: Ujjwal Maheshwari
@@ -14,15 +14,16 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: alembic (>=1.13.0,<2.0.0)
17
- Requires-Dist: anthropic (>=0.40.0,<0.41.0)
18
- Requires-Dist: openai (>=1.57.0,<2.0.0)
17
+ Requires-Dist: anthropic (>=0.43.0,<0.44.0)
18
+ Requires-Dist: ipython
19
+ Requires-Dist: openai (>=1.59.0,<2.0.0)
19
20
  Requires-Dist: requests (>=2.32.3,<3.0.0)
20
21
  Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
21
22
  Description-Content-Type: text/markdown
22
23
 
23
24
  # Tokenator : Track and analyze LLM token usage and cost
24
25
 
25
- Have you ever wondered about :
26
+ Have you ever wondered :
26
27
  - How many tokens does your AI agent consume?
27
28
  - How much does it cost to run a complex AI workflow with multiple LLM providers?
28
29
  - How much money/tokens did you spend today on developing with LLMs?
@@ -1,6 +1,6 @@
1
1
  # Tokenator : Track and analyze LLM token usage and cost
2
2
 
3
- Have you ever wondered about :
3
+ Have you ever wondered :
4
4
  - How many tokens does your AI agent consume?
5
5
  - How much does it cost to run a complex AI workflow with multiple LLM providers?
6
6
  - How much money/tokens did you spend today on developing with LLMs?
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tokenator"
3
- version = "0.1.13"
3
+ version = "0.1.15"
4
4
  description = "Token usage tracking wrapper for LLMs"
5
5
  authors = ["Ujjwal Maheshwari <your.email@example.com>"]
6
6
  readme = "README.md"
@@ -9,11 +9,12 @@ packages = [{include = "tokenator", from = "src"}]
9
9
 
10
10
  [tool.poetry.dependencies]
11
11
  python = "^3.9"
12
- openai = "^1.57.0"
12
+ openai = "^1.59.0"
13
13
  sqlalchemy = "^2.0.0"
14
14
  requests = "^2.32.3"
15
15
  alembic = "^1.13.0"
16
- anthropic = "^0.40.0"
16
+ anthropic = "^0.43.0"
17
+ ipython = "*"
17
18
 
18
19
  [tool.poetry.group.dev.dependencies]
19
20
  pytest = "^8.0.0"
@@ -5,14 +5,9 @@ from .openai.client_openai import tokenator_openai
5
5
  from .anthropic.client_anthropic import tokenator_anthropic
6
6
  from . import usage
7
7
  from .utils import get_default_db_path
8
- from .migrations import check_and_run_migrations
8
+ from .usage import TokenUsageService
9
9
 
10
- __version__ = "0.1.0"
10
+ usage = TokenUsageService() # noqa: F811
11
11
  __all__ = ["tokenator_openai", "tokenator_anthropic", "usage", "get_default_db_path"]
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
-
15
- try:
16
- check_and_run_migrations()
17
- except Exception as e:
18
- logger.warning(f"Failed to run migrations, but continuing anyway: {e}")
@@ -6,9 +6,13 @@ import logging
6
6
  from anthropic import Anthropic, AsyncAnthropic
7
7
  from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
8
8
 
9
- from ..models import Usage, TokenUsageStats
9
+ from ..models import PromptTokenDetails, TokenMetrics, TokenUsageStats
10
10
  from ..base_wrapper import BaseWrapper, ResponseType
11
- from .stream_interceptors import AnthropicAsyncStreamInterceptor, AnthropicSyncStreamInterceptor
11
+ from .stream_interceptors import (
12
+ AnthropicAsyncStreamInterceptor,
13
+ AnthropicSyncStreamInterceptor,
14
+ )
15
+ from ..state import is_tokenator_enabled
12
16
 
13
17
  logger = logging.getLogger(__name__)
14
18
 
@@ -24,28 +28,46 @@ class BaseAnthropicWrapper(BaseWrapper):
24
28
  if isinstance(response, Message):
25
29
  if not hasattr(response, "usage"):
26
30
  return None
27
- usage = Usage(
28
- prompt_tokens=response.usage.input_tokens,
31
+ usage = TokenMetrics(
32
+ prompt_tokens=response.usage.input_tokens
33
+ + (getattr(response.usage, "cache_creation_input_tokens", 0) or 0),
29
34
  completion_tokens=response.usage.output_tokens,
30
35
  total_tokens=response.usage.input_tokens
31
36
  + response.usage.output_tokens,
37
+ prompt_tokens_details=PromptTokenDetails(
38
+ cached_input_tokens=getattr(
39
+ response.usage, "cache_read_input_tokens", None
40
+ ),
41
+ cached_creation_tokens=getattr(
42
+ response.usage, "cache_creation_input_tokens", None
43
+ ),
44
+ ),
32
45
  )
33
46
  return TokenUsageStats(model=response.model, usage=usage)
34
47
  elif isinstance(response, dict):
35
48
  usage_dict = response.get("usage")
36
49
  if not usage_dict:
37
50
  return None
38
- usage = Usage(
39
- prompt_tokens=usage_dict.get("input_tokens", 0),
51
+ usage = TokenMetrics(
52
+ prompt_tokens=usage_dict.get("input_tokens", 0)
53
+ + (getattr(usage_dict, "cache_creation_input_tokens", 0) or 0),
40
54
  completion_tokens=usage_dict.get("output_tokens", 0),
41
55
  total_tokens=usage_dict.get("input_tokens", 0)
42
56
  + usage_dict.get("output_tokens", 0),
57
+ prompt_tokens_details=PromptTokenDetails(
58
+ cached_input_tokens=getattr(
59
+ usage_dict, "cache_read_input_tokens", None
60
+ ),
61
+ cached_creation_tokens=getattr(
62
+ usage_dict, "cache_creation_input_tokens", None
63
+ ),
64
+ ),
43
65
  )
44
66
  return TokenUsageStats(
45
67
  model=response.get("model", "unknown"), usage=usage
46
68
  )
47
69
  except Exception as e:
48
- logger.warning("Failed to process usage stats: %s", str(e))
70
+ logger.warning("Failed to process usage stats: %s", str(e), exc_info=True)
49
71
  return None
50
72
  return None
51
73
 
@@ -56,15 +78,23 @@ class BaseAnthropicWrapper(BaseWrapper):
56
78
 
57
79
  def _create_usage_callback(execution_id, log_usage_fn):
58
80
  """Creates a callback function for processing usage statistics from stream chunks."""
81
+
59
82
  def usage_callback(chunks):
60
83
  if not chunks:
61
84
  return
62
-
85
+
86
+ # Skip if tokenator is disabled
87
+ if not is_tokenator_enabled:
88
+ logger.debug("Tokenator is disabled - skipping stream usage logging")
89
+ return
90
+
63
91
  usage_data = TokenUsageStats(
64
- model=chunks[0].message.model if isinstance(chunks[0], RawMessageStartEvent) else "",
65
- usage=Usage(),
92
+ model=chunks[0].message.model
93
+ if isinstance(chunks[0], RawMessageStartEvent)
94
+ else "",
95
+ usage=TokenMetrics(),
66
96
  )
67
-
97
+
68
98
  for chunk in chunks:
69
99
  if isinstance(chunk, RawMessageStartEvent):
70
100
  usage_data.model = chunk.message.model
@@ -72,8 +102,10 @@ def _create_usage_callback(execution_id, log_usage_fn):
72
102
  usage_data.usage.completion_tokens += chunk.message.usage.output_tokens
73
103
  elif isinstance(chunk, RawMessageDeltaEvent):
74
104
  usage_data.usage.completion_tokens += chunk.usage.output_tokens
75
-
76
- usage_data.usage.total_tokens = usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
105
+
106
+ usage_data.usage.total_tokens = (
107
+ usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
108
+ )
77
109
  log_usage_fn(usage_data, execution_id=execution_id)
78
110
 
79
111
  return usage_callback
@@ -84,7 +116,9 @@ class AnthropicWrapper(BaseAnthropicWrapper):
84
116
  self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
85
117
  ) -> Union[Message, Iterator[Message]]:
86
118
  """Create a message completion and log token usage."""
87
- logger.debug("Creating message completion with args: %s, kwargs: %s", args, kwargs)
119
+ logger.debug(
120
+ "Creating message completion with args: %s, kwargs: %s", args, kwargs
121
+ )
88
122
 
89
123
  if kwargs.get("stream", False):
90
124
  base_stream = self.client.messages.create(*args, **kwargs)
@@ -105,7 +139,9 @@ class AsyncAnthropicWrapper(BaseAnthropicWrapper):
105
139
  self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
106
140
  ) -> Union[Message, AsyncIterator[Message]]:
107
141
  """Create a message completion and log token usage."""
108
- logger.debug("Creating message completion with args: %s, kwargs: %s", args, kwargs)
142
+ logger.debug(
143
+ "Creating message completion with args: %s, kwargs: %s", args, kwargs
144
+ )
109
145
 
110
146
  if kwargs.get("stream", False):
111
147
  base_stream = await self.client.messages.create(*args, **kwargs)
@@ -0,0 +1,120 @@
1
+ """Base wrapper class for token usage tracking."""
2
+
3
+ from pathlib import Path
4
+ from typing import Any, Optional, TypeVar
5
+ import logging
6
+ import uuid
7
+
8
+ from .models import TokenUsageStats
9
+ from .schemas import get_session, TokenUsage
10
+ from . import state
11
+
12
+ from .migrations import check_and_run_migrations
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ ResponseType = TypeVar("ResponseType")
17
+
18
+
19
class BaseWrapper:
    """Shared base for provider client wrappers that persist token usage.

    The class reads ``self.provider`` when building a usage row but does not
    define it here; presumably each provider-specific subclass sets it —
    TODO confirm against the subclasses.
    """

    def __init__(self, client: Any, db_path: Optional[str] = None):
        """Store the wrapped client and prepare the usage database.

        Args:
            client: The provider SDK client being wrapped.
            db_path: Optional database file location. When given, its parent
                directory is created and the path is published through
                ``state.db_path``; otherwise ``state.db_path`` is reset to
                ``None``.

        Setup is best-effort: any exception is logged as a warning and
        ``state.is_tokenator_enabled`` is switched off instead of propagating.
        """
        state.is_tokenator_enabled = True
        try:
            self.client = client

            if db_path:
                db_dir = Path(db_path).parent
                db_dir.mkdir(parents=True, exist_ok=True)
                logger.info("Created database directory at: %s", db_dir)
                state.db_path = db_path  # Store db_path in state
            else:
                state.db_path = None  # Use default path

            self.Session = get_session()

            logger.debug(
                "Initializing %s with db_path: %s", self.__class__.__name__, db_path
            )

            check_and_run_migrations(db_path)
        except Exception as e:
            state.is_tokenator_enabled = False
            # Lazy %-formatting: the message is only rendered if it is emitted.
            logger.warning(
                "Tokenator initialization failed. Usage tracking will be disabled. Error: %s",
                e,
            )

    def _log_usage_impl(
        self, token_usage_stats: TokenUsageStats, session, execution_id: str
    ) -> None:
        """Build a ``TokenUsage`` row from the stats and add it to ``session``.

        Args:
            token_usage_stats: Model name plus token metrics to record.
            session: Open database session; the row is added but not committed.
            execution_id: Identifier stored on the row.

        Raises:
            Exception: Anything raised while building or adding the row is
                propagated so that the caller can roll back instead of
                committing an empty transaction and reporting success.
        """
        usage = token_usage_stats.usage
        logger.debug(
            "Logging usage for model %s: %s",
            token_usage_stats.model,
            usage.model_dump(),
        )

        # Either detail object may be absent; every derived column is then None.
        prompt = usage.prompt_tokens_details
        completion = usage.completion_tokens_details

        token_usage = TokenUsage(
            execution_id=execution_id,
            provider=self.provider,
            model=token_usage_stats.model,
            total_cost=0,  # This needs to be calculated based on your rates
            prompt_tokens=usage.prompt_tokens,
            completion_tokens=usage.completion_tokens,
            total_tokens=usage.total_tokens,
            # Prompt details
            prompt_cached_input_tokens=prompt.cached_input_tokens if prompt else None,
            prompt_cached_creation_tokens=(
                prompt.cached_creation_tokens if prompt else None
            ),
            prompt_audio_tokens=prompt.audio_tokens if prompt else None,
            # Completion details
            completion_audio_tokens=completion.audio_tokens if completion else None,
            completion_reasoning_tokens=(
                completion.reasoning_tokens if completion else None
            ),
            completion_accepted_prediction_tokens=(
                completion.accepted_prediction_tokens if completion else None
            ),
            completion_rejected_prediction_tokens=(
                completion.rejected_prediction_tokens if completion else None
            ),
        )
        session.add(token_usage)
        logger.debug(
            "Logged token usage: model=%s, total_tokens=%d",
            token_usage_stats.model,
            usage.total_tokens,
        )

    def _log_usage(
        self, token_usage_stats: TokenUsageStats, execution_id: Optional[str] = None
    ):
        """Persist one usage record, never raising into the caller.

        Args:
            token_usage_stats: Model name plus token metrics to record.
            execution_id: Identifier for the record; a random UUID4 string is
                generated when it is missing or empty.

        Nothing is written when tracking is disabled or when this instance has
        no session factory.
        """
        if not state.is_tokenator_enabled:
            logger.debug("Tokenator is disabled - skipping usage logging")
            return

        # __init__ may have failed before assigning Session while another
        # wrapper later switched the shared flag back on.
        session_factory = getattr(self, "Session", None)
        if session_factory is None:
            logger.debug("Tokenator is disabled - skipping usage logging")
            return

        if not execution_id:
            execution_id = str(uuid.uuid4())

        logger.debug("Starting token usage logging for execution_id: %s", execution_id)
        session = session_factory()
        try:
            self._log_usage_impl(token_usage_stats, session, execution_id)
            session.commit()
            logger.debug(
                "Successfully committed token usage for execution_id: %s", execution_id
            )
        except Exception as e:
            logger.error("Failed to log token usage: %s", str(e))
            session.rollback()
        finally:
            session.close()
@@ -0,0 +1,64 @@
1
+ """Adding detailed input and output token schema
2
+
3
+ Revision ID: f028b8155fed
4
+ Revises: f6f1f2437513
5
+ Create Date: 2025-01-19 15:41:12.715623
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "f028b8155fed"
17
+ down_revision: Union[str, None] = "f6f1f2437513"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def upgrade() -> None:
    """Add the cost column and the detailed token-count columns to ``token_usage``.

    ``total_cost`` is NOT NULL, so it is given a server-side default of 0.
    Without a default, adding a NOT NULL column is rejected by SQLite and by
    other backends as soon as the table already holds rows.
    NOTE(review): the column is an Integer here; confirm that matches how
    cost is stored by the ORM schema.
    """
    op.add_column(
        "token_usage",
        sa.Column(
            "total_cost",
            sa.Integer(),
            nullable=False,
            server_default=sa.text("0"),
        ),
    )

    # The remaining columns are optional counters, added in the original order.
    nullable_counters = (
        "prompt_cached_input_tokens",
        "prompt_cached_creation_tokens",
        "prompt_audio_tokens",
        "completion_audio_tokens",
        "completion_reasoning_tokens",
        "completion_accepted_prediction_tokens",
        "completion_rejected_prediction_tokens",
    )
    for column_name in nullable_counters:
        op.add_column(
            "token_usage",
            sa.Column(column_name, sa.Integer(), nullable=True),
        )
52
+
53
+
54
def downgrade() -> None:
    """Drop every column this revision added to ``token_usage``.

    Columns are removed in the reverse of the order in which ``upgrade``
    adds them.
    """
    added_columns = (
        "completion_rejected_prediction_tokens",
        "completion_accepted_prediction_tokens",
        "completion_reasoning_tokens",
        "completion_audio_tokens",
        "prompt_audio_tokens",
        "prompt_cached_creation_tokens",
        "prompt_cached_input_tokens",
        "total_cost",
    )
    for column_name in added_columns:
        op.drop_column("token_usage", column_name)
@@ -0,0 +1,63 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional
3
+
4
+
5
class TokenRate(BaseModel):
    # Per-token prices. The two base rates are required; the audio and cache
    # rates default to None.
    prompt: float = Field(default=..., description="Cost per prompt token")
    completion: float = Field(default=..., description="Cost per completion token")
    prompt_audio: Optional[float] = Field(
        default=None, description="Cost per audio prompt token"
    )
    completion_audio: Optional[float] = Field(
        default=None, description="Cost per audio completion token"
    )
    prompt_cached_input: Optional[float] = Field(
        default=None, description="Cost per cached prompt input token"
    )
    prompt_cached_creation: Optional[float] = Field(
        default=None, description="Cost per cached prompt creation token"
    )
20
+
21
+
22
class PromptTokenDetails(BaseModel):
    # Optional breakdown of prompt tokens; every counter defaults to None.
    cached_input_tokens: Optional[int] = Field(default=None)
    cached_creation_tokens: Optional[int] = Field(default=None)
    audio_tokens: Optional[int] = Field(default=None)
26
+
27
+
28
class CompletionTokenDetails(BaseModel):
    # Optional breakdown of completion tokens; every counter defaults to None.
    reasoning_tokens: Optional[int] = Field(default=None)
    audio_tokens: Optional[int] = Field(default=None)
    accepted_prediction_tokens: Optional[int] = Field(default=None)
    rejected_prediction_tokens: Optional[int] = Field(default=None)
33
+
34
+
35
class TokenMetrics(BaseModel):
    # Aggregate counters; all numeric fields start at zero and the two detail
    # objects are absent unless supplied.
    total_cost: float = Field(description="Total cost in USD", default=0)
    total_tokens: int = Field(description="Total tokens used", default=0)
    prompt_tokens: int = Field(description="Number of prompt tokens", default=0)
    completion_tokens: int = Field(
        description="Number of completion tokens", default=0
    )
    prompt_tokens_details: Optional[PromptTokenDetails] = Field(default=None)
    completion_tokens_details: Optional[CompletionTokenDetails] = Field(default=None)
42
+
43
+
44
class ModelUsage(TokenMetrics):
    # Token metrics attributed to a single, required model name.
    model: str = Field(default=..., description="Model name")
46
+
47
+
48
class ProviderUsage(TokenMetrics):
    # Token metrics for one provider plus its per-model breakdown, which
    # starts as a fresh empty list for every instance.
    provider: str = Field(default=..., description="Provider name")
    models: List[ModelUsage] = Field(
        description="Usage breakdown by model", default_factory=list
    )
53
+
54
+
55
class TokenUsageReport(TokenMetrics):
    # Overall token metrics plus the per-provider breakdown, which starts as
    # a fresh empty list for every instance.
    providers: List[ProviderUsage] = Field(
        description="Usage breakdown by provider", default_factory=list
    )
59
+
60
+
61
class TokenUsageStats(BaseModel):
    # Pairs a model name with the metrics recorded for it; both are required.
    model: str = Field(default=...)
    usage: TokenMetrics = Field(default=...)
@@ -6,9 +6,18 @@ import logging
6
6
  from openai import AsyncOpenAI, OpenAI
7
7
  from openai.types.chat import ChatCompletion, ChatCompletionChunk
8
8
 
9
- from ..models import Usage, TokenUsageStats
9
+ from ..models import (
10
+ TokenMetrics,
11
+ TokenUsageStats,
12
+ PromptTokenDetails,
13
+ CompletionTokenDetails,
14
+ )
10
15
  from ..base_wrapper import BaseWrapper, ResponseType
11
- from .stream_interceptors import OpenAIAsyncStreamInterceptor, OpenAISyncStreamInterceptor
16
+ from .stream_interceptors import (
17
+ OpenAIAsyncStreamInterceptor,
18
+ OpenAISyncStreamInterceptor,
19
+ )
20
+ from ..state import is_tokenator_enabled
12
21
 
13
22
  logger = logging.getLogger(__name__)
14
23
 
@@ -26,18 +35,49 @@ class BaseOpenAIWrapper(BaseWrapper):
26
35
  if isinstance(response, ChatCompletion):
27
36
  if response.usage is None:
28
37
  return None
29
- usage = Usage(
38
+ usage = TokenMetrics(
30
39
  prompt_tokens=response.usage.prompt_tokens,
31
40
  completion_tokens=response.usage.completion_tokens,
32
41
  total_tokens=response.usage.total_tokens,
42
+ prompt_tokens_details=PromptTokenDetails(
43
+ cached_input_tokens=getattr(
44
+ response.usage.prompt_tokens_details, "cached_tokens", None
45
+ ),
46
+ audio_tokens=getattr(
47
+ response.usage.prompt_tokens_details, "audio_tokens", None
48
+ ),
49
+ ),
50
+ completion_tokens_details=CompletionTokenDetails(
51
+ reasoning_tokens=getattr(
52
+ response.usage.completion_tokens_details,
53
+ "reasoning_tokens",
54
+ None,
55
+ ),
56
+ audio_tokens=getattr(
57
+ response.usage.completion_tokens_details,
58
+ "audio_tokens",
59
+ None,
60
+ ),
61
+ accepted_prediction_tokens=getattr(
62
+ response.usage.completion_tokens_details,
63
+ "accepted_prediction_tokens",
64
+ None,
65
+ ),
66
+ rejected_prediction_tokens=getattr(
67
+ response.usage.completion_tokens_details,
68
+ "rejected_prediction_tokens",
69
+ None,
70
+ ),
71
+ ),
33
72
  )
73
+
34
74
  return TokenUsageStats(model=response.model, usage=usage)
35
75
 
36
76
  elif isinstance(response, dict):
37
77
  usage_dict = response.get("usage")
38
78
  if not usage_dict:
39
79
  return None
40
- usage = Usage(
80
+ usage = TokenMetrics(
41
81
  prompt_tokens=usage_dict.get("prompt_tokens", 0),
42
82
  completion_tokens=usage_dict.get("completion_tokens", 0),
43
83
  total_tokens=usage_dict.get("total_tokens", 0),
@@ -58,6 +98,10 @@ class BaseOpenAIWrapper(BaseWrapper):
58
98
  def completions(self):
59
99
  return self
60
100
 
101
@property
def beta(self):
    """Return this wrapper itself when ``.beta`` is accessed."""
    wrapper = self
    return wrapper
104
+
61
105
 
62
106
  def _create_usage_callback(execution_id, log_usage_fn):
63
107
  """Creates a callback function for processing usage statistics from stream chunks."""
@@ -65,10 +109,18 @@ def _create_usage_callback(execution_id, log_usage_fn):
65
109
  def usage_callback(chunks):
66
110
  if not chunks:
67
111
  return
112
+
113
+ # Skip if tokenator is disabled
114
+ if not is_tokenator_enabled:
115
+ logger.debug("Tokenator is disabled - skipping stream usage logging")
116
+ return
117
+
118
+ logger.debug("Processing stream usage for execution_id: %s", execution_id)
119
+
68
120
  # Build usage_data from the first chunk's model
69
121
  usage_data = TokenUsageStats(
70
122
  model=chunks[0].model,
71
- usage=Usage(),
123
+ usage=TokenMetrics(),
72
124
  )
73
125
  # Sum up usage from all chunks
74
126
  has_usage = False
@@ -106,6 +158,26 @@ class OpenAIWrapper(BaseOpenAIWrapper):
106
158
 
107
159
  return response
108
160
 
161
def parse(
    self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
) -> Union[ChatCompletion, Iterator[ChatCompletion]]:
    """Call the client's beta chat-completions ``parse`` and record token usage.

    With ``stream`` truthy in ``kwargs`` the result is wrapped in a stream
    interceptor that is handed a usage callback; otherwise usage is
    extracted from the response and logged before the response is returned.
    """
    logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)

    # Both modes issue the same underlying call; only the handling differs.
    result = self.client.beta.chat.completions.parse(*args, **kwargs)

    if kwargs.get("stream", False):
        on_usage = _create_usage_callback(execution_id, self._log_usage)
        return OpenAISyncStreamInterceptor(
            base_stream=result,
            usage_callback=on_usage,
        )

    stats = self._process_response_usage(result)
    if stats:
        self._log_usage(stats, execution_id=execution_id)

    return result
180
+
109
181
 
110
182
  class AsyncOpenAIWrapper(BaseOpenAIWrapper):
111
183
  async def create(
@@ -131,6 +203,26 @@ class AsyncOpenAIWrapper(BaseOpenAIWrapper):
131
203
  self._log_usage(usage_data, execution_id=execution_id)
132
204
  return response
133
205
 
206
async def parse(
    self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
    """Await the client's beta chat-completions ``parse`` and record token usage.

    With ``stream`` truthy in ``kwargs`` the awaited result is wrapped in an
    async stream interceptor that is handed a usage callback; otherwise usage
    is extracted from the response and logged before the response is returned.
    """
    logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)

    # Both modes await the same underlying call; only the handling differs.
    result = await self.client.beta.chat.completions.parse(*args, **kwargs)

    if kwargs.get("stream", False):
        on_usage = _create_usage_callback(execution_id, self._log_usage)
        return OpenAIAsyncStreamInterceptor(
            base_stream=result,
            usage_callback=on_usage,
        )

    stats = self._process_response_usage(result)
    if stats:
        self._log_usage(stats, execution_id=execution_id)

    return result
225
+
134
226
 
135
227
  @overload
136
228
  def tokenator_openai(