tokenator 0.1.15__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {tokenator-0.1.15 → tokenator-0.2.0}/PKG-INFO +63 -6
  2. {tokenator-0.1.15 → tokenator-0.2.0}/README.md +60 -4
  3. {tokenator-0.1.15 → tokenator-0.2.0}/pyproject.toml +5 -2
  4. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/__init__.py +8 -1
  5. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/base_wrapper.py +4 -1
  6. tokenator-0.2.0/src/tokenator/gemini/__init__.py +5 -0
  7. tokenator-0.2.0/src/tokenator/gemini/client_gemini.py +230 -0
  8. tokenator-0.2.0/src/tokenator/gemini/stream_interceptors.py +77 -0
  9. tokenator-0.2.0/src/tokenator/usage.py +590 -0
  10. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/utils.py +7 -4
  11. tokenator-0.1.15/src/tokenator/usage.py +0 -503
  12. {tokenator-0.1.15 → tokenator-0.2.0}/LICENSE +0 -0
  13. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/anthropic/client_anthropic.py +0 -0
  14. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/anthropic/stream_interceptors.py +0 -0
  15. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/create_migrations.py +0 -0
  16. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/env.py +0 -0
  17. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/script.py.mako +0 -0
  18. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +0 -0
  19. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
  20. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations.py +0 -0
  21. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/models.py +0 -0
  22. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/openai/client_openai.py +0 -0
  23. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/openai/stream_interceptors.py +0 -0
  24. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/schemas.py +0 -0
  25. {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/state.py +0 -0
@@ -1,10 +1,10 @@
  Metadata-Version: 2.3
  Name: tokenator
- Version: 0.1.15
+ Version: 0.2.0
  Summary: Token usage tracking wrapper for LLMs
  License: MIT
  Author: Ujjwal Maheshwari
- Author-email: your.email@example.com
+ Author-email: ujjwalm29@gmail.com
  Requires-Python: >=3.9,<4.0
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
@@ -15,23 +15,28 @@ Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: alembic (>=1.13.0,<2.0.0)
  Requires-Dist: anthropic (>=0.43.0,<0.44.0)
+ Requires-Dist: google-genai (>=1.3.0,<2.0.0)
  Requires-Dist: ipython
  Requires-Dist: openai (>=1.59.0,<2.0.0)
  Requires-Dist: requests (>=2.32.3,<3.0.0)
  Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
  Description-Content-Type: text/markdown

- # Tokenator : Track and analyze LLM token usage and cost
+ # Tokenator : Track, analyze, and compare LLM token usage and costs

  Have you ever wondered :
  - How many tokens does your AI agent consume?
- - How much does it cost to do run a complex AI workflow with multiple LLM providers?
+ - How much does it cost to run a complex AI workflow with multiple LLM providers?
+ - Which LLM is more cost-effective for my use case?
  - How much money/tokens did you spend today on developing with LLMs?

- Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
+ Fear not, tokenator is here! With tokenator's easy-to-use functions, you can start tracking LLM usage in a matter of minutes.

  Get started with just 3 lines of code!

+ Tokenator supports the official SDKs from openai, anthropic, and google-genai (the new one).
+ LLM providers that use the openai SDK, such as perplexity, deepseek, and xAI, are also supported.
+
  ## Installation

  ```bash
@@ -114,6 +119,10 @@ print(cost.last_hour().model_dump_json(indent=4))
  }
  ```

+ ## Cookbooks
+
+ Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
  ## Features

  - Drop-in replacement for OpenAI, Anthropic client
@@ -173,6 +182,54 @@ print(usage.last_execution().model_dump_json(indent=4))
  """
  ```

+ ### Google (Gemini - through AI Studio)
+
+ ```python
+ import os
+
+ from google import genai
+ from tokenator import tokenator_gemini, usage
+
+ gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+ # Wrap it with Tokenator
+ client = tokenator_gemini(gemini_client)
+
+ # Use it exactly like the google-genai client
+ response = client.models.generate_content(
+     model="gemini-2.0-flash",
+     contents="hello how are you",
+ )
+
+ print(response)
+
+ print(usage.last_execution().model_dump_json(indent=4))
+ """
+ {
+     "total_cost": 0.0001,
+     "total_tokens": 23,
+     "prompt_tokens": 10,
+     "completion_tokens": 13,
+     "providers": [
+         {
+             "total_cost": 0.0001,
+             "total_tokens": 23,
+             "prompt_tokens": 10,
+             "completion_tokens": 13,
+             "provider": "gemini",
+             "models": [
+                 {
+                     "total_cost": 0.0001,
+                     "total_tokens": 23,
+                     "prompt_tokens": 10,
+                     "completion_tokens": 13,
+                     "model": "gemini-2.0-flash"
+                 }
+             ]
+         }
+     ]
+ }
+ """
+ ```
+
  ### xAI

  You can use xAI models through the `openai` SDK and track usage using `provider` parameter in `tokenator`.
@@ -221,7 +278,7 @@ client = tokenator_openai(perplexity_client, db_path=temp_db, provider="perplexi

  # Use it exactly like the OpenAI client but with perplexity models
  response = client.chat.completions.create(
-     model="llama-3.1-sonar-small-128k-online",
+     model="sonar",
      messages=[{"role": "user", "content": "Hello!"}]
  )

@@ -1,14 +1,18 @@
- # Tokenator : Track and analyze LLM token usage and cost
+ # Tokenator : Track, analyze, and compare LLM token usage and costs

  Have you ever wondered :
  - How many tokens does your AI agent consume?
- - How much does it cost to do run a complex AI workflow with multiple LLM providers?
+ - How much does it cost to run a complex AI workflow with multiple LLM providers?
+ - Which LLM is more cost-effective for my use case?
  - How much money/tokens did you spend today on developing with LLMs?

- Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
+ Fear not, tokenator is here! With tokenator's easy-to-use functions, you can start tracking LLM usage in a matter of minutes.

  Get started with just 3 lines of code!

+ Tokenator supports the official SDKs from openai, anthropic, and google-genai (the new one).
+ LLM providers that use the openai SDK, such as perplexity, deepseek, and xAI, are also supported.
+
  ## Installation

  ```bash
@@ -91,6 +95,10 @@ print(cost.last_hour().model_dump_json(indent=4))
  }
  ```

+ ## Cookbooks
+
+ Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
  ## Features

  - Drop-in replacement for OpenAI, Anthropic client
@@ -150,6 +158,54 @@ print(usage.last_execution().model_dump_json(indent=4))
  """
  ```

+ ### Google (Gemini - through AI Studio)
+
+ ```python
+ import os
+
+ from google import genai
+ from tokenator import tokenator_gemini, usage
+
+ gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+ # Wrap it with Tokenator
+ client = tokenator_gemini(gemini_client)
+
+ # Use it exactly like the google-genai client
+ response = client.models.generate_content(
+     model="gemini-2.0-flash",
+     contents="hello how are you",
+ )
+
+ print(response)
+
+ print(usage.last_execution().model_dump_json(indent=4))
+ """
+ {
+     "total_cost": 0.0001,
+     "total_tokens": 23,
+     "prompt_tokens": 10,
+     "completion_tokens": 13,
+     "providers": [
+         {
+             "total_cost": 0.0001,
+             "total_tokens": 23,
+             "prompt_tokens": 10,
+             "completion_tokens": 13,
+             "provider": "gemini",
+             "models": [
+                 {
+                     "total_cost": 0.0001,
+                     "total_tokens": 23,
+                     "prompt_tokens": 10,
+                     "completion_tokens": 13,
+                     "model": "gemini-2.0-flash"
+                 }
+             ]
+         }
+     ]
+ }
+ """
+ ```
+
  ### xAI

  You can use xAI models through the `openai` SDK and track usage using `provider` parameter in `tokenator`.
@@ -198,7 +254,7 @@ client = tokenator_openai(perplexity_client, db_path=temp_db, provider="perplexi

  # Use it exactly like the OpenAI client but with perplexity models
  response = client.chat.completions.create(
-     model="llama-3.1-sonar-small-128k-online",
+     model="sonar",
      messages=[{"role": "user", "content": "Hello!"}]
  )

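The perplexity and xAI hunks above are instances of one pattern: wrap any OpenAI-compatible client with `tokenator_openai` and use `provider` to keep each service's usage rows separate. A minimal sketch under that assumption — the deepseek base URL and model name below are illustrative, not taken from this diff:

```python
import os

from openai import OpenAI
from tokenator import tokenator_openai, usage

# Any OpenAI-compatible endpoint can be wrapped the same way; only the
# base_url, api_key, and provider label change between services.
deepseek_client = OpenAI(
    api_key=os.getenv("DEEPSEEK_API_KEY"),
    base_url="https://api.deepseek.com",
)
client = tokenator_openai(deepseek_client, provider="deepseek")

response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(usage.last_execution().model_dump_json(indent=4))
```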
@@ -1,8 +1,8 @@
  [tool.poetry]
  name = "tokenator"
- version = "0.1.15"
+ version = "0.2.0"
  description = "Token usage tracking wrapper for LLMs"
- authors = ["Ujjwal Maheshwari <your.email@example.com>"]
+ authors = ["Ujjwal Maheshwari <ujjwalm29@gmail.com>"]
  readme = "README.md"
  license = "MIT"
  packages = [{include = "tokenator", from = "src"}]
@@ -15,12 +15,15 @@ requests = "^2.32.3"
  alembic = "^1.13.0"
  anthropic = "^0.43.0"
  ipython = "*"
+ google-genai = "^1.3.0"

  [tool.poetry.group.dev.dependencies]
  pytest = "^8.0.0"
  pytest-asyncio = "^0.23.0"
  pytest-cov = "^4.1.0"
  ruff = "^0.8.4"
+ langsmith = "^0.3.0"
+ python-dotenv = "^1.0.1"

  [build-system]
  requires = ["poetry-core"]
@@ -3,11 +3,18 @@
  import logging
  from .openai.client_openai import tokenator_openai
  from .anthropic.client_anthropic import tokenator_anthropic
+ from .gemini.client_gemini import tokenator_gemini
  from . import usage
  from .utils import get_default_db_path
  from .usage import TokenUsageService

  usage = TokenUsageService() # noqa: F811
- __all__ = ["tokenator_openai", "tokenator_anthropic", "usage", "get_default_db_path"]
+ __all__ = [
+     "tokenator_openai",
+     "tokenator_anthropic",
+     "tokenator_gemini",
+     "usage",
+     "get_default_db_path",
+ ]

  logger = logging.getLogger(__name__)
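The expanded `__all__` above puts all three wrappers behind one import path. A minimal sketch of the public surface after this change:

```python
# Everything tokenator 0.2.0 exports now comes from the package root.
from tokenator import (
    tokenator_openai,
    tokenator_anthropic,
    tokenator_gemini,
    usage,
    get_default_db_path,
)

# The default SQLite database location used for token tracking.
print(get_default_db_path())
```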
@@ -112,7 +112,10 @@ class BaseWrapper:
          try:
              self._log_usage_impl(token_usage_stats, session, execution_id)
              session.commit()
-             logger.debug("Successfully committed token usage for execution_id: %s", execution_id)
+             logger.debug(
+                 "Successfully committed token usage for execution_id: %s",
+                 execution_id,
+             )
          except Exception as e:
              logger.error("Failed to log token usage: %s", str(e))
              session.rollback()
@@ -0,0 +1,5 @@
+ """Gemini client wrapper with token usage tracking."""
+
+ from .client_gemini import tokenator_gemini
+
+ __all__ = ["tokenator_gemini"]
@@ -0,0 +1,230 @@
+ """Gemini client wrapper with token usage tracking."""
+
+ from typing import Any, Optional, Iterator, AsyncIterator
+ import logging
+
+ from google import genai
+ from google.genai.types import GenerateContentResponse
+
+ from ..models import (
+     TokenMetrics,
+     TokenUsageStats,
+ )
+ from ..base_wrapper import BaseWrapper, ResponseType
+ from .stream_interceptors import (
+     GeminiAsyncStreamInterceptor,
+     GeminiSyncStreamInterceptor,
+ )
+ from ..state import is_tokenator_enabled
+
+ logger = logging.getLogger(__name__)
+
+
+ def _create_usage_callback(execution_id, log_usage_fn):
+     """Creates a callback function for processing usage statistics from stream chunks."""
+
+     def usage_callback(chunks):
+         if not chunks:
+             return
+
+         # Skip if tokenator is disabled
+         if not is_tokenator_enabled:
+             logger.debug("Tokenator is disabled - skipping stream usage logging")
+             return
+
+         logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
+         # Build usage_data from the first chunk's model
+         usage_data = TokenUsageStats(
+             model=chunks[0].model_version,
+             usage=TokenMetrics(),
+         )
+
+         # Only take usage from the last chunk as it contains complete usage info
+         last_chunk = chunks[-1]
+         if last_chunk.usage_metadata:
+             usage_data.usage.prompt_tokens = (
+                 last_chunk.usage_metadata.prompt_token_count
+             )
+             usage_data.usage.completion_tokens = (
+                 last_chunk.usage_metadata.candidates_token_count or 0
+             )
+             usage_data.usage.total_tokens = last_chunk.usage_metadata.total_token_count
+             log_usage_fn(usage_data, execution_id=execution_id)
+
+     return usage_callback
+
+
+ class BaseGeminiWrapper(BaseWrapper):
+     def __init__(self, client, db_path=None, provider: str = "gemini"):
+         super().__init__(client, db_path)
+         self.provider = provider
+         self._async_wrapper = None
+
+     def _process_response_usage(
+         self, response: ResponseType
+     ) -> Optional[TokenUsageStats]:
+         """Process and log usage statistics from a response."""
+         try:
+             if isinstance(response, GenerateContentResponse):
+                 if response.usage_metadata is None:
+                     return None
+                 usage = TokenMetrics(
+                     prompt_tokens=response.usage_metadata.prompt_token_count,
+                     completion_tokens=response.usage_metadata.candidates_token_count,
+                     total_tokens=response.usage_metadata.total_token_count,
+                 )
+                 return TokenUsageStats(model=response.model_version, usage=usage)
+
+             elif isinstance(response, dict):
+                 usage_dict = response.get("usage_metadata")
+                 if not usage_dict:
+                     return None
+                 usage = TokenMetrics(
+                     prompt_tokens=usage_dict.get("prompt_token_count", 0),
+                     completion_tokens=usage_dict.get("candidates_token_count", 0),
+                     total_tokens=usage_dict.get("total_token_count", 0),
+                 )
+                 return TokenUsageStats(
+                     model=response.get("model", "unknown"), usage=usage
+                 )
+         except Exception as e:
+             logger.warning("Failed to process usage stats: %s", str(e))
+             return None
+         return None
+
+     @property
+     def chat(self):
+         return self
+
+     @property
+     def chats(self):
+         return self
+
+     @property
+     def models(self):
+         return self
+
+     @property
+     def aio(self):
+         if self._async_wrapper is None:
+             self._async_wrapper = AsyncGeminiWrapper(self)
+         return self._async_wrapper
+
+     def count_tokens(self, *args: Any, **kwargs: Any):
+         return self.client.models.count_tokens(*args, **kwargs)
+
+
+ class AsyncGeminiWrapper:
+     """Async wrapper for Gemini client to match the official SDK structure."""
+
+     def __init__(self, wrapper: BaseGeminiWrapper):
+         self.wrapper = wrapper
+         self._models = None
+
+     @property
+     def models(self):
+         if self._models is None:
+             self._models = AsyncModelsWrapper(self.wrapper)
+         return self._models
+
+
+ class AsyncModelsWrapper:
+     """Async wrapper for models to match the official SDK structure."""
+
+     def __init__(self, wrapper: BaseGeminiWrapper):
+         self.wrapper = wrapper
+
+     async def generate_content(
+         self, *args: Any, **kwargs: Any
+     ) -> GenerateContentResponse:
+         """Async method for generate_content."""
+         execution_id = kwargs.pop("execution_id", None)
+         return await self.wrapper.generate_content_async(
+             *args, execution_id=execution_id, **kwargs
+         )
+
+     async def generate_content_stream(
+         self, *args: Any, **kwargs: Any
+     ) -> AsyncIterator[GenerateContentResponse]:
+         """Async method for generate_content_stream."""
+         execution_id = kwargs.pop("execution_id", None)
+         return await self.wrapper.generate_content_stream_async(
+             *args, execution_id=execution_id, **kwargs
+         )
+
+
+ class GeminiWrapper(BaseGeminiWrapper):
+     def generate_content(
+         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+     ) -> GenerateContentResponse:
+         """Generate content and log token usage."""
+         logger.debug("Generating content with args: %s, kwargs: %s", args, kwargs)
+
+         response = self.client.models.generate_content(*args, **kwargs)
+         usage_data = self._process_response_usage(response)
+         if usage_data:
+             self._log_usage(usage_data, execution_id=execution_id)
+
+         return response
+
+     def generate_content_stream(
+         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+     ) -> Iterator[GenerateContentResponse]:
+         """Generate content with streaming and log token usage."""
+         logger.debug(
+             "Generating content stream with args: %s, kwargs: %s", args, kwargs
+         )
+
+         base_stream = self.client.models.generate_content_stream(*args, **kwargs)
+         return GeminiSyncStreamInterceptor(
+             base_stream=base_stream,
+             usage_callback=_create_usage_callback(execution_id, self._log_usage),
+         )
+
+     async def generate_content_async(
+         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+     ) -> GenerateContentResponse:
+         """Generate content asynchronously and log token usage."""
+         logger.debug("Generating content async with args: %s, kwargs: %s", args, kwargs)
+
+         response = await self.client.aio.models.generate_content(*args, **kwargs)
+         usage_data = self._process_response_usage(response)
+         if usage_data:
+             self._log_usage(usage_data, execution_id=execution_id)
+
+         return response
+
+     async def generate_content_stream_async(
+         self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+     ) -> AsyncIterator[GenerateContentResponse]:
+         """Generate content with async streaming and log token usage."""
+         logger.debug(
+             "Generating content stream async with args: %s, kwargs: %s", args, kwargs
+         )
+
+         base_stream = await self.client.aio.models.generate_content_stream(
+             *args, **kwargs
+         )
+         return GeminiAsyncStreamInterceptor(
+             base_stream=base_stream,
+             usage_callback=_create_usage_callback(execution_id, self._log_usage),
+         )
+
+
+ def tokenator_gemini(
+     client: genai.Client,
+     db_path: Optional[str] = None,
+     provider: str = "gemini",
+ ) -> GeminiWrapper:
+     """Create a token-tracking wrapper for a Gemini client.
+
+     Args:
+         client: Gemini client instance
+         db_path: Optional path to SQLite database for token tracking
+         provider: Provider name, defaults to "gemini"
+     """
+     if not isinstance(client, genai.Client):
+         raise ValueError("Client must be an instance of genai.Client")
+
+     return GeminiWrapper(client=client, db_path=db_path, provider=provider)
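The property chain above (`models` returning `self`, `aio` lazily constructing `AsyncGeminiWrapper`) is what lets the wrapper mirror the google-genai client shape, so call sites need no changes. A minimal sketch of both call paths, assuming a valid `GEMINI_API_KEY`:

```python
import asyncio
import os

from google import genai
from tokenator import tokenator_gemini, usage

client = tokenator_gemini(genai.Client(api_key=os.getenv("GEMINI_API_KEY")))

# Sync path: routed through GeminiWrapper.generate_content, which logs usage.
response = client.models.generate_content(
    model="gemini-2.0-flash", contents="hello"
)

# Async path: client.aio.models mirrors the SDK's aio namespace and logs
# usage through the same _log_usage machinery.
async def main() -> None:
    await client.aio.models.generate_content(
        model="gemini-2.0-flash", contents="hello"
    )

asyncio.run(main())
print(usage.last_execution().model_dump_json(indent=4))
```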
@@ -0,0 +1,77 @@
+ """Stream interceptors for Gemini responses."""
+
+ import logging
+ from typing import AsyncIterator, Callable, List, Optional, TypeVar, Iterator
+
+ logger = logging.getLogger(__name__)
+
+ _T = TypeVar("_T")  # GenerateContentResponse
+
+
+ class GeminiAsyncStreamInterceptor(AsyncIterator[_T]):
+     """
+     A wrapper around Gemini async stream that intercepts each chunk to handle usage or
+     logging logic.
+     """
+
+     def __init__(
+         self,
+         base_stream: AsyncIterator[_T],
+         usage_callback: Optional[Callable[[List[_T]], None]] = None,
+     ):
+         self._base_stream = base_stream
+         self._usage_callback = usage_callback
+         self._chunks: List[_T] = []
+
+     def __aiter__(self) -> AsyncIterator[_T]:
+         """Return self as async iterator."""
+         return self
+
+     async def __anext__(self) -> _T:
+         """Get next chunk and track it."""
+         try:
+             chunk = await self._base_stream.__anext__()
+         except StopAsyncIteration:
+             # Once the base stream is fully consumed, we can do final usage/logging.
+             if self._usage_callback and self._chunks:
+                 self._usage_callback(self._chunks)
+             raise
+
+         # Intercept each chunk
+         self._chunks.append(chunk)
+         return chunk
+
+
+ class GeminiSyncStreamInterceptor(Iterator[_T]):
+     """
+     A wrapper around Gemini sync stream that intercepts each chunk to handle usage or
+     logging logic.
+     """
+
+     def __init__(
+         self,
+         base_stream: Iterator[_T],
+         usage_callback: Optional[Callable[[List[_T]], None]] = None,
+     ):
+         self._base_stream = base_stream
+         self._usage_callback = usage_callback
+         self._chunks: List[_T] = []
+
+     def __iter__(self) -> Iterator[_T]:
+         """Return self as iterator."""
+         return self
+
+     def __next__(self) -> _T:
+         """Get next chunk and track it."""
+         try:
+             chunk = next(self._base_stream)
+         except StopIteration:
+             # Once the base stream is fully consumed, we can do final usage/logging.
+             if self._usage_callback and self._chunks:
+                 self._usage_callback(self._chunks)
+             raise
+
+         # Intercept each chunk
+         self._chunks.append(chunk)
+         return chunk
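Both interceptors buffer every chunk and fire `usage_callback` exactly once, when the wrapped stream raises its stop exception. A self-contained sketch with a stand-in iterator (no Gemini call involved):

```python
from tokenator.gemini.stream_interceptors import GeminiSyncStreamInterceptor

def fake_stream():
    # Stand-in for client.models.generate_content_stream(...).
    yield "chunk-1"
    yield "chunk-2"

def on_done(chunks):
    # Invoked once, after StopIteration, with every buffered chunk.
    print(f"stream finished with {len(chunks)} chunks")

for chunk in GeminiSyncStreamInterceptor(fake_stream(), usage_callback=on_done):
    print(chunk)
# -> chunk-1, chunk-2, then "stream finished with 2 chunks"
```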