tokenator 0.1.15.tar.gz → 0.2.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tokenator-0.1.15 → tokenator-0.2.0}/PKG-INFO +63 -6
- {tokenator-0.1.15 → tokenator-0.2.0}/README.md +60 -4
- {tokenator-0.1.15 → tokenator-0.2.0}/pyproject.toml +5 -2
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/__init__.py +8 -1
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/base_wrapper.py +4 -1
- tokenator-0.2.0/src/tokenator/gemini/__init__.py +5 -0
- tokenator-0.2.0/src/tokenator/gemini/client_gemini.py +230 -0
- tokenator-0.2.0/src/tokenator/gemini/stream_interceptors.py +77 -0
- tokenator-0.2.0/src/tokenator/usage.py +590 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/utils.py +7 -4
- tokenator-0.1.15/src/tokenator/usage.py +0 -503
- {tokenator-0.1.15 → tokenator-0.2.0}/LICENSE +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/anthropic/client_anthropic.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/anthropic/stream_interceptors.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/create_migrations.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/env.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/script.py.mako +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/versions/f028b8155fed_adding_detailed_input_and_output_token_.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/migrations.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/models.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/openai/client_openai.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/openai/stream_interceptors.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/schemas.py +0 -0
- {tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/state.py +0 -0
{tokenator-0.1.15 → tokenator-0.2.0}/PKG-INFO

@@ -1,10 +1,10 @@
 Metadata-Version: 2.3
 Name: tokenator
-Version: 0.1.15
+Version: 0.2.0
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
-Author-email:
+Author-email: ujjwalm29@gmail.com
 Requires-Python: >=3.9,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
@@ -15,23 +15,28 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: alembic (>=1.13.0,<2.0.0)
 Requires-Dist: anthropic (>=0.43.0,<0.44.0)
+Requires-Dist: google-genai (>=1.3.0,<2.0.0)
 Requires-Dist: ipython
 Requires-Dist: openai (>=1.59.0,<2.0.0)
 Requires-Dist: requests (>=2.32.3,<3.0.0)
 Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
 Description-Content-Type: text/markdown

-# Tokenator : Track
+# Tokenator : Track, analyze, compare LLM token usage and costs

 Have you ever wondered :
 - How many tokens does your AI agent consume?
-- How much does it cost to
+- How much does it cost to run a complex AI workflow with multiple LLM providers?
+- Which LLM is more cost effective for my use case?
 - How much money/tokens did you spend today on developing with LLMs?

-Afraid not, tokenator is here! With tokenator's easy to use
+Afraid not, tokenator is here! With tokenator's easy to use functions, you can start tracking LLM usage in a matter of minutes.

 Get started with just 3 lines of code!

+Tokenator supports the official SDKs from openai, anthropic and google-genai(the new one).
+LLM providers which use the openai SDK like perplexity, deepseek and xAI are also supported.
+
 ## Installation

 ```bash
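For orientation, the "wrap the client, keep the same API" pattern this description refers to looks roughly like the sketch below. It is not a quote from the package; it assumes the openai SDK is installed, OPENAI_API_KEY is set, and the model name is only an example.

```python
# Hedged sketch of the wrap-and-call pattern described above (assumed setup:
# `pip install tokenator openai` and OPENAI_API_KEY exported in the shell).
from openai import OpenAI

from tokenator import tokenator_openai, usage

client = tokenator_openai(OpenAI())  # drop-in wrapper around the official client

response = client.chat.completions.create(
    model="gpt-4o-mini",  # example model name, not prescribed by tokenator
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
print(usage.last_execution().model_dump_json(indent=4))
```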
@@ -114,6 +119,10 @@ print(cost.last_hour().model_dump_json(indent=4))
 }
 ```

+## Cookbooks
+
+Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
 ## Features

 - Drop-in replacement for OpenAI, Anthropic client
@@ -173,6 +182,54 @@ print(usage.last_execution().model_dump_json(indent=4))
 """
 ```

+### Google (Gemini - through AI studio)
+
+```python
+from google import genai
+from tokenator import tokenator_gemini
+
+gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+# Wrap it with Tokenator
+client = tokenator_gemini(gemini_client)
+
+# Use it exactly like the google-genai client
+response = models.generate_content(
+    model="gemini-2.0-flash",
+    contents="hello how are you",
+)
+
+print(response)
+
+print(usage.last_execution().model_dump_json(indent=4))
+"""
+{
+    "total_cost": 0.0001,
+    "total_tokens": 23,
+    "prompt_tokens": 10,
+    "completion_tokens": 13,
+    "providers": [
+        {
+            "total_cost": 0.0001,
+            "total_tokens": 23,
+            "prompt_tokens": 10,
+            "completion_tokens": 13,
+            "provider": "gemini",
+            "models": [
+                {
+                    "total_cost": 0.0004,
+                    "total_tokens": 79,
+                    "prompt_tokens": 52,
+                    "completion_tokens": 27,
+                    "model": "gemini-2.0-flash"
+                }
+            ]
+        }
+    ]
+}
+"""
+```
+
 ### xAI

 You can use xAI models through the `openai` SDK and track usage using `provider` parameter in `tokenator`.
@@ -221,7 +278,7 @@ client = tokenator_openai(perplexity_client, db_path=temp_db, provider="perplexi

 # Use it exactly like the OpenAI client but with perplexity models
 response = client.chat.completions.create(
-    model="
+    model="sonar",
     messages=[{"role": "user", "content": "Hello!"}]
 )

{tokenator-0.1.15 → tokenator-0.2.0}/README.md

@@ -1,14 +1,18 @@
-# Tokenator : Track
+# Tokenator : Track, analyze, compare LLM token usage and costs

 Have you ever wondered :
 - How many tokens does your AI agent consume?
-- How much does it cost to
+- How much does it cost to run a complex AI workflow with multiple LLM providers?
+- Which LLM is more cost effective for my use case?
 - How much money/tokens did you spend today on developing with LLMs?

-Afraid not, tokenator is here! With tokenator's easy to use
+Afraid not, tokenator is here! With tokenator's easy to use functions, you can start tracking LLM usage in a matter of minutes.

 Get started with just 3 lines of code!

+Tokenator supports the official SDKs from openai, anthropic and google-genai(the new one).
+LLM providers which use the openai SDK like perplexity, deepseek and xAI are also supported.
+
 ## Installation

 ```bash
@@ -91,6 +95,10 @@ print(cost.last_hour().model_dump_json(indent=4))
 }
 ```

+## Cookbooks
+
+Want more code, example use cases and ideas? Check out our amazing [cookbooks](https://github.com/ujjwalm29/tokenator/tree/main/docs/cookbooks)!
+
 ## Features

 - Drop-in replacement for OpenAI, Anthropic client
@@ -150,6 +158,54 @@ print(usage.last_execution().model_dump_json(indent=4))
 """
 ```

+### Google (Gemini - through AI studio)
+
+```python
+from google import genai
+from tokenator import tokenator_gemini
+
+gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+# Wrap it with Tokenator
+client = tokenator_gemini(gemini_client)
+
+# Use it exactly like the google-genai client
+response = models.generate_content(
+    model="gemini-2.0-flash",
+    contents="hello how are you",
+)
+
+print(response)
+
+print(usage.last_execution().model_dump_json(indent=4))
+"""
+{
+    "total_cost": 0.0001,
+    "total_tokens": 23,
+    "prompt_tokens": 10,
+    "completion_tokens": 13,
+    "providers": [
+        {
+            "total_cost": 0.0001,
+            "total_tokens": 23,
+            "prompt_tokens": 10,
+            "completion_tokens": 13,
+            "provider": "gemini",
+            "models": [
+                {
+                    "total_cost": 0.0004,
+                    "total_tokens": 79,
+                    "prompt_tokens": 52,
+                    "completion_tokens": 27,
+                    "model": "gemini-2.0-flash"
+                }
+            ]
+        }
+    ]
+}
+"""
+```
+
 ### xAI

 You can use xAI models through the `openai` SDK and track usage using `provider` parameter in `tokenator`.
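The quickstart as published assigns the wrapper to `client` but then calls `models.generate_content(...)` on a name that is never defined; judging from the wrapper code added in this release (`client_gemini.py` below, whose `models` property returns the wrapper itself), the call presumably goes through the wrapped client. A self-contained sketch under that assumption:

```python
# Runnable variant of the Gemini quickstart above (assumes
# `pip install tokenator google-genai` and GEMINI_API_KEY in the environment).
import os

from google import genai

from tokenator import tokenator_gemini, usage

gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
client = tokenator_gemini(gemini_client)

# Route the call through the wrapper's `models` property so usage is tracked.
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents="hello how are you",
)
print(response)
print(usage.last_execution().model_dump_json(indent=4))
```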
@@ -198,7 +254,7 @@ client = tokenator_openai(perplexity_client, db_path=temp_db, provider="perplexi

 # Use it exactly like the OpenAI client but with perplexity models
 response = client.chat.completions.create(
-    model="
+    model="sonar",
     messages=[{"role": "user", "content": "Hello!"}]
 )

{tokenator-0.1.15 → tokenator-0.2.0}/pyproject.toml

@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "tokenator"
-version = "0.1.15"
+version = "0.2.0"
 description = "Token usage tracking wrapper for LLMs"
-authors = ["Ujjwal Maheshwari <
+authors = ["Ujjwal Maheshwari <ujjwalm29@gmail.com>"]
 readme = "README.md"
 license = "MIT"
 packages = [{include = "tokenator", from = "src"}]
@@ -15,12 +15,15 @@ requests = "^2.32.3"
 alembic = "^1.13.0"
 anthropic = "^0.43.0"
 ipython = "*"
+google-genai = "^1.3.0"

 [tool.poetry.group.dev.dependencies]
 pytest = "^8.0.0"
 pytest-asyncio = "^0.23.0"
 pytest-cov = "^4.1.0"
 ruff = "^0.8.4"
+langsmith = "^0.3.0"
+python-dotenv = "^1.0.1"

 [build-system]
 requires = ["poetry-core"]
{tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/__init__.py

@@ -3,11 +3,18 @@
 import logging
 from .openai.client_openai import tokenator_openai
 from .anthropic.client_anthropic import tokenator_anthropic
+from .gemini.client_gemini import tokenator_gemini
 from . import usage
 from .utils import get_default_db_path
 from .usage import TokenUsageService

 usage = TokenUsageService()  # noqa: F811
-__all__ = [
+__all__ = [
+    "tokenator_openai",
+    "tokenator_anthropic",
+    "tokenator_gemini",
+    "usage",
+    "get_default_db_path",
+]

 logger = logging.getLogger(__name__)
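After this change the package's public surface is the three wrapper factories, the module-level `usage` service, and `get_default_db_path`. A minimal import check (a sketch; it assumes tokenator 0.2.0 and its provider SDK dependencies are installed):

```python
# Everything listed in __all__ above should be importable from the top level.
from tokenator import (
    tokenator_openai,
    tokenator_anthropic,
    tokenator_gemini,
    usage,
    get_default_db_path,
)

print(get_default_db_path())   # default SQLite path used for usage logging
print(type(usage).__name__)    # "TokenUsageService", instantiated at import time
```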
{tokenator-0.1.15 → tokenator-0.2.0}/src/tokenator/base_wrapper.py

@@ -112,7 +112,10 @@ class BaseWrapper:
         try:
             self._log_usage_impl(token_usage_stats, session, execution_id)
             session.commit()
-            logger.debug(
+            logger.debug(
+                "Successfully committed token usage for execution_id: %s",
+                execution_id,
+            )
         except Exception as e:
             logger.error("Failed to log token usage: %s", str(e))
             session.rollback()
tokenator-0.2.0/src/tokenator/gemini/client_gemini.py

@@ -0,0 +1,230 @@
+"""Gemini client wrapper with token usage tracking."""
+
+from typing import Any, Optional, Iterator, AsyncIterator
+import logging
+
+from google import genai
+from google.genai.types import GenerateContentResponse
+
+from ..models import (
+    TokenMetrics,
+    TokenUsageStats,
+)
+from ..base_wrapper import BaseWrapper, ResponseType
+from .stream_interceptors import (
+    GeminiAsyncStreamInterceptor,
+    GeminiSyncStreamInterceptor,
+)
+from ..state import is_tokenator_enabled
+
+logger = logging.getLogger(__name__)
+
+
+def _create_usage_callback(execution_id, log_usage_fn):
+    """Creates a callback function for processing usage statistics from stream chunks."""
+
+    def usage_callback(chunks):
+        if not chunks:
+            return
+
+        # Skip if tokenator is disabled
+        if not is_tokenator_enabled:
+            logger.debug("Tokenator is disabled - skipping stream usage logging")
+            return
+
+        logger.debug("Processing stream usage for execution_id: %s", execution_id)
+
+        # Build usage_data from the first chunk's model
+        usage_data = TokenUsageStats(
+            model=chunks[0].model_version,
+            usage=TokenMetrics(),
+        )
+
+        # Only take usage from the last chunk as it contains complete usage info
+        last_chunk = chunks[-1]
+        if last_chunk.usage_metadata:
+            usage_data.usage.prompt_tokens = (
+                last_chunk.usage_metadata.prompt_token_count
+            )
+            usage_data.usage.completion_tokens = (
+                last_chunk.usage_metadata.candidates_token_count or 0
+            )
+            usage_data.usage.total_tokens = last_chunk.usage_metadata.total_token_count
+        log_usage_fn(usage_data, execution_id=execution_id)
+
+    return usage_callback
+
+
+class BaseGeminiWrapper(BaseWrapper):
+    def __init__(self, client, db_path=None, provider: str = "gemini"):
+        super().__init__(client, db_path)
+        self.provider = provider
+        self._async_wrapper = None
+
+    def _process_response_usage(
+        self, response: ResponseType
+    ) -> Optional[TokenUsageStats]:
+        """Process and log usage statistics from a response."""
+        try:
+            if isinstance(response, GenerateContentResponse):
+                if response.usage_metadata is None:
+                    return None
+                usage = TokenMetrics(
+                    prompt_tokens=response.usage_metadata.prompt_token_count,
+                    completion_tokens=response.usage_metadata.candidates_token_count,
+                    total_tokens=response.usage_metadata.total_token_count,
+                )
+                return TokenUsageStats(model=response.model_version, usage=usage)
+
+            elif isinstance(response, dict):
+                usage_dict = response.get("usage_metadata")
+                if not usage_dict:
+                    return None
+                usage = TokenMetrics(
+                    prompt_tokens=usage_dict.get("prompt_token_count", 0),
+                    completion_tokens=usage_dict.get("candidates_token_count", 0),
+                    total_tokens=usage_dict.get("total_token_count", 0),
+                )
+                return TokenUsageStats(
+                    model=response.get("model", "unknown"), usage=usage
+                )
+        except Exception as e:
+            logger.warning("Failed to process usage stats: %s", str(e))
+            return None
+        return None
+
+    @property
+    def chat(self):
+        return self
+
+    @property
+    def chats(self):
+        return self
+
+    @property
+    def models(self):
+        return self
+
+    @property
+    def aio(self):
+        if self._async_wrapper is None:
+            self._async_wrapper = AsyncGeminiWrapper(self)
+        return self._async_wrapper
+
+    def count_tokens(self, *args: Any, **kwargs: Any):
+        return self.client.models.count_tokens(*args, **kwargs)
+
+
+class AsyncGeminiWrapper:
+    """Async wrapper for Gemini client to match the official SDK structure."""
+
+    def __init__(self, wrapper: BaseGeminiWrapper):
+        self.wrapper = wrapper
+        self._models = None
+
+    @property
+    def models(self):
+        if self._models is None:
+            self._models = AsyncModelsWrapper(self.wrapper)
+        return self._models
+
+
+class AsyncModelsWrapper:
+    """Async wrapper for models to match the official SDK structure."""
+
+    def __init__(self, wrapper: BaseGeminiWrapper):
+        self.wrapper = wrapper
+
+    async def generate_content(
+        self, *args: Any, **kwargs: Any
+    ) -> GenerateContentResponse:
+        """Async method for generate_content."""
+        execution_id = kwargs.pop("execution_id", None)
+        return await self.wrapper.generate_content_async(
+            *args, execution_id=execution_id, **kwargs
+        )
+
+    async def generate_content_stream(
+        self, *args: Any, **kwargs: Any
+    ) -> AsyncIterator[GenerateContentResponse]:
+        """Async method for generate_content_stream."""
+        execution_id = kwargs.pop("execution_id", None)
+        return await self.wrapper.generate_content_stream_async(
+            *args, execution_id=execution_id, **kwargs
+        )
+
+
+class GeminiWrapper(BaseGeminiWrapper):
+    def generate_content(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> GenerateContentResponse:
+        """Generate content and log token usage."""
+        logger.debug("Generating content with args: %s, kwargs: %s", args, kwargs)
+
+        response = self.client.models.generate_content(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
+    def generate_content_stream(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Iterator[GenerateContentResponse]:
+        """Generate content with streaming and log token usage."""
+        logger.debug(
+            "Generating content stream with args: %s, kwargs: %s", args, kwargs
+        )
+
+        base_stream = self.client.models.generate_content_stream(*args, **kwargs)
+        return GeminiSyncStreamInterceptor(
+            base_stream=base_stream,
+            usage_callback=_create_usage_callback(execution_id, self._log_usage),
+        )
+
+    async def generate_content_async(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> GenerateContentResponse:
+        """Generate content asynchronously and log token usage."""
+        logger.debug("Generating content async with args: %s, kwargs: %s", args, kwargs)
+
+        response = await self.client.aio.models.generate_content(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+
+        return response
+
+    async def generate_content_stream_async(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> AsyncIterator[GenerateContentResponse]:
+        """Generate content with async streaming and log token usage."""
+        logger.debug(
+            "Generating content stream async with args: %s, kwargs: %s", args, kwargs
+        )
+
+        base_stream = await self.client.aio.models.generate_content_stream(
+            *args, **kwargs
+        )
+        return GeminiAsyncStreamInterceptor(
+            base_stream=base_stream,
+            usage_callback=_create_usage_callback(execution_id, self._log_usage),
+        )
+
+
+def tokenator_gemini(
+    client: genai.Client,
+    db_path: Optional[str] = None,
+    provider: str = "gemini",
+) -> GeminiWrapper:
+    """Create a token-tracking wrapper for a Gemini client.
+
+    Args:
+        client: Gemini client instance
+        db_path: Optional path to SQLite database for token tracking
+        provider: Provider name, defaults to "gemini"
+    """
+    if not isinstance(client, genai.Client):
+        raise ValueError("Client must be an instance of genai.Client")
+
+    return GeminiWrapper(client=client, db_path=db_path, provider=provider)
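The wrapper keeps the official SDK's surface (`models`, `aio.models`, `count_tokens`) and defers usage logging for streams to the interceptors below. A usage sketch, assuming GEMINI_API_KEY is set and the model name is only an example:

```python
import asyncio
import os

from google import genai

from tokenator import tokenator_gemini, usage

client = tokenator_gemini(genai.Client(api_key=os.getenv("GEMINI_API_KEY")))

# Sync streaming: usage is logged only after the stream is fully consumed,
# since the last chunk is the one carrying complete usage_metadata.
for chunk in client.models.generate_content_stream(
    model="gemini-2.0-flash", contents="write a haiku about tokens"
):
    print(chunk.text or "", end="")

async def main() -> None:
    # The aio/models properties mirror the official SDK's async surface.
    response = await client.aio.models.generate_content(
        model="gemini-2.0-flash", contents="hello"
    )
    print(response.text)

asyncio.run(main())
print(usage.last_execution().model_dump_json(indent=4))
```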
tokenator-0.2.0/src/tokenator/gemini/stream_interceptors.py

@@ -0,0 +1,77 @@
+"""Stream interceptors for Gemini responses."""
+
+import logging
+from typing import AsyncIterator, Callable, List, Optional, TypeVar, Iterator
+
+
+logger = logging.getLogger(__name__)
+
+_T = TypeVar("_T")  # GenerateContentResponse
+
+
+class GeminiAsyncStreamInterceptor(AsyncIterator[_T]):
+    """
+    A wrapper around Gemini async stream that intercepts each chunk to handle usage or
+    logging logic.
+    """
+
+    def __init__(
+        self,
+        base_stream: AsyncIterator[_T],
+        usage_callback: Optional[Callable[[List[_T]], None]] = None,
+    ):
+        self._base_stream = base_stream
+        self._usage_callback = usage_callback
+        self._chunks: List[_T] = []
+
+    def __aiter__(self) -> AsyncIterator[_T]:
+        """Return self as async iterator."""
+        return self
+
+    async def __anext__(self) -> _T:
+        """Get next chunk and track it."""
+        try:
+            chunk = await self._base_stream.__anext__()
+        except StopAsyncIteration:
+            # Once the base stream is fully consumed, we can do final usage/logging.
+            if self._usage_callback and self._chunks:
+                self._usage_callback(self._chunks)
+            raise
+
+        # Intercept each chunk
+        self._chunks.append(chunk)
+        return chunk
+
+
+class GeminiSyncStreamInterceptor(Iterator[_T]):
+    """
+    A wrapper around Gemini sync stream that intercepts each chunk to handle usage or
+    logging logic.
+    """
+
+    def __init__(
+        self,
+        base_stream: Iterator[_T],
+        usage_callback: Optional[Callable[[List[_T]], None]] = None,
+    ):
+        self._base_stream = base_stream
+        self._usage_callback = usage_callback
+        self._chunks: List[_T] = []
+
+    def __iter__(self) -> Iterator[_T]:
+        """Return self as iterator."""
+        return self
+
+    def __next__(self) -> _T:
+        """Get next chunk and track it."""
+        try:
+            chunk = next(self._base_stream)
+        except StopIteration:
+            # Once the base stream is fully consumed, we can do final usage/logging.
+            if self._usage_callback and self._chunks:
+                self._usage_callback(self._chunks)
+            raise
+
+        # Intercept each chunk
+        self._chunks.append(chunk)
+        return chunk