tokenator 0.1.8__tar.gz → 0.1.10__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {tokenator-0.1.8 → tokenator-0.1.10}/PKG-INFO +40 -13
- {tokenator-0.1.8 → tokenator-0.1.10}/README.md +39 -12
- {tokenator-0.1.8 → tokenator-0.1.10}/pyproject.toml +4 -3
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/__init__.py +3 -3
- tokenator-0.1.10/src/tokenator/anthropic/client_anthropic.py +155 -0
- tokenator-0.1.10/src/tokenator/anthropic/stream_interceptors.py +146 -0
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/base_wrapper.py +26 -13
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/create_migrations.py +6 -5
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/migrations/env.py +5 -4
- tokenator-0.1.10/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +51 -0
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/migrations.py +9 -6
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/models.py +15 -4
- tokenator-0.1.10/src/tokenator/openai/client_openai.py +163 -0
- tokenator-0.1.10/src/tokenator/openai/stream_interceptors.py +146 -0
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/schemas.py +26 -27
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/usage.py +114 -47
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/utils.py +14 -9
- tokenator-0.1.8/src/tokenator/client_anthropic.py +0 -148
- tokenator-0.1.8/src/tokenator/client_openai.py +0 -151
- tokenator-0.1.8/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -49
- {tokenator-0.1.8 → tokenator-0.1.10}/LICENSE +0 -0
- {tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/migrations/script.py.mako +0 -0
{tokenator-0.1.8 → tokenator-0.1.10}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tokenator
-Version: 0.1.8
+Version: 0.1.10
 Summary: Token usage tracking wrapper for LLMs
 License: MIT
 Author: Ujjwal Maheshwari
@@ -27,7 +27,7 @@ Have you ever wondered about :
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money did I spent today on development?
 
-Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes
+Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
 
 Get started with just 3 lines of code!
 
@@ -60,27 +60,54 @@ response = client.chat.completions.create(
 ### Cost Analysis
 
 ```python
-from tokenator import
+from tokenator import usage
 
 # Get usage for different time periods
-
-
-
-
+usage.last_hour()
+usage.last_day()
+usage.last_week()
+usage.last_month()
 
 # Custom date range
-
+usage.between("2024-03-01", "2024-03-15")
 
 # Get usage for different LLM providers
-
-
-
+usage.last_day("openai")
+usage.last_day("anthropic")
+usage.last_day("google")
 ```
 
-### Example `
+### Example `usage` object
 
-```
+```python
+print(cost.last_hour().model_dump_json(indent=4))
+```
 
+```json
+{
+    "total_cost": 0.0004,
+    "total_tokens": 79,
+    "prompt_tokens": 52,
+    "completion_tokens": 27,
+    "providers": [
+        {
+            "total_cost": 0.0004,
+            "total_tokens": 79,
+            "prompt_tokens": 52,
+            "completion_tokens": 27,
+            "provider": "openai",
+            "models": [
+                {
+                    "total_cost": 0.0004,
+                    "total_tokens": 79,
+                    "prompt_tokens": 52,
+                    "completion_tokens": 27,
+                    "model": "gpt-4o-2024-08-06"
+                }
+            ]
+        }
+    ]
+}
 ```
 
 ## Features
{tokenator-0.1.8 → tokenator-0.1.10}/README.md

@@ -5,7 +5,7 @@ Have you ever wondered about :
 - How much does it cost to do run a complex AI workflow with multiple LLM providers?
 - How much money did I spent today on development?
 
-Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes
+Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
 
 Get started with just 3 lines of code!
 
@@ -38,27 +38,54 @@ response = client.chat.completions.create(
 ### Cost Analysis
 
 ```python
-from tokenator import
+from tokenator import usage
 
 # Get usage for different time periods
-
-
-
-
+usage.last_hour()
+usage.last_day()
+usage.last_week()
+usage.last_month()
 
 # Custom date range
-
+usage.between("2024-03-01", "2024-03-15")
 
 # Get usage for different LLM providers
-
-
-
+usage.last_day("openai")
+usage.last_day("anthropic")
+usage.last_day("google")
 ```
 
-### Example `
+### Example `usage` object
 
-```
+```python
+print(cost.last_hour().model_dump_json(indent=4))
+```
 
+```json
+{
+    "total_cost": 0.0004,
+    "total_tokens": 79,
+    "prompt_tokens": 52,
+    "completion_tokens": 27,
+    "providers": [
+        {
+            "total_cost": 0.0004,
+            "total_tokens": 79,
+            "prompt_tokens": 52,
+            "completion_tokens": 27,
+            "provider": "openai",
+            "models": [
+                {
+                    "total_cost": 0.0004,
+                    "total_tokens": 79,
+                    "prompt_tokens": 52,
+                    "completion_tokens": 27,
+                    "model": "gpt-4o-2024-08-06"
+                }
+            ]
+        }
+    ]
+}
 ```
 
 ## Features
{tokenator-0.1.8 → tokenator-0.1.10}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tokenator"
-version = "0.1.8"
+version = "0.1.10"
 description = "Token usage tracking wrapper for LLMs"
 authors = ["Ujjwal Maheshwari <your.email@example.com>"]
 readme = "README.md"
@@ -19,11 +19,12 @@ anthropic = "^0.40.0"
 pytest = "^8.0.0"
 pytest-asyncio = "^0.23.0"
 pytest-cov = "^4.1.0"
+ruff = "^0.8.4"
 
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.pytest.ini_options]
-
-
+testpaths = ["tests"]
+pythonpath = "src"
{tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/__init__.py

@@ -1,8 +1,8 @@
 """Tokenator - Track and analyze your OpenAI API token usage and costs."""
 
 import logging
-from .client_openai import tokenator_openai
-from .client_anthropic import tokenator_anthropic
+from .openai.client_openai import tokenator_openai
+from .anthropic.client_anthropic import tokenator_anthropic
 from . import usage
 from .utils import get_default_db_path
 from .migrations import check_and_run_migrations
@@ -15,4 +15,4 @@ logger = logging.getLogger(__name__)
 try:
     check_and_run_migrations()
 except Exception as e:
-    logger.warning(f"Failed to run migrations, but continuing anyway: {e}")
+    logger.warning(f"Failed to run migrations, but continuing anyway: {e}")
tokenator-0.1.10/src/tokenator/anthropic/client_anthropic.py (new file)

@@ -0,0 +1,155 @@
+"""Anthropic client wrapper with token usage tracking."""
+
+from typing import Any, Optional, Union, overload, Iterator, AsyncIterator
+import logging
+
+from anthropic import Anthropic, AsyncAnthropic
+from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
+
+from ..models import Usage, TokenUsageStats
+from ..base_wrapper import BaseWrapper, ResponseType
+from .stream_interceptors import AnthropicAsyncStreamInterceptor, AnthropicSyncStreamInterceptor
+
+logger = logging.getLogger(__name__)
+
+
+class BaseAnthropicWrapper(BaseWrapper):
+    provider = "anthropic"
+
+    def _process_response_usage(
+        self, response: ResponseType
+    ) -> Optional[TokenUsageStats]:
+        """Process and log usage statistics from a response."""
+        try:
+            if isinstance(response, Message):
+                if not hasattr(response, "usage"):
+                    return None
+                usage = Usage(
+                    prompt_tokens=response.usage.input_tokens,
+                    completion_tokens=response.usage.output_tokens,
+                    total_tokens=response.usage.input_tokens
+                    + response.usage.output_tokens,
+                )
+                return TokenUsageStats(model=response.model, usage=usage)
+            elif isinstance(response, dict):
+                usage_dict = response.get("usage")
+                if not usage_dict:
+                    return None
+                usage = Usage(
+                    prompt_tokens=usage_dict.get("input_tokens", 0),
+                    completion_tokens=usage_dict.get("output_tokens", 0),
+                    total_tokens=usage_dict.get("input_tokens", 0)
+                    + usage_dict.get("output_tokens", 0),
+                )
+                return TokenUsageStats(
+                    model=response.get("model", "unknown"), usage=usage
+                )
+        except Exception as e:
+            logger.warning("Failed to process usage stats: %s", str(e))
+            return None
+        return None
+
+    @property
+    def messages(self):
+        return self
+
+
+def _create_usage_callback(execution_id, log_usage_fn):
+    """Creates a callback function for processing usage statistics from stream chunks."""
+    def usage_callback(chunks):
+        if not chunks:
+            return
+
+        usage_data = TokenUsageStats(
+            model=chunks[0].message.model if isinstance(chunks[0], RawMessageStartEvent) else "",
+            usage=Usage(),
+        )
+
+        for chunk in chunks:
+            if isinstance(chunk, RawMessageStartEvent):
+                usage_data.model = chunk.message.model
+                usage_data.usage.prompt_tokens += chunk.message.usage.input_tokens
+                usage_data.usage.completion_tokens += chunk.message.usage.output_tokens
+            elif isinstance(chunk, RawMessageDeltaEvent):
+                usage_data.usage.prompt_tokens += chunk.usage.input_tokens
+                usage_data.usage.completion_tokens += chunk.usage.output_tokens
+
+        usage_data.usage.total_tokens = usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
+        log_usage_fn(usage_data, execution_id=execution_id)
+
+    return usage_callback
+
+
+class AnthropicWrapper(BaseAnthropicWrapper):
+    def create(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[Message, Iterator[Message]]:
+        """Create a message completion and log token usage."""
+        logger.debug("Creating message completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = self.client.messages.create(*args, **kwargs)
+            return AnthropicSyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = self.client.messages.create(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+        return response
+
+
+class AsyncAnthropicWrapper(BaseAnthropicWrapper):
+    async def create(
+        self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
+    ) -> Union[Message, AsyncIterator[Message]]:
+        """Create a message completion and log token usage."""
+        logger.debug("Creating message completion with args: %s, kwargs: %s", args, kwargs)
+
+        if kwargs.get("stream", False):
+            base_stream = await self.client.messages.create(*args, **kwargs)
+            return AnthropicAsyncStreamInterceptor(
+                base_stream=base_stream,
+                usage_callback=_create_usage_callback(execution_id, self._log_usage),
+            )
+
+        response = await self.client.messages.create(*args, **kwargs)
+        usage_data = self._process_response_usage(response)
+        if usage_data:
+            self._log_usage(usage_data, execution_id=execution_id)
+        return response
+
+
+@overload
+def tokenator_anthropic(
+    client: Anthropic,
+    db_path: Optional[str] = None,
+) -> AnthropicWrapper: ...
+
+
+@overload
+def tokenator_anthropic(
+    client: AsyncAnthropic,
+    db_path: Optional[str] = None,
+) -> AsyncAnthropicWrapper: ...
+
+
+def tokenator_anthropic(
+    client: Union[Anthropic, AsyncAnthropic],
+    db_path: Optional[str] = None,
+) -> Union[AnthropicWrapper, AsyncAnthropicWrapper]:
+    """Create a token-tracking wrapper for an Anthropic client.
+
+    Args:
+        client: Anthropic or AsyncAnthropic client instance
+        db_path: Optional path to SQLite database for token tracking
+    """
+    if isinstance(client, Anthropic):
+        return AnthropicWrapper(client=client, db_path=db_path)
+
+    if isinstance(client, AsyncAnthropic):
+        return AsyncAnthropicWrapper(client=client, db_path=db_path)
+
+    raise ValueError("Client must be an instance of Anthropic or AsyncAnthropic")
tokenator-0.1.10/src/tokenator/anthropic/stream_interceptors.py (new file)

@@ -0,0 +1,146 @@
+import logging
+from typing import AsyncIterator, Callable, List, Optional, TypeVar, Iterator
+
+from anthropic import AsyncStream, Stream
+
+logger = logging.getLogger(__name__)
+
+_T = TypeVar("_T")
+
+
+class AnthropicAsyncStreamInterceptor(AsyncStream[_T]):
+    """
+    A wrapper around anthropic.AsyncStream that delegates all functionality
+    to the 'base_stream' but intercepts each chunk to handle usage or
+    logging logic. This preserves .response and other methods.
+
+    You can store aggregated usage in a local list and process it when
+    the stream ends (StopAsyncIteration).
+    """
+
+    def __init__(
+        self,
+        base_stream: AsyncStream[_T],
+        usage_callback: Optional[Callable[[List[_T]], None]] = None,
+    ):
+        # We do NOT call super().__init__() because anthropic.AsyncStream
+        # expects constructor parameters we don't want to re-initialize.
+        # Instead, we just store the base_stream and delegate everything to it.
+        self._base_stream = base_stream
+        self._usage_callback = usage_callback
+        self._chunks: List[_T] = []
+
+    @property
+    def response(self):
+        """Expose the original stream's 'response' so user code can do stream.response, etc."""
+        return self._base_stream.response
+
+    def __aiter__(self) -> AsyncIterator[_T]:
+        """
+        Called when we do 'async for chunk in wrapped_stream:'
+        We simply return 'self'. Then __anext__ does the rest.
+        """
+        return self
+
+    async def __anext__(self) -> _T:
+        """
+        Intercept iteration. We pull the next chunk from the base_stream.
+        If it's the end, do any final usage logging, then raise StopAsyncIteration.
+        Otherwise, we can accumulate usage info or do whatever we need with the chunk.
+        """
+        try:
+            chunk = await self._base_stream.__anext__()
+        except StopAsyncIteration:
+            # Once the base stream is fully consumed, we can do final usage/logging.
+            if self._usage_callback and self._chunks:
+                self._usage_callback(self._chunks)
+            raise
+
+        # Intercept each chunk
+        self._chunks.append(chunk)
+        return chunk
+
+    async def __aenter__(self) -> "AnthropicAsyncStreamInterceptor[_T]":
+        """Support async with ... : usage."""
+        await self._base_stream.__aenter__()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """
+        Ensure we propagate __aexit__ to the base stream,
+        so connections are properly closed.
+        """
+        return await self._base_stream.__aexit__(exc_type, exc_val, exc_tb)
+
+    async def close(self) -> None:
+        """Delegate close to the base_stream."""
+        await self._base_stream.close()
+
+
+class AnthropicSyncStreamInterceptor(Stream[_T]):
+    """
+    A wrapper around anthropic.Stream that delegates all functionality
+    to the 'base_stream' but intercepts each chunk to handle usage or
+    logging logic. This preserves .response and other methods.
+
+    You can store aggregated usage in a local list and process it when
+    the stream ends (StopIteration).
+    """
+
+    def __init__(
+        self,
+        base_stream: Stream[_T],
+        usage_callback: Optional[Callable[[List[_T]], None]] = None,
+    ):
+        # We do NOT call super().__init__() because openai.SyncStream
+        # expects constructor parameters we don't want to re-initialize.
+        # Instead, we just store the base_stream and delegate everything to it.
+        self._base_stream = base_stream
+        self._usage_callback = usage_callback
+        self._chunks: List[_T] = []
+
+    @property
+    def response(self):
+        """Expose the original stream's 'response' so user code can do stream.response, etc."""
+        return self._base_stream.response
+
+    def __iter__(self) -> Iterator[_T]:
+        """
+        Called when we do 'for chunk in wrapped_stream:'
+        We simply return 'self'. Then __next__ does the rest.
+        """
+        return self
+
+    def __next__(self) -> _T:
+        """
+        Intercept iteration. We pull the next chunk from the base_stream.
+        If it's the end, do any final usage logging, then raise StopIteration.
+        Otherwise, we can accumulate usage info or do whatever we need with the chunk.
+        """
+        try:
+            chunk = self._base_stream.__next__()
+        except StopIteration:
+            # Once the base stream is fully consumed, we can do final usage/logging.
+            if self._usage_callback and self._chunks:
+                self._usage_callback(self._chunks)
+            raise
+
+        # Intercept each chunk
+        self._chunks.append(chunk)
+        return chunk
+
+    def __enter__(self) -> "AnthropicSyncStreamInterceptor[_T]":
+        """Support with ... : usage."""
+        self._base_stream.__enter__()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Ensure we propagate __aexit__ to the base stream,
+        so connections are properly closed.
+        """
+        return self._base_stream.__exit__(exc_type, exc_val, exc_tb)
+
+    async def close(self) -> None:
+        """Delegate close to the base_stream."""
+        self._base_stream.close()
{tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/base_wrapper.py

@@ -1,16 +1,17 @@
 """Base wrapper class for token usage tracking."""
 
 from pathlib import Path
-from typing import Any,
+from typing import Any, Optional, TypeVar
 import logging
 import uuid
 
-from .models import
+from .models import TokenUsageStats
 from .schemas import get_session, TokenUsage
 
 logger = logging.getLogger(__name__)
 
-ResponseType = TypeVar(
+ResponseType = TypeVar("ResponseType")
+
 
 class BaseWrapper:
     def __init__(self, client: Any, db_path: Optional[str] = None):
@@ -22,13 +23,20 @@ class BaseWrapper:
             logger.info("Created database directory at: %s", Path(db_path).parent)
 
         self.Session = get_session(db_path)
-
-        logger.debug("Initializing %s with db_path: %s",
-                     self.__class__.__name__, db_path)
 
-
+        logger.debug(
+            "Initializing %s with db_path: %s", self.__class__.__name__, db_path
+        )
+
+    def _log_usage_impl(
+        self, token_usage_stats: TokenUsageStats, session, execution_id: str
+    ) -> None:
         """Implementation of token usage logging."""
-        logger.debug(
+        logger.debug(
+            "Logging usage for model %s: %s",
+            token_usage_stats.model,
+            token_usage_stats.usage.model_dump(),
+        )
         try:
             token_usage = TokenUsage(
                 execution_id=execution_id,
@@ -36,15 +44,20 @@ class BaseWrapper:
                 model=token_usage_stats.model,
                 prompt_tokens=token_usage_stats.usage.prompt_tokens,
                 completion_tokens=token_usage_stats.usage.completion_tokens,
-                total_tokens=token_usage_stats.usage.total_tokens
+                total_tokens=token_usage_stats.usage.total_tokens,
             )
             session.add(token_usage)
-            logger.info(
-
+            logger.info(
+                "Logged token usage: model=%s, total_tokens=%d",
+                token_usage_stats.model,
+                token_usage_stats.usage.total_tokens,
+            )
         except Exception as e:
             logger.error("Failed to log token usage: %s", str(e))
 
-    def _log_usage(
+    def _log_usage(
+        self, token_usage_stats: TokenUsageStats, execution_id: Optional[str] = None
+    ):
         """Log token usage to database."""
         if not execution_id:
             execution_id = str(uuid.uuid4())
@@ -58,4 +71,4 @@ class BaseWrapper:
             logger.error("Failed to log token usage: %s", str(e))
             session.rollback()
         finally:
-            session.close()
+            session.close()
{tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/create_migrations.py

@@ -1,25 +1,26 @@
 """Development utilities for tokenator."""
 
-import os
 import sys
 from pathlib import Path
 from alembic import command
 from tokenator.migrations import get_alembic_config
 
+
 def create_migration(message: str):
     """Create a new migration based on model changes."""
     config = get_alembic_config()
-
+
     # Get the migrations directory
     migrations_dir = Path(__file__).parent / "migrations" / "versions"
     migrations_dir.mkdir(parents=True, exist_ok=True)
-
+
     # Generate migration with custom message
-    command.revision(config, autogenerate=True, message=message)
+    command.revision(config, autogenerate=True, message=message)
+
 
 if __name__ == "__main__":
     if len(sys.argv) > 1:
         msg = " ".join(sys.argv[1:])
     else:
         msg = "auto generated migration"
-    create_migration(msg)
+    create_migration(msg)
{tokenator-0.1.8 → tokenator-0.1.10}/src/tokenator/migrations/env.py

@@ -18,6 +18,7 @@ if config.config_file_name is not None:
 # add your model's MetaData object here
 target_metadata = Base.metadata
 
+
 def run_migrations_offline() -> None:
     """Run migrations in 'offline' mode."""
     url = config.get_main_option("sqlalchemy.url")
@@ -31,6 +32,7 @@ def run_migrations_offline() -> None:
     with context.begin_transaction():
         context.run_migrations()
 
+
 def run_migrations_online() -> None:
     """Run migrations in 'online' mode."""
     connectable = engine_from_config(
@@ -40,14 +42,13 @@ def run_migrations_online() -> None:
     )
 
     with connectable.connect() as connection:
-        context.configure(
-            connection=connection, target_metadata=target_metadata
-        )
+        context.configure(connection=connection, target_metadata=target_metadata)
 
         with context.begin_transaction():
             context.run_migrations()
 
+
 if context.is_offline_mode():
     run_migrations_offline()
 else:
-    run_migrations_online()
+    run_migrations_online()