tokenator 0.1.7__tar.gz → 0.1.9__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {tokenator-0.1.7 → tokenator-0.1.9}/PKG-INFO +28 -3
- {tokenator-0.1.7 → tokenator-0.1.9}/README.md +27 -2
- {tokenator-0.1.7 → tokenator-0.1.9}/pyproject.toml +1 -1
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/__init__.py +1 -1
- tokenator-0.1.9/src/tokenator/openai/AsyncStreamInterceptor.py +78 -0
- {tokenator-0.1.7/src/tokenator → tokenator-0.1.9/src/tokenator/openai}/client_openai.py +45 -27
- {tokenator-0.1.7 → tokenator-0.1.9}/LICENSE +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/base_wrapper.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/client_anthropic.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/create_migrations.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/migrations/env.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/migrations/script.py.mako +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/migrations.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/models.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/schemas.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/usage.py +0 -0
- {tokenator-0.1.7 → tokenator-0.1.9}/src/tokenator/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: tokenator
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: Token usage tracking wrapper for LLMs
|
5
5
|
License: MIT
|
6
6
|
Author: Ujjwal Maheshwari
|
@@ -27,7 +27,7 @@ Have you ever wondered about :
|
|
27
27
|
- How much does it cost to run a complex AI workflow with multiple LLM providers?
|
28
28
|
- How much money did I spend today on development?
|
29
29
|
|
30
|
-
Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes
|
30
|
+
Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
|
31
31
|
|
32
32
|
Get started with just 3 lines of code!
|
33
33
|
|
@@ -80,7 +80,32 @@ cost.last_day("google")
|
|
80
80
|
### Example `cost` object
|
81
81
|
|
82
82
|
```json
|
83
|
-
|
83
|
+
# print(cost.last_hour().model_dump_json(indent=4))
|
84
|
+
|
85
|
+
usage : {
|
86
|
+
"total_cost": 0.0004,
|
87
|
+
"total_tokens": 79,
|
88
|
+
"prompt_tokens": 52,
|
89
|
+
"completion_tokens": 27,
|
90
|
+
"providers": [
|
91
|
+
{
|
92
|
+
"total_cost": 0.0004,
|
93
|
+
"total_tokens": 79,
|
94
|
+
"prompt_tokens": 52,
|
95
|
+
"completion_tokens": 27,
|
96
|
+
"provider": "openai",
|
97
|
+
"models": [
|
98
|
+
{
|
99
|
+
"total_cost": 0.0004,
|
100
|
+
"total_tokens": 79,
|
101
|
+
"prompt_tokens": 52,
|
102
|
+
"completion_tokens": 27,
|
103
|
+
"model": "gpt-4o-2024-08-06"
|
104
|
+
}
|
105
|
+
]
|
106
|
+
}
|
107
|
+
]
|
108
|
+
}
|
84
109
|
```
|
85
110
|
|
86
111
|
## Features
|
@@ -5,7 +5,7 @@ Have you ever wondered about :
|
|
5
5
|
- How much does it cost to run a complex AI workflow with multiple LLM providers?
|
6
6
|
- How much money did I spend today on development?
|
7
7
|
|
8
|
-
Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes
|
8
|
+
Afraid not, tokenator is here! With tokenator's easy to use API, you can start tracking LLM usage in a matter of minutes.
|
9
9
|
|
10
10
|
Get started with just 3 lines of code!
|
11
11
|
|
@@ -58,7 +58,32 @@ cost.last_day("google")
|
|
58
58
|
### Example `cost` object
|
59
59
|
|
60
60
|
```json
|
61
|
-
|
61
|
+
# print(cost.last_hour().model_dump_json(indent=4))
|
62
|
+
|
63
|
+
usage : {
|
64
|
+
"total_cost": 0.0004,
|
65
|
+
"total_tokens": 79,
|
66
|
+
"prompt_tokens": 52,
|
67
|
+
"completion_tokens": 27,
|
68
|
+
"providers": [
|
69
|
+
{
|
70
|
+
"total_cost": 0.0004,
|
71
|
+
"total_tokens": 79,
|
72
|
+
"prompt_tokens": 52,
|
73
|
+
"completion_tokens": 27,
|
74
|
+
"provider": "openai",
|
75
|
+
"models": [
|
76
|
+
{
|
77
|
+
"total_cost": 0.0004,
|
78
|
+
"total_tokens": 79,
|
79
|
+
"prompt_tokens": 52,
|
80
|
+
"completion_tokens": 27,
|
81
|
+
"model": "gpt-4o-2024-08-06"
|
82
|
+
}
|
83
|
+
]
|
84
|
+
}
|
85
|
+
]
|
86
|
+
}
|
62
87
|
```
|
63
88
|
|
64
89
|
## Features
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""Tokenator - Track and analyze your OpenAI API token usage and costs."""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
from .client_openai import tokenator_openai
|
4
|
+
from .openai.client_openai import tokenator_openai
|
5
5
|
from .client_anthropic import tokenator_anthropic
|
6
6
|
from . import usage
|
7
7
|
from .utils import get_default_db_path
|
@@ -0,0 +1,78 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import AsyncIterator, Callable, Generic, List, Optional, TypeVar
|
3
|
+
|
4
|
+
from openai import AsyncStream, AsyncOpenAI
|
5
|
+
from openai.types.chat import ChatCompletionChunk
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
_T = TypeVar("_T") # or you might specifically do _T = ChatCompletionChunk
|
10
|
+
|
11
|
+
|
12
|
+
class AsyncStreamInterceptor(AsyncStream[_T]):
|
13
|
+
"""
|
14
|
+
A wrapper around openai.AsyncStream that delegates all functionality
|
15
|
+
to the 'base_stream' but intercepts each chunk to handle usage or
|
16
|
+
logging logic. This preserves .response and other methods.
|
17
|
+
|
18
|
+
You can store aggregated usage in a local list and process it when
|
19
|
+
the stream ends (StopAsyncIteration).
|
20
|
+
"""
|
21
|
+
|
22
|
+
def __init__(
|
23
|
+
self,
|
24
|
+
base_stream: AsyncStream[_T],
|
25
|
+
usage_callback: Optional[Callable[[List[_T]], None]] = None,
|
26
|
+
):
|
27
|
+
# We do NOT call super().__init__() because openai.AsyncStream
|
28
|
+
# expects constructor parameters we don't want to re-initialize.
|
29
|
+
# Instead, we just store the base_stream and delegate everything to it.
|
30
|
+
self._base_stream = base_stream
|
31
|
+
self._usage_callback = usage_callback
|
32
|
+
self._chunks: List[_T] = []
|
33
|
+
|
34
|
+
@property
|
35
|
+
def response(self):
|
36
|
+
"""Expose the original stream's 'response' so user code can do stream.response, etc."""
|
37
|
+
return self._base_stream.response
|
38
|
+
|
39
|
+
def __aiter__(self) -> AsyncIterator[_T]:
|
40
|
+
"""
|
41
|
+
Called when we do 'async for chunk in wrapped_stream:'
|
42
|
+
We simply return 'self'. Then __anext__ does the rest.
|
43
|
+
"""
|
44
|
+
return self
|
45
|
+
|
46
|
+
async def __anext__(self) -> _T:
|
47
|
+
"""
|
48
|
+
Intercept iteration. We pull the next chunk from the base_stream.
|
49
|
+
If it's the end, do any final usage logging, then raise StopAsyncIteration.
|
50
|
+
Otherwise, we can accumulate usage info or do whatever we need with the chunk.
|
51
|
+
"""
|
52
|
+
try:
|
53
|
+
chunk = await self._base_stream.__anext__()
|
54
|
+
except StopAsyncIteration:
|
55
|
+
# Once the base stream is fully consumed, we can do final usage/logging.
|
56
|
+
if self._usage_callback and self._chunks:
|
57
|
+
self._usage_callback(self._chunks)
|
58
|
+
raise
|
59
|
+
|
60
|
+
# Intercept each chunk
|
61
|
+
self._chunks.append(chunk)
|
62
|
+
return chunk
|
63
|
+
|
64
|
+
async def __aenter__(self) -> "AsyncStreamInterceptor[_T]":
|
65
|
+
"""Support async with ... : usage."""
|
66
|
+
await self._base_stream.__aenter__()
|
67
|
+
return self
|
68
|
+
|
69
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
70
|
+
"""
|
71
|
+
Ensure we propagate __aexit__ to the base stream,
|
72
|
+
so connections are properly closed.
|
73
|
+
"""
|
74
|
+
return await self._base_stream.__aexit__(exc_type, exc_val, exc_tb)
|
75
|
+
|
76
|
+
async def close(self) -> None:
|
77
|
+
"""Delegate close to the base_stream."""
|
78
|
+
await self._base_stream.close()
|
@@ -6,8 +6,9 @@ import logging
|
|
6
6
|
from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
|
7
7
|
from openai.types.chat import ChatCompletion, ChatCompletionChunk
|
8
8
|
|
9
|
-
from .models import Usage, TokenUsageStats
|
10
|
-
from .base_wrapper import BaseWrapper, ResponseType
|
9
|
+
from ..models import Usage, TokenUsageStats
|
10
|
+
from ..base_wrapper import BaseWrapper, ResponseType
|
11
|
+
from .AsyncStreamInterceptor import AsyncStreamInterceptor
|
11
12
|
|
12
13
|
logger = logging.getLogger(__name__)
|
13
14
|
|
@@ -87,37 +88,54 @@ class OpenAIWrapper(BaseOpenAIWrapper):
|
|
87
88
|
|
88
89
|
|
89
90
|
class AsyncOpenAIWrapper(BaseOpenAIWrapper):
|
90
|
-
async def create(
|
91
|
-
|
91
|
+
async def create(
|
92
|
+
self,
|
93
|
+
*args: Any,
|
94
|
+
execution_id: Optional[str] = None,
|
95
|
+
**kwargs: Any
|
96
|
+
) -> Union[ChatCompletion, AsyncIterator[ChatCompletionChunk]]:
|
97
|
+
"""
|
98
|
+
Create a chat completion and log token usage.
|
99
|
+
"""
|
92
100
|
logger.debug("Creating chat completion with args: %s, kwargs: %s", args, kwargs)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
|
102
|
+
# If user wants a stream, return an interceptor
|
103
|
+
if kwargs.get("stream", False):
|
104
|
+
base_stream = await self.client.chat.completions.create(*args, **kwargs)
|
105
|
+
|
106
|
+
# Define a callback that will get called once the stream ends
|
107
|
+
def usage_callback(chunks):
|
108
|
+
# Mimic your old logic to gather usage from chunk.usage
|
109
|
+
# e.g. ChatCompletionChunk.usage
|
110
|
+
# Then call self._log_usage(...)
|
111
|
+
if not chunks:
|
112
|
+
return
|
113
|
+
# Build usage_data from the first chunk's model
|
114
|
+
usage_data = TokenUsageStats(
|
115
|
+
model=chunks[0].model,
|
116
|
+
usage=Usage(),
|
117
|
+
)
|
118
|
+
# Sum up usage from all chunks
|
119
|
+
for ch in chunks:
|
120
|
+
if ch.usage:
|
121
|
+
usage_data.usage.prompt_tokens += ch.usage.prompt_tokens
|
122
|
+
usage_data.usage.completion_tokens += ch.usage.completion_tokens
|
123
|
+
usage_data.usage.total_tokens += ch.usage.total_tokens
|
124
|
+
|
125
|
+
self._log_usage(usage_data, execution_id=execution_id)
|
126
|
+
|
127
|
+
# Return the interceptor that wraps the real AsyncStream
|
128
|
+
return AsyncStreamInterceptor(
|
129
|
+
base_stream=base_stream,
|
130
|
+
usage_callback=usage_callback,
|
131
|
+
)
|
132
|
+
|
133
|
+
# Non-streaming path remains unchanged
|
98
134
|
response = await self.client.chat.completions.create(*args, **kwargs)
|
99
135
|
usage_data = self._process_response_usage(response)
|
100
136
|
if usage_data:
|
101
137
|
self._log_usage(usage_data, execution_id=execution_id)
|
102
138
|
return response
|
103
|
-
|
104
|
-
async def _wrap_streaming_response(self, response_iter: AsyncStream[ChatCompletionChunk], execution_id: Optional[str]) -> AsyncIterator[ChatCompletionChunk]:
|
105
|
-
"""Wrap streaming response to capture final usage stats"""
|
106
|
-
chunks_with_usage = []
|
107
|
-
async for chunk in response_iter:
|
108
|
-
if isinstance(chunk, ChatCompletionChunk) and chunk.usage is not None:
|
109
|
-
chunks_with_usage.append(chunk)
|
110
|
-
yield chunk
|
111
|
-
|
112
|
-
if len(chunks_with_usage) > 0:
|
113
|
-
usage_data: TokenUsageStats = TokenUsageStats(model=chunks_with_usage[0].model, usage=Usage())
|
114
|
-
for chunk in chunks_with_usage:
|
115
|
-
usage_data.usage.prompt_tokens += chunk.usage.prompt_tokens
|
116
|
-
usage_data.usage.completion_tokens += chunk.usage.completion_tokens
|
117
|
-
usage_data.usage.total_tokens += chunk.usage.total_tokens
|
118
|
-
|
119
|
-
self._log_usage(usage_data, execution_id=execution_id)
|
120
|
-
|
121
139
|
@overload
|
122
140
|
def tokenator_openai(
|
123
141
|
client: OpenAI,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|