tokenator 0.1.13__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tokenator-0.1.13 → tokenator-0.1.14}/PKG-INFO +3 -3
- {tokenator-0.1.13 → tokenator-0.1.14}/pyproject.toml +3 -3
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/__init__.py +2 -7
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/anthropic/client_anthropic.py +26 -8
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/base_wrapper.py +28 -8
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/models.py +4 -4
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/openai/client_openai.py +11 -1
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/schemas.py +6 -4
- tokenator-0.1.14/src/tokenator/state.py +12 -0
- tokenator-0.1.14/src/tokenator/usage.py +307 -0
- tokenator-0.1.13/src/tokenator/usage.py +0 -269
- {tokenator-0.1.13 → tokenator-0.1.14}/LICENSE +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/README.md +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/anthropic/stream_interceptors.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/create_migrations.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/migrations/env.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/migrations/script.py.mako +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/migrations/versions/f6f1f2437513_initial_migration.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/migrations.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/openai/stream_interceptors.py +0 -0
- {tokenator-0.1.13 → tokenator-0.1.14}/src/tokenator/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: tokenator
|
3
|
-
Version: 0.1.13
|
3
|
+
Version: 0.1.14
|
4
4
|
Summary: Token usage tracking wrapper for LLMs
|
5
5
|
License: MIT
|
6
6
|
Author: Ujjwal Maheshwari
|
@@ -14,8 +14,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
16
16
|
Requires-Dist: alembic (>=1.13.0,<2.0.0)
|
17
|
-
Requires-Dist: anthropic (>=0.
|
18
|
-
Requires-Dist: openai (>=1.
|
17
|
+
Requires-Dist: anthropic (>=0.43.0,<0.44.0)
|
18
|
+
Requires-Dist: openai (>=1.59.0,<2.0.0)
|
19
19
|
Requires-Dist: requests (>=2.32.3,<3.0.0)
|
20
20
|
Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
|
21
21
|
Description-Content-Type: text/markdown
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "tokenator"
|
3
|
-
version = "0.1.13"
|
3
|
+
version = "0.1.14"
|
4
4
|
description = "Token usage tracking wrapper for LLMs"
|
5
5
|
authors = ["Ujjwal Maheshwari <your.email@example.com>"]
|
6
6
|
readme = "README.md"
|
@@ -9,11 +9,11 @@ packages = [{include = "tokenator", from = "src"}]
|
|
9
9
|
|
10
10
|
[tool.poetry.dependencies]
|
11
11
|
python = "^3.9"
|
12
|
-
openai = "^1.
|
12
|
+
openai = "^1.59.0"
|
13
13
|
sqlalchemy = "^2.0.0"
|
14
14
|
requests = "^2.32.3"
|
15
15
|
alembic = "^1.13.0"
|
16
|
-
anthropic = "^0.
|
16
|
+
anthropic = "^0.43.0"
|
17
17
|
|
18
18
|
[tool.poetry.group.dev.dependencies]
|
19
19
|
pytest = "^8.0.0"
|
@@ -5,14 +5,9 @@ from .openai.client_openai import tokenator_openai
|
|
5
5
|
from .anthropic.client_anthropic import tokenator_anthropic
|
6
6
|
from . import usage
|
7
7
|
from .utils import get_default_db_path
|
8
|
-
from .migrations import check_and_run_migrations
|
8
|
+
from .usage import TokenUsageService
|
9
9
|
|
10
|
-
|
10
|
+
usage = TokenUsageService() # noqa: F811
|
11
11
|
__all__ = ["tokenator_openai", "tokenator_anthropic", "usage", "get_default_db_path"]
|
12
12
|
|
13
13
|
logger = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
try:
|
16
|
-
check_and_run_migrations()
|
17
|
-
except Exception as e:
|
18
|
-
logger.warning(f"Failed to run migrations, but continuing anyway: {e}")
|
@@ -8,7 +8,11 @@ from anthropic.types import Message, RawMessageStartEvent, RawMessageDeltaEvent
|
|
8
8
|
|
9
9
|
from ..models import Usage, TokenUsageStats
|
10
10
|
from ..base_wrapper import BaseWrapper, ResponseType
|
11
|
-
from .stream_interceptors import
|
11
|
+
from .stream_interceptors import (
|
12
|
+
AnthropicAsyncStreamInterceptor,
|
13
|
+
AnthropicSyncStreamInterceptor,
|
14
|
+
)
|
15
|
+
from ..state import is_tokenator_enabled
|
12
16
|
|
13
17
|
logger = logging.getLogger(__name__)
|
14
18
|
|
@@ -56,15 +60,23 @@ class BaseAnthropicWrapper(BaseWrapper):
|
|
56
60
|
|
57
61
|
def _create_usage_callback(execution_id, log_usage_fn):
|
58
62
|
"""Creates a callback function for processing usage statistics from stream chunks."""
|
63
|
+
|
59
64
|
def usage_callback(chunks):
|
60
65
|
if not chunks:
|
61
66
|
return
|
62
|
-
|
67
|
+
|
68
|
+
# Skip if tokenator is disabled
|
69
|
+
if not is_tokenator_enabled:
|
70
|
+
logger.debug("Tokenator is disabled - skipping stream usage logging")
|
71
|
+
return
|
72
|
+
|
63
73
|
usage_data = TokenUsageStats(
|
64
|
-
model=chunks[0].message.model
|
74
|
+
model=chunks[0].message.model
|
75
|
+
if isinstance(chunks[0], RawMessageStartEvent)
|
76
|
+
else "",
|
65
77
|
usage=Usage(),
|
66
78
|
)
|
67
|
-
|
79
|
+
|
68
80
|
for chunk in chunks:
|
69
81
|
if isinstance(chunk, RawMessageStartEvent):
|
70
82
|
usage_data.model = chunk.message.model
|
@@ -72,8 +84,10 @@ def _create_usage_callback(execution_id, log_usage_fn):
|
|
72
84
|
usage_data.usage.completion_tokens += chunk.message.usage.output_tokens
|
73
85
|
elif isinstance(chunk, RawMessageDeltaEvent):
|
74
86
|
usage_data.usage.completion_tokens += chunk.usage.output_tokens
|
75
|
-
|
76
|
-
usage_data.usage.total_tokens = usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
|
87
|
+
|
88
|
+
usage_data.usage.total_tokens = (
|
89
|
+
usage_data.usage.prompt_tokens + usage_data.usage.completion_tokens
|
90
|
+
)
|
77
91
|
log_usage_fn(usage_data, execution_id=execution_id)
|
78
92
|
|
79
93
|
return usage_callback
|
@@ -84,7 +98,9 @@ class AnthropicWrapper(BaseAnthropicWrapper):
|
|
84
98
|
self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
|
85
99
|
) -> Union[Message, Iterator[Message]]:
|
86
100
|
"""Create a message completion and log token usage."""
|
87
|
-
logger.debug(
|
101
|
+
logger.debug(
|
102
|
+
"Creating message completion with args: %s, kwargs: %s", args, kwargs
|
103
|
+
)
|
88
104
|
|
89
105
|
if kwargs.get("stream", False):
|
90
106
|
base_stream = self.client.messages.create(*args, **kwargs)
|
@@ -105,7 +121,9 @@ class AsyncAnthropicWrapper(BaseAnthropicWrapper):
|
|
105
121
|
self, *args: Any, execution_id: Optional[str] = None, **kwargs: Any
|
106
122
|
) -> Union[Message, AsyncIterator[Message]]:
|
107
123
|
"""Create a message completion and log token usage."""
|
108
|
-
logger.debug(
|
124
|
+
logger.debug(
|
125
|
+
"Creating message completion with args: %s, kwargs: %s", args, kwargs
|
126
|
+
)
|
109
127
|
|
110
128
|
if kwargs.get("stream", False):
|
111
129
|
base_stream = await self.client.messages.create(*args, **kwargs)
|
@@ -7,6 +7,9 @@ import uuid
|
|
7
7
|
|
8
8
|
from .models import TokenUsageStats
|
9
9
|
from .schemas import get_session, TokenUsage
|
10
|
+
from . import state
|
11
|
+
|
12
|
+
from .migrations import check_and_run_migrations
|
10
13
|
|
11
14
|
logger = logging.getLogger(__name__)
|
12
15
|
|
@@ -16,17 +19,30 @@ ResponseType = TypeVar("ResponseType")
|
|
16
19
|
class BaseWrapper:
|
17
20
|
def __init__(self, client: Any, db_path: Optional[str] = None):
|
18
21
|
"""Initialize the base wrapper."""
|
19
|
-
|
22
|
+
state.is_tokenator_enabled = True
|
23
|
+
try:
|
24
|
+
self.client = client
|
20
25
|
|
21
|
-
|
22
|
-
|
23
|
-
|
26
|
+
if db_path:
|
27
|
+
Path(db_path).parent.mkdir(parents=True, exist_ok=True)
|
28
|
+
logger.info("Created database directory at: %s", Path(db_path).parent)
|
29
|
+
state.db_path = db_path # Store db_path in state
|
24
30
|
|
25
|
-
|
31
|
+
else:
|
32
|
+
state.db_path = None # Use default path
|
26
33
|
|
27
|
-
|
28
|
-
|
29
|
-
|
34
|
+
self.Session = get_session()
|
35
|
+
|
36
|
+
logger.debug(
|
37
|
+
"Initializing %s with db_path: %s", self.__class__.__name__, db_path
|
38
|
+
)
|
39
|
+
|
40
|
+
check_and_run_migrations(db_path)
|
41
|
+
except Exception as e:
|
42
|
+
state.is_tokenator_enabled = False
|
43
|
+
logger.warning(
|
44
|
+
f"Tokenator initialization failed. Usage tracking will be disabled. Error: {e}"
|
45
|
+
)
|
30
46
|
|
31
47
|
def _log_usage_impl(
|
32
48
|
self, token_usage_stats: TokenUsageStats, session, execution_id: str
|
@@ -59,6 +75,10 @@ class BaseWrapper:
|
|
59
75
|
self, token_usage_stats: TokenUsageStats, execution_id: Optional[str] = None
|
60
76
|
):
|
61
77
|
"""Log token usage to database."""
|
78
|
+
if not state.is_tokenator_enabled:
|
79
|
+
logger.debug("Tokenator is disabled - skipping usage logging")
|
80
|
+
return
|
81
|
+
|
62
82
|
if not execution_id:
|
63
83
|
execution_id = str(uuid.uuid4())
|
64
84
|
|
@@ -8,10 +8,10 @@ class TokenRate(BaseModel):
|
|
8
8
|
|
9
9
|
|
10
10
|
class TokenMetrics(BaseModel):
|
11
|
-
total_cost: float = Field(
|
12
|
-
total_tokens: int = Field(
|
13
|
-
prompt_tokens: int = Field(
|
14
|
-
completion_tokens: int = Field(
|
11
|
+
total_cost: float = Field(default=0, description="Total cost in USD")
|
12
|
+
total_tokens: int = Field(default=0, description="Total tokens used")
|
13
|
+
prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
|
14
|
+
completion_tokens: int = Field(default=0, description="Number of completion tokens")
|
15
15
|
|
16
16
|
|
17
17
|
class ModelUsage(TokenMetrics):
|
@@ -8,7 +8,11 @@ from openai.types.chat import ChatCompletion, ChatCompletionChunk
|
|
8
8
|
|
9
9
|
from ..models import Usage, TokenUsageStats
|
10
10
|
from ..base_wrapper import BaseWrapper, ResponseType
|
11
|
-
from .stream_interceptors import
|
11
|
+
from .stream_interceptors import (
|
12
|
+
OpenAIAsyncStreamInterceptor,
|
13
|
+
OpenAISyncStreamInterceptor,
|
14
|
+
)
|
15
|
+
from ..state import is_tokenator_enabled
|
12
16
|
|
13
17
|
logger = logging.getLogger(__name__)
|
14
18
|
|
@@ -65,6 +69,12 @@ def _create_usage_callback(execution_id, log_usage_fn):
|
|
65
69
|
def usage_callback(chunks):
|
66
70
|
if not chunks:
|
67
71
|
return
|
72
|
+
|
73
|
+
# Skip if tokenator is disabled
|
74
|
+
if not is_tokenator_enabled:
|
75
|
+
logger.debug("Tokenator is disabled - skipping stream usage logging")
|
76
|
+
return
|
77
|
+
|
68
78
|
# Build usage_data from the first chunk's model
|
69
79
|
usage_data = TokenUsageStats(
|
70
80
|
model=chunks[0].model,
|
@@ -1,25 +1,27 @@
|
|
1
1
|
"""SQLAlchemy models for tokenator."""
|
2
2
|
|
3
3
|
from datetime import datetime
|
4
|
+
from typing import Optional
|
4
5
|
|
5
6
|
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Index
|
6
7
|
from sqlalchemy.orm import sessionmaker, scoped_session, declarative_base
|
7
8
|
|
8
9
|
from .utils import get_default_db_path
|
10
|
+
from . import state # Import state to access db_path
|
9
11
|
|
10
12
|
Base = declarative_base()
|
11
13
|
|
12
14
|
|
13
|
-
def get_engine(db_path: str = None):
|
15
|
+
def get_engine(db_path: Optional[str] = None):
|
14
16
|
"""Create SQLAlchemy engine with the given database path."""
|
15
17
|
if db_path is None:
|
16
|
-
db_path = get_default_db_path()
|
18
|
+
db_path = state.db_path or get_default_db_path() # Use state.db_path if set
|
17
19
|
return create_engine(f"sqlite:///{db_path}", echo=False)
|
18
20
|
|
19
21
|
|
20
|
-
def get_session(
|
22
|
+
def get_session():
|
21
23
|
"""Create a thread-safe session factory."""
|
22
|
-
engine = get_engine(
|
24
|
+
engine = get_engine()
|
23
25
|
# Base.metadata.create_all(engine)
|
24
26
|
session_factory = sessionmaker(bind=engine)
|
25
27
|
return scoped_session(session_factory)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
"""Global state for tokenator."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
logger = logging.getLogger(__name__)
|
7
|
+
|
8
|
+
# Global flag to track if tokenator is properly initialized
|
9
|
+
is_tokenator_enabled = True
|
10
|
+
|
11
|
+
# Store the database path
|
12
|
+
db_path: Optional[str] = None
|
@@ -0,0 +1,307 @@
|
|
1
|
+
"""Cost analysis functions for token usage."""
|
2
|
+
|
3
|
+
from datetime import datetime, timedelta
|
4
|
+
from typing import Dict, Optional, Union
|
5
|
+
|
6
|
+
from .schemas import get_session, TokenUsage
|
7
|
+
from .models import TokenRate, TokenUsageReport, ModelUsage, ProviderUsage
|
8
|
+
from . import state
|
9
|
+
|
10
|
+
import requests
|
11
|
+
import logging
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class TokenUsageService:
|
17
|
+
def __init__(self):
|
18
|
+
if not state.is_tokenator_enabled:
|
19
|
+
logger.info("Tokenator is disabled. Database access is unavailable.")
|
20
|
+
|
21
|
+
self.MODEL_COSTS = self._get_model_costs()
|
22
|
+
|
23
|
+
def _get_model_costs(self) -> Dict[str, TokenRate]:
|
24
|
+
if not state.is_tokenator_enabled:
|
25
|
+
return {}
|
26
|
+
url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
27
|
+
response = requests.get(url)
|
28
|
+
data = response.json()
|
29
|
+
|
30
|
+
return {
|
31
|
+
model: TokenRate(
|
32
|
+
prompt=info["input_cost_per_token"],
|
33
|
+
completion=info["output_cost_per_token"],
|
34
|
+
)
|
35
|
+
for model, info in data.items()
|
36
|
+
if "input_cost_per_token" in info and "output_cost_per_token" in info
|
37
|
+
}
|
38
|
+
|
39
|
+
def _calculate_cost(
|
40
|
+
self, usages: list[TokenUsage], provider: Optional[str] = None
|
41
|
+
) -> TokenUsageReport:
|
42
|
+
if not state.is_tokenator_enabled:
|
43
|
+
logger.warning("Tokenator is disabled. Skipping cost calculation.")
|
44
|
+
return TokenUsageReport()
|
45
|
+
|
46
|
+
if not self.MODEL_COSTS:
|
47
|
+
logger.warning("No model costs available.")
|
48
|
+
return TokenUsageReport()
|
49
|
+
|
50
|
+
GPT4O_PRICING = self.MODEL_COSTS.get(
|
51
|
+
"gpt-4o", TokenRate(prompt=0.0000025, completion=0.000010)
|
52
|
+
)
|
53
|
+
|
54
|
+
# Existing calculation logic...
|
55
|
+
provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
|
56
|
+
logger.debug(f"usages: {len(usages)}")
|
57
|
+
|
58
|
+
for usage in usages:
|
59
|
+
# 1st priority - direct match
|
60
|
+
model_key = usage.model
|
61
|
+
if model_key in self.MODEL_COSTS:
|
62
|
+
pass
|
63
|
+
# 2nd priority - provider/model format
|
64
|
+
elif f"{usage.provider}/{usage.model}" in self.MODEL_COSTS:
|
65
|
+
model_key = f"{usage.provider}/{usage.model}"
|
66
|
+
# 3rd priority - contains search
|
67
|
+
else:
|
68
|
+
matched_keys = [k for k in self.MODEL_COSTS.keys() if usage.model in k]
|
69
|
+
if matched_keys:
|
70
|
+
model_key = matched_keys[0]
|
71
|
+
logger.warning(
|
72
|
+
f"Model {usage.model} matched with {model_key} in pricing data via contains search"
|
73
|
+
)
|
74
|
+
else:
|
75
|
+
# Fallback to GPT4O pricing
|
76
|
+
logger.warning(
|
77
|
+
f"Model {model_key} not found in pricing data. Using gpt-4o pricing as fallback "
|
78
|
+
f"(prompt: ${GPT4O_PRICING.prompt}/token, completion: ${GPT4O_PRICING.completion}/token)"
|
79
|
+
)
|
80
|
+
self.MODEL_COSTS[model_key] = GPT4O_PRICING
|
81
|
+
|
82
|
+
provider_key = usage.provider or "default"
|
83
|
+
provider_model_usages.setdefault(provider_key, {}).setdefault(
|
84
|
+
model_key, []
|
85
|
+
).append(usage)
|
86
|
+
|
87
|
+
# Calculate totals for each level
|
88
|
+
providers_list = []
|
89
|
+
total_metrics = {
|
90
|
+
"total_cost": 0.0,
|
91
|
+
"total_tokens": 0,
|
92
|
+
"prompt_tokens": 0,
|
93
|
+
"completion_tokens": 0,
|
94
|
+
}
|
95
|
+
|
96
|
+
for provider, model_usages in provider_model_usages.items():
|
97
|
+
provider_metrics = {
|
98
|
+
"total_cost": 0.0,
|
99
|
+
"total_tokens": 0,
|
100
|
+
"prompt_tokens": 0,
|
101
|
+
"completion_tokens": 0,
|
102
|
+
}
|
103
|
+
models_list = []
|
104
|
+
|
105
|
+
for model_key, usages in model_usages.items():
|
106
|
+
model_cost = sum(
|
107
|
+
usage.prompt_tokens * self.MODEL_COSTS[model_key].prompt
|
108
|
+
+ usage.completion_tokens * self.MODEL_COSTS[model_key].completion
|
109
|
+
for usage in usages
|
110
|
+
)
|
111
|
+
model_total = sum(usage.total_tokens for usage in usages)
|
112
|
+
model_prompt = sum(usage.prompt_tokens for usage in usages)
|
113
|
+
model_completion = sum(usage.completion_tokens for usage in usages)
|
114
|
+
|
115
|
+
models_list.append(
|
116
|
+
ModelUsage(
|
117
|
+
model=model_key,
|
118
|
+
total_cost=round(model_cost, 6),
|
119
|
+
total_tokens=model_total,
|
120
|
+
prompt_tokens=model_prompt,
|
121
|
+
completion_tokens=model_completion,
|
122
|
+
)
|
123
|
+
)
|
124
|
+
|
125
|
+
provider_metrics["total_cost"] += model_cost
|
126
|
+
provider_metrics["total_tokens"] += model_total
|
127
|
+
provider_metrics["prompt_tokens"] += model_prompt
|
128
|
+
provider_metrics["completion_tokens"] += model_completion
|
129
|
+
|
130
|
+
providers_list.append(
|
131
|
+
ProviderUsage(
|
132
|
+
provider=provider,
|
133
|
+
models=models_list,
|
134
|
+
**{
|
135
|
+
k: (round(v, 6) if k == "total_cost" else v)
|
136
|
+
for k, v in provider_metrics.items()
|
137
|
+
},
|
138
|
+
)
|
139
|
+
)
|
140
|
+
|
141
|
+
for key in total_metrics:
|
142
|
+
total_metrics[key] += provider_metrics[key]
|
143
|
+
|
144
|
+
return TokenUsageReport(
|
145
|
+
providers=providers_list,
|
146
|
+
**{
|
147
|
+
k: (round(v, 6) if k == "total_cost" else v)
|
148
|
+
for k, v in total_metrics.items()
|
149
|
+
},
|
150
|
+
)
|
151
|
+
|
152
|
+
def _query_usage(
|
153
|
+
self,
|
154
|
+
start_date: datetime,
|
155
|
+
end_date: datetime,
|
156
|
+
provider: Optional[str] = None,
|
157
|
+
model: Optional[str] = None,
|
158
|
+
) -> TokenUsageReport:
|
159
|
+
if not state.is_tokenator_enabled:
|
160
|
+
logger.warning("Tokenator is disabled. Skipping usage query.")
|
161
|
+
return TokenUsageReport()
|
162
|
+
|
163
|
+
session = get_session()()
|
164
|
+
try:
|
165
|
+
query = session.query(TokenUsage).filter(
|
166
|
+
TokenUsage.created_at.between(start_date, end_date)
|
167
|
+
)
|
168
|
+
|
169
|
+
if provider:
|
170
|
+
query = query.filter(TokenUsage.provider == provider)
|
171
|
+
if model:
|
172
|
+
query = query.filter(TokenUsage.model == model)
|
173
|
+
|
174
|
+
usages = query.all()
|
175
|
+
|
176
|
+
return self._calculate_cost(usages, provider or "all")
|
177
|
+
finally:
|
178
|
+
session.close()
|
179
|
+
|
180
|
+
def last_hour(
|
181
|
+
self, provider: Optional[str] = None, model: Optional[str] = None
|
182
|
+
) -> TokenUsageReport:
|
183
|
+
if not state.is_tokenator_enabled:
|
184
|
+
return TokenUsageReport()
|
185
|
+
logger.debug(
|
186
|
+
f"Getting cost analysis for last hour (provider={provider}, model={model})"
|
187
|
+
)
|
188
|
+
end = datetime.now()
|
189
|
+
start = end - timedelta(hours=1)
|
190
|
+
return self._query_usage(start, end, provider, model)
|
191
|
+
|
192
|
+
def last_day(
|
193
|
+
self, provider: Optional[str] = None, model: Optional[str] = None
|
194
|
+
) -> TokenUsageReport:
|
195
|
+
if not state.is_tokenator_enabled:
|
196
|
+
return TokenUsageReport()
|
197
|
+
logger.debug(
|
198
|
+
f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
|
199
|
+
)
|
200
|
+
end = datetime.now()
|
201
|
+
start = end - timedelta(days=1)
|
202
|
+
return self._query_usage(start, end, provider, model)
|
203
|
+
|
204
|
+
def last_week(
|
205
|
+
self, provider: Optional[str] = None, model: Optional[str] = None
|
206
|
+
) -> TokenUsageReport:
|
207
|
+
if not state.is_tokenator_enabled:
|
208
|
+
return TokenUsageReport()
|
209
|
+
logger.debug(
|
210
|
+
f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
|
211
|
+
)
|
212
|
+
end = datetime.now()
|
213
|
+
start = end - timedelta(weeks=1)
|
214
|
+
return self._query_usage(start, end, provider, model)
|
215
|
+
|
216
|
+
def last_month(
|
217
|
+
self, provider: Optional[str] = None, model: Optional[str] = None
|
218
|
+
) -> TokenUsageReport:
|
219
|
+
if not state.is_tokenator_enabled:
|
220
|
+
return TokenUsageReport()
|
221
|
+
logger.debug(
|
222
|
+
f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
|
223
|
+
)
|
224
|
+
end = datetime.now()
|
225
|
+
start = end - timedelta(days=30)
|
226
|
+
return self._query_usage(start, end, provider, model)
|
227
|
+
|
228
|
+
def between(
|
229
|
+
self,
|
230
|
+
start_date: Union[datetime, str],
|
231
|
+
end_date: Union[datetime, str],
|
232
|
+
provider: Optional[str] = None,
|
233
|
+
model: Optional[str] = None,
|
234
|
+
) -> TokenUsageReport:
|
235
|
+
if not state.is_tokenator_enabled:
|
236
|
+
return TokenUsageReport()
|
237
|
+
logger.debug(
|
238
|
+
f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
|
239
|
+
)
|
240
|
+
|
241
|
+
if isinstance(start_date, str):
|
242
|
+
try:
|
243
|
+
start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
|
244
|
+
except ValueError:
|
245
|
+
logger.warning(
|
246
|
+
f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
|
247
|
+
)
|
248
|
+
start = datetime.strptime(start_date, "%Y-%m-%d")
|
249
|
+
else:
|
250
|
+
start = start_date
|
251
|
+
|
252
|
+
if isinstance(end_date, str):
|
253
|
+
try:
|
254
|
+
end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
|
255
|
+
except ValueError:
|
256
|
+
logger.warning(
|
257
|
+
f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
|
258
|
+
)
|
259
|
+
end = (
|
260
|
+
datetime.strptime(end_date, "%Y-%m-%d")
|
261
|
+
+ timedelta(days=1)
|
262
|
+
- timedelta(seconds=1)
|
263
|
+
)
|
264
|
+
else:
|
265
|
+
end = end_date
|
266
|
+
|
267
|
+
return self._query_usage(start, end, provider, model)
|
268
|
+
|
269
|
+
def for_execution(self, execution_id: str) -> TokenUsageReport:
|
270
|
+
if not state.is_tokenator_enabled:
|
271
|
+
return TokenUsageReport()
|
272
|
+
logger.debug(f"Getting cost analysis for execution_id={execution_id}")
|
273
|
+
session = get_session()()
|
274
|
+
try:
|
275
|
+
query = session.query(TokenUsage).filter(
|
276
|
+
TokenUsage.execution_id == execution_id
|
277
|
+
)
|
278
|
+
return self._calculate_cost(query.all())
|
279
|
+
finally:
|
280
|
+
session.close()
|
281
|
+
|
282
|
+
def last_execution(self) -> TokenUsageReport:
|
283
|
+
if not state.is_tokenator_enabled:
|
284
|
+
return TokenUsageReport()
|
285
|
+
logger.debug("Getting cost analysis for last execution")
|
286
|
+
session = get_session()()
|
287
|
+
try:
|
288
|
+
query = (
|
289
|
+
session.query(TokenUsage).order_by(TokenUsage.created_at.desc()).first()
|
290
|
+
)
|
291
|
+
if query:
|
292
|
+
return self.for_execution(query.execution_id)
|
293
|
+
return TokenUsageReport()
|
294
|
+
finally:
|
295
|
+
session.close()
|
296
|
+
|
297
|
+
def all_time(self) -> TokenUsageReport:
|
298
|
+
if not state.is_tokenator_enabled:
|
299
|
+
return TokenUsageReport()
|
300
|
+
|
301
|
+
logger.warning("Getting cost analysis for all time. This may take a while...")
|
302
|
+
session = get_session()()
|
303
|
+
try:
|
304
|
+
query = session.query(TokenUsage)
|
305
|
+
return self._calculate_cost(query.all())
|
306
|
+
finally:
|
307
|
+
session.close()
|
@@ -1,269 +0,0 @@
|
|
1
|
-
"""Cost analysis functions for token usage."""
|
2
|
-
|
3
|
-
from datetime import datetime, timedelta
|
4
|
-
from typing import Dict, Optional, Union
|
5
|
-
|
6
|
-
|
7
|
-
from .schemas import get_session, TokenUsage
|
8
|
-
from .models import TokenRate, TokenUsageReport, ModelUsage, ProviderUsage
|
9
|
-
|
10
|
-
import requests
|
11
|
-
import logging
|
12
|
-
|
13
|
-
logger = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
|
16
|
-
def _get_model_costs() -> Dict[str, TokenRate]:
|
17
|
-
url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
|
18
|
-
response = requests.get(url)
|
19
|
-
data = response.json()
|
20
|
-
|
21
|
-
return {
|
22
|
-
model: TokenRate(
|
23
|
-
prompt=info["input_cost_per_token"],
|
24
|
-
completion=info["output_cost_per_token"],
|
25
|
-
)
|
26
|
-
for model, info in data.items()
|
27
|
-
if "input_cost_per_token" in info and "output_cost_per_token" in info
|
28
|
-
}
|
29
|
-
|
30
|
-
|
31
|
-
MODEL_COSTS = _get_model_costs()
|
32
|
-
|
33
|
-
|
34
|
-
def _calculate_cost(
|
35
|
-
usages: list[TokenUsage], provider: Optional[str] = None
|
36
|
-
) -> TokenUsageReport:
|
37
|
-
"""Calculate cost from token usage records."""
|
38
|
-
# Group usages by provider and model
|
39
|
-
provider_model_usages: Dict[str, Dict[str, list[TokenUsage]]] = {}
|
40
|
-
|
41
|
-
print(f"usages: {len(usages)}")
|
42
|
-
|
43
|
-
for usage in usages:
|
44
|
-
if usage.model not in MODEL_COSTS:
|
45
|
-
continue
|
46
|
-
|
47
|
-
provider = usage.provider
|
48
|
-
if provider not in provider_model_usages:
|
49
|
-
provider_model_usages[provider] = {}
|
50
|
-
|
51
|
-
if usage.model not in provider_model_usages[provider]:
|
52
|
-
provider_model_usages[provider][usage.model] = []
|
53
|
-
|
54
|
-
provider_model_usages[provider][usage.model].append(usage)
|
55
|
-
|
56
|
-
# Calculate totals for each level
|
57
|
-
providers_list = []
|
58
|
-
total_metrics = {
|
59
|
-
"total_cost": 0.0,
|
60
|
-
"total_tokens": 0,
|
61
|
-
"prompt_tokens": 0,
|
62
|
-
"completion_tokens": 0,
|
63
|
-
}
|
64
|
-
|
65
|
-
for provider, model_usages in provider_model_usages.items():
|
66
|
-
provider_metrics = {
|
67
|
-
"total_cost": 0.0,
|
68
|
-
"total_tokens": 0,
|
69
|
-
"prompt_tokens": 0,
|
70
|
-
"completion_tokens": 0,
|
71
|
-
}
|
72
|
-
models_list = []
|
73
|
-
|
74
|
-
for model, usages in model_usages.items():
|
75
|
-
model_cost = 0.0
|
76
|
-
model_total = 0
|
77
|
-
model_prompt = 0
|
78
|
-
model_completion = 0
|
79
|
-
|
80
|
-
for usage in usages:
|
81
|
-
model_prompt += usage.prompt_tokens
|
82
|
-
model_completion += usage.completion_tokens
|
83
|
-
model_total += usage.total_tokens
|
84
|
-
|
85
|
-
model_cost += usage.prompt_tokens * MODEL_COSTS[usage.model].prompt
|
86
|
-
model_cost += (
|
87
|
-
usage.completion_tokens * MODEL_COSTS[usage.model].completion
|
88
|
-
)
|
89
|
-
|
90
|
-
models_list.append(
|
91
|
-
ModelUsage(
|
92
|
-
model=model,
|
93
|
-
total_cost=round(model_cost, 6),
|
94
|
-
total_tokens=model_total,
|
95
|
-
prompt_tokens=model_prompt,
|
96
|
-
completion_tokens=model_completion,
|
97
|
-
)
|
98
|
-
)
|
99
|
-
|
100
|
-
# Add to provider totals
|
101
|
-
provider_metrics["total_cost"] += model_cost
|
102
|
-
provider_metrics["total_tokens"] += model_total
|
103
|
-
provider_metrics["prompt_tokens"] += model_prompt
|
104
|
-
provider_metrics["completion_tokens"] += model_completion
|
105
|
-
|
106
|
-
providers_list.append(
|
107
|
-
ProviderUsage(
|
108
|
-
provider=provider,
|
109
|
-
models=models_list,
|
110
|
-
**{
|
111
|
-
k: (round(v, 6) if k == "total_cost" else v)
|
112
|
-
for k, v in provider_metrics.items()
|
113
|
-
},
|
114
|
-
)
|
115
|
-
)
|
116
|
-
|
117
|
-
# Add to grand totals
|
118
|
-
for key in total_metrics:
|
119
|
-
total_metrics[key] += provider_metrics[key]
|
120
|
-
|
121
|
-
return TokenUsageReport(
|
122
|
-
providers=providers_list,
|
123
|
-
**{
|
124
|
-
k: (round(v, 6) if k == "total_cost" else v)
|
125
|
-
for k, v in total_metrics.items()
|
126
|
-
},
|
127
|
-
)
|
128
|
-
|
129
|
-
|
130
|
-
def _query_usage(
|
131
|
-
start_date: datetime,
|
132
|
-
end_date: datetime,
|
133
|
-
provider: Optional[str] = None,
|
134
|
-
model: Optional[str] = None,
|
135
|
-
) -> TokenUsageReport:
|
136
|
-
"""Query token usage for a specific time period."""
|
137
|
-
session = get_session()()
|
138
|
-
try:
|
139
|
-
query = session.query(TokenUsage).filter(
|
140
|
-
TokenUsage.created_at.between(start_date, end_date)
|
141
|
-
)
|
142
|
-
|
143
|
-
if provider:
|
144
|
-
query = query.filter(TokenUsage.provider == provider)
|
145
|
-
if model:
|
146
|
-
query = query.filter(TokenUsage.model == model)
|
147
|
-
|
148
|
-
usages = query.all()
|
149
|
-
return _calculate_cost(usages, provider or "all")
|
150
|
-
finally:
|
151
|
-
session.close()
|
152
|
-
|
153
|
-
|
154
|
-
def last_hour(
|
155
|
-
provider: Optional[str] = None, model: Optional[str] = None
|
156
|
-
) -> TokenUsageReport:
|
157
|
-
"""Get cost analysis for the last hour."""
|
158
|
-
logger.debug(
|
159
|
-
f"Getting cost analysis for last hour (provider={provider}, model={model})"
|
160
|
-
)
|
161
|
-
end = datetime.now()
|
162
|
-
start = end - timedelta(hours=1)
|
163
|
-
return _query_usage(start, end, provider, model)
|
164
|
-
|
165
|
-
|
166
|
-
def last_day(
|
167
|
-
provider: Optional[str] = None, model: Optional[str] = None
|
168
|
-
) -> TokenUsageReport:
|
169
|
-
"""Get cost analysis for the last 24 hours."""
|
170
|
-
logger.debug(
|
171
|
-
f"Getting cost analysis for last 24 hours (provider={provider}, model={model})"
|
172
|
-
)
|
173
|
-
end = datetime.now()
|
174
|
-
start = end - timedelta(days=1)
|
175
|
-
return _query_usage(start, end, provider, model)
|
176
|
-
|
177
|
-
|
178
|
-
def last_week(
|
179
|
-
provider: Optional[str] = None, model: Optional[str] = None
|
180
|
-
) -> TokenUsageReport:
|
181
|
-
"""Get cost analysis for the last 7 days."""
|
182
|
-
logger.debug(
|
183
|
-
f"Getting cost analysis for last 7 days (provider={provider}, model={model})"
|
184
|
-
)
|
185
|
-
end = datetime.now()
|
186
|
-
start = end - timedelta(weeks=1)
|
187
|
-
return _query_usage(start, end, provider, model)
|
188
|
-
|
189
|
-
|
190
|
-
def last_month(
|
191
|
-
provider: Optional[str] = None, model: Optional[str] = None
|
192
|
-
) -> TokenUsageReport:
|
193
|
-
"""Get cost analysis for the last 30 days."""
|
194
|
-
logger.debug(
|
195
|
-
f"Getting cost analysis for last 30 days (provider={provider}, model={model})"
|
196
|
-
)
|
197
|
-
end = datetime.now()
|
198
|
-
start = end - timedelta(days=30)
|
199
|
-
return _query_usage(start, end, provider, model)
|
200
|
-
|
201
|
-
|
202
|
-
def between(
|
203
|
-
start_date: Union[datetime, str],
|
204
|
-
end_date: Union[datetime, str],
|
205
|
-
provider: Optional[str] = None,
|
206
|
-
model: Optional[str] = None,
|
207
|
-
) -> TokenUsageReport:
|
208
|
-
"""Get cost analysis between two dates.
|
209
|
-
|
210
|
-
Args:
|
211
|
-
start_date: datetime object or string (format: YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)
|
212
|
-
end_date: datetime object or string (format: YYYY-MM-DD or YYYY-MM-DD HH:MM:SS)
|
213
|
-
"""
|
214
|
-
logger.debug(
|
215
|
-
f"Getting cost analysis between {start_date} and {end_date} (provider={provider}, model={model})"
|
216
|
-
)
|
217
|
-
|
218
|
-
if isinstance(start_date, str):
|
219
|
-
try:
|
220
|
-
start = datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
|
221
|
-
except ValueError:
|
222
|
-
logger.warning(
|
223
|
-
f"Date-only string provided for start_date: {start_date}. Setting time to 00:00:00"
|
224
|
-
)
|
225
|
-
start = datetime.strptime(start_date, "%Y-%m-%d")
|
226
|
-
|
227
|
-
else:
|
228
|
-
start = start_date
|
229
|
-
|
230
|
-
if isinstance(end_date, str):
|
231
|
-
try:
|
232
|
-
end = datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S")
|
233
|
-
except ValueError:
|
234
|
-
logger.warning(
|
235
|
-
f"Date-only string provided for end_date: {end_date}. Setting time to 23:59:59"
|
236
|
-
)
|
237
|
-
end = (
|
238
|
-
datetime.strptime(end_date, "%Y-%m-%d")
|
239
|
-
+ timedelta(days=1)
|
240
|
-
- timedelta(seconds=1)
|
241
|
-
)
|
242
|
-
else:
|
243
|
-
end = end_date
|
244
|
-
|
245
|
-
return _query_usage(start, end, provider, model)
|
246
|
-
|
247
|
-
|
248
|
-
def for_execution(execution_id: str) -> TokenUsageReport:
|
249
|
-
"""Get cost analysis for a specific execution."""
|
250
|
-
logger.debug(f"Getting cost analysis for execution_id={execution_id}")
|
251
|
-
session = get_session()()
|
252
|
-
query = session.query(TokenUsage).filter(TokenUsage.execution_id == execution_id)
|
253
|
-
return _calculate_cost(query.all())
|
254
|
-
|
255
|
-
|
256
|
-
def last_execution() -> TokenUsageReport:
|
257
|
-
"""Get cost analysis for the last execution_id."""
|
258
|
-
logger.debug("Getting cost analysis for last execution")
|
259
|
-
session = get_session()()
|
260
|
-
query = session.query(TokenUsage).order_by(TokenUsage.created_at.desc()).first()
|
261
|
-
return for_execution(query.execution_id)
|
262
|
-
|
263
|
-
|
264
|
-
def all_time() -> TokenUsageReport:
|
265
|
-
"""Get cost analysis for all time."""
|
266
|
-
logger.warning("Getting cost analysis for all time. This may take a while...")
|
267
|
-
session = get_session()()
|
268
|
-
query = session.query(TokenUsage).all()
|
269
|
-
return for_execution(query.execution_id)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|