tokencostauto-0.1.25-py3-none-any.whl
This diff shows the contents of a publicly released package version as it appears in its public registry, and is provided for informational purposes only.
- tokencostauto/__init__.py +9 -0
- tokencostauto/constants.py +94 -0
- tokencostauto/costs.py +310 -0
- tokencostauto/model_prices.json +13138 -0
- tokencostauto-0.1.25.dist-info/METADATA +1175 -0
- tokencostauto-0.1.25.dist-info/RECORD +9 -0
- tokencostauto-0.1.25.dist-info/WHEEL +5 -0
- tokencostauto-0.1.25.dist-info/licenses/LICENSE +21 -0
- tokencostauto-0.1.25.dist-info/top_level.txt +1 -0
tokencostauto/__init__.py
ADDED
@@ -0,0 +1,9 @@
from .costs import (
    count_message_tokens,
    count_string_tokens,
    calculate_completion_cost,
    calculate_prompt_cost,
    calculate_all_costs_and_tokens,
    calculate_cost_by_tokens,
)
from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs, refresh_prices
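For orientation, a minimal usage sketch of the API re-exported above (not part of the package files; the model name and message content are illustrative):

# Hypothetical usage sketch: price a prompt/completion pair with the public API.
from tokencostauto import (
    calculate_completion_cost,
    calculate_prompt_cost,
    count_message_tokens,
)

messages = [{"role": "user", "content": "Hello world"}]
completion = "How may I assist you today?"

prompt_cost = calculate_prompt_cost(messages, "gpt-3.5-turbo")            # Decimal, USD
completion_cost = calculate_completion_cost(completion, "gpt-3.5-turbo")  # Decimal, USD
prompt_tokens = count_message_tokens(messages, "gpt-3.5-turbo")

print(f"{prompt_tokens} prompt tokens, ${prompt_cost + completion_cost} total")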
tokencostauto/constants.py
ADDED
@@ -0,0 +1,94 @@
import os
import json
import aiohttp
import asyncio
import logging

logger = logging.getLogger(__name__)

"""
Prompt (aka context) tokens are based on number of words + other chars (eg spaces and punctuation) in input.
Completion tokens are similarly based on how long chatGPT's response is.
Prompt tokens + completion tokens = total tokens.
The max total limit is typically 1 more than the prompt token limit, so there's space for at least one completion token.

You can use ChatGPT's webapp (which uses their tiktoken repo) to see how many tokens your phrase is:
https://platform.openai.com/tokenizer

Note: When asking follow-up questions, everything above and including your follow-up question
is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
"""

# How to read TOKEN_COSTS:
# Each prompt token costs __ USD per token.
# Each completion token costs __ USD per token.
# Max prompt limit of each model is __ tokens.

PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"


async def fetch_costs():
    """Fetch the latest token costs from the LiteLLM cost tracker asynchronously.
    Returns:
        dict: The token costs for each model.
    Raises:
        Exception: If the request fails.
    """
    async with aiohttp.ClientSession(trust_env=True) as session:
        async with session.get(PRICES_URL) as response:
            if response.status == 200:
                return await response.json(content_type=None)
            else:
                raise Exception(
                    f"Failed to fetch token costs, status code: {response.status}"
                )


async def update_token_costs():
    """Update the TOKEN_COSTS dictionary with the latest costs from the LiteLLM cost tracker asynchronously."""
    global TOKEN_COSTS
    try:
        fetched_costs = await fetch_costs()
        # Safely remove 'sample_spec' if it exists
        TOKEN_COSTS.update(fetched_costs)
        TOKEN_COSTS.pop("sample_spec", None)
        return TOKEN_COSTS
    except Exception as e:
        logger.error(f"Failed to update TOKEN_COSTS: {e}")
        raise


def refresh_prices(write_file=True):
    """Synchronous wrapper for update_token_costs that optionally writes to model_prices.json."""
    try:
        # Run the async function in a new event loop
        updated_costs = asyncio.run(update_token_costs())

        # Write to file if requested
        if write_file:
            file_path = os.path.join(os.path.dirname(__file__), "model_prices.json")
            with open(file_path, "w") as f:
                json.dump(TOKEN_COSTS, f, indent=4)
            logger.info(f"Updated prices written to {file_path}")

        return updated_costs
    except Exception as e:
        logger.error(f"Failed to refresh prices: {e}")
        # Return the static prices as fallback
        return TOKEN_COSTS


with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
    TOKEN_COSTS_STATIC = json.load(f)


# Set initial TOKEN_COSTS to the static values
TOKEN_COSTS = TOKEN_COSTS_STATIC.copy()

# Only run in a non-async context
if __name__ == "__main__":
    try:
        asyncio.run(update_token_costs())
        print("Token costs updated successfully")
    except Exception:
        logger.error("Failed to update token costs. Using static costs.")
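A minimal sketch of how the refresh flow above might be used (illustrative, not part of the package; the per-model keys shown are the ones read by costs.py, and the model entry is assumed to exist):

# Hypothetical usage sketch: refresh prices, then read one model's pricing entry.
from tokencostauto.constants import TOKEN_COSTS, refresh_prices

refresh_prices(write_file=False)  # on failure, the bundled static prices remain in effect

entry = TOKEN_COSTS.get("gpt-3.5-turbo", {})
print(entry.get("input_cost_per_token"), entry.get("output_cost_per_token"))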
tokencostauto/costs.py
ADDED
@@ -0,0 +1,310 @@
"""
Costs dictionary and utility tool for counting tokens
"""

import os
import tiktoken
import anthropic
from typing import Union, List, Dict
from .constants import TOKEN_COSTS
from decimal import Decimal
import logging

logger = logging.getLogger(__name__)

# Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
# https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts


def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int:
    if not any(
        supported_model in model
        for supported_model in [
            "claude-3-7-sonnet",
            "claude-3-5-sonnet",
            "claude-3-5-haiku",
            "claude-3-haiku",
            "claude-3-opus",
        ]
    ):
        raise ValueError(
            f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
        )
    try:
        return (
            anthropic.Anthropic()
            .beta.messages.count_tokens(
                model=model,
                messages=messages,
            )
            .input_tokens
        )
    except TypeError as e:
        raise e
    except Exception as e:
        raise e


def strip_ft_model_name(model: str) -> str:
    """
    Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id
    We only need the base model name to get cost info.
    """
    if model.startswith("ft:gpt-3.5-turbo"):
        model = "ft:gpt-3.5-turbo"
    return model


def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
    """
    Return the total number of tokens in a prompt's messages.
    Args:
        messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
            [{ "role": "user", "content": "Hello world"},
             { "role": "assistant", "content": "How may I assist you today?"}]
        model (str): Name of LLM to choose encoding for.
    Returns:
        Total number of tokens in message.
    """
    model = model.lower()
    model = strip_ft_model_name(model)

    # Anthropic token counting requires a valid API key
    if "claude-" in model:
        logger.warning(
            "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
        )
        return get_anthropic_token_count(messages, model)

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logger.warning("Model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
        "gpt-4-turbo",
        "gpt-4-turbo-2024-04-09",
        "gpt-4o",
        "gpt-4o-2024-05-13",
    } or model.startswith("o"):
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_message = 4
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        logger.warning(
            "gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
        )
        return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4o" in model:
        logger.warning(
            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
        )
        return count_message_tokens(messages, model="gpt-4o-2024-05-13")
    elif "gpt-4" in model:
        logger.warning(
            "gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
        )
        return count_message_tokens(messages, model="gpt-4-0613")
    else:
        raise KeyError(
            f"""num_tokens_from_messages() is not implemented for model {model}.
            See https://github.com/openai/openai-python/blob/main/chatml.md for how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens


def count_string_tokens(prompt: str, model: str) -> int:
    """
    Returns the number of tokens in a (prompt or completion) text string.

    Args:
        prompt (str): The text string
        model (str): The name of the encoding to use. (e.g., "gpt-3.5-turbo")

    Returns:
        int: The number of tokens in the text string.
    """
    model = model.lower()

    if "/" in model:
        model = model.split("/")[-1]

    if "claude-" in model:
        raise ValueError(
            "Warning: Anthropic does not support this method. Please use the `count_message_tokens` function for the exact counts."
        )

    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        logger.warning("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")

    return len(encoding.encode(prompt))


def calculate_cost_by_tokens(num_tokens: int, model: str, token_type: str) -> Decimal:
    """
    Calculate the cost based on the number of tokens and the model.

    Args:
        num_tokens (int): The number of tokens.
        model (str): The model name.
        token_type (str): Type of token ('input' or 'output').

    Returns:
        Decimal: The calculated cost in USD.
    """
    model = model.lower()
    if model not in TOKEN_COSTS:
        raise KeyError(
            f"""Model {model} is not implemented.
            Double-check your spelling, or submit an issue/PR"""
        )

    cost_per_token_key = (
        "input_cost_per_token" if token_type == "input" else "output_cost_per_token"
    )
    cost_per_token = TOKEN_COSTS[model][cost_per_token_key]

    return Decimal(str(cost_per_token)) * Decimal(num_tokens)


def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
    """
    Calculate the prompt's cost in USD.

    Args:
        prompt (Union[List[dict], str]): List of message objects or single string prompt.
        model (str): The model name.

    Returns:
        Decimal: The calculated cost in USD.

    e.g.:
    >>> prompt = [{ "role": "user", "content": "Hello world"},
                  { "role": "assistant", "content": "How may I assist you today?"}]
    >>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
    Decimal('0.0000300')
    # or
    >>> prompt = "Hello world"
    >>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
    Decimal('0.0000030')
    """
    model = model.lower()
    model = strip_ft_model_name(model)
    if model not in TOKEN_COSTS:
        raise KeyError(
            f"""Model {model} is not implemented.
            Double-check your spelling, or submit an issue/PR"""
        )
    if not isinstance(prompt, (list, str)):
        raise TypeError(
            f"Prompt must be either a string or list of message objects but found {type(prompt)} instead."
        )
    prompt_tokens = (
        count_string_tokens(prompt, model)
        if isinstance(prompt, str) and "claude-" not in model
        else count_message_tokens(prompt, model)
    )

    return calculate_cost_by_tokens(prompt_tokens, model, "input")


def calculate_completion_cost(completion: str, model: str) -> Decimal:
    """
    Calculate the completion's cost in USD.

    Args:
        completion (str): Completion string.
        model (str): The model name.

    Returns:
        Decimal: The calculated cost in USD.

    e.g.:
    >>> completion = "How may I assist you today?"
    >>> calculate_completion_cost(completion, "gpt-3.5-turbo")
    Decimal('0.000014')
    """
    model = strip_ft_model_name(model)
    if model not in TOKEN_COSTS:
        raise KeyError(
            f"""Model {model} is not implemented.
            Double-check your spelling, or submit an issue/PR"""
        )

    if not isinstance(completion, str):
        raise TypeError(
            f"Completion must be a string but found {type(completion)} instead."
        )

    if "claude-" in model:
        completion_list = [{"role": "assistant", "content": completion}]
        # Anthropic appends some 13 additional tokens to the actual completion tokens
        completion_tokens = count_message_tokens(completion_list, model) - 13
    else:
        completion_tokens = count_string_tokens(completion, model)

    return calculate_cost_by_tokens(completion_tokens, model, "output")


def calculate_all_costs_and_tokens(
    prompt: Union[List[dict], str], completion: str, model: str
) -> dict:
    """
    Calculate the prompt and completion costs and tokens in USD.

    Args:
        prompt (Union[List[dict], str]): List of message objects or single string prompt.
        completion (str): Completion string.
        model (str): The model name.

    Returns:
        dict: The calculated cost and tokens in USD.

    e.g.:
    >>> prompt = "Hello world"
    >>> completion = "How may I assist you today?"
    >>> calculate_all_costs_and_tokens(prompt, completion, "gpt-3.5-turbo")
    {'prompt_cost': Decimal('0.0000030'), 'prompt_tokens': 2, 'completion_cost': Decimal('0.000014'), 'completion_tokens': 7}
    """
    prompt_cost = calculate_prompt_cost(prompt, model)
    completion_cost = calculate_completion_cost(completion, model)
    prompt_tokens = (
        count_string_tokens(prompt, model)
        if isinstance(prompt, str) and "claude-" not in model
        else count_message_tokens(prompt, model)
    )

    if "claude-" in model:
        logger.warning("Warning: Token counting is estimated for Claude models.")
        completion_list = [{"role": "assistant", "content": completion}]
        # Anthropic appends some 13 additional tokens to the actual completion tokens
        completion_tokens = count_message_tokens(completion_list, model) - 13
    else:
        completion_tokens = count_string_tokens(completion, model)

    return {
        "prompt_cost": prompt_cost,
        "prompt_tokens": prompt_tokens,
        "completion_cost": completion_cost,
        "completion_tokens": completion_tokens,
    }
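As a rough illustration of the token-based entry point, which has no doctest above (the model name and token counts are arbitrary; not part of the package):

# Hypothetical usage sketch: price already-known token counts without re-tokenizing.
from decimal import Decimal
from tokencostauto import calculate_all_costs_and_tokens, calculate_cost_by_tokens

input_cost = calculate_cost_by_tokens(1000, "gpt-4o", token_type="input")
output_cost = calculate_cost_by_tokens(500, "gpt-4o", token_type="output")
assert isinstance(input_cost, Decimal) and isinstance(output_cost, Decimal)

# Combined breakdown for a string prompt and completion.
breakdown = calculate_all_costs_and_tokens("Hello world", "Hi there!", "gpt-4o")
# -> {'prompt_cost': ..., 'prompt_tokens': ..., 'completion_cost': ..., 'completion_tokens': ...}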