tokencostauto-0.1.25-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tokencostauto/__init__.py ADDED
@@ -0,0 +1,9 @@
+ from .costs import (
+     count_message_tokens,
+     count_string_tokens,
+     calculate_completion_cost,
+     calculate_prompt_cost,
+     calculate_all_costs_and_tokens,
+     calculate_cost_by_tokens,
+ )
+ from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs, refresh_prices
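
The package's public surface is the six helpers re-exported above plus the price table and refresh utilities from constants. As a quick orientation, here is a minimal usage sketch (it assumes the wheel is installed under its import name tokencostauto and that the model key exists in the bundled price table; the printed figures depend on the prices currently loaded):

    from tokencostauto import (
        calculate_completion_cost,
        calculate_prompt_cost,
        count_message_tokens,
    )

    messages = [{"role": "user", "content": "Hello world"}]
    model = "gpt-3.5-turbo"  # must be a key in TOKEN_COSTS

    prompt_cost = calculate_prompt_cost(messages, model)            # Decimal, USD
    completion_cost = calculate_completion_cost("Hi there!", model)  # Decimal, USD
    print(count_message_tokens(messages, model), prompt_cost + completion_cost)
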
tokencostauto/constants.py ADDED
@@ -0,0 +1,94 @@
+ import os
+ import json
+ import aiohttp
+ import asyncio
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ """
+ Prompt (aka context) tokens are based on the number of words plus other characters (e.g. spaces and punctuation) in the input.
+ Completion tokens are similarly based on the length of the model's response.
+ Prompt tokens + completion tokens = total tokens.
+ The max total limit is typically 1 more than the prompt token limit, so there's space for at least one completion token.
+
+ You can use OpenAI's tokenizer webapp (which uses their tiktoken repo) to see how many tokens your phrase is:
+ https://platform.openai.com/tokenizer
+
+ Note: When asking follow-up questions, everything above and including your follow-up question
+ is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
+ """
+
+ # How to read TOKEN_COSTS:
+ # Each prompt token costs __ USD per token.
+ # Each completion token costs __ USD per token.
+ # Max prompt limit of each model is __ tokens.
+
+ PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
+
+
+ async def fetch_costs():
+     """Fetch the latest token costs from the LiteLLM cost tracker asynchronously.
+     Returns:
+         dict: The token costs for each model.
+     Raises:
+         Exception: If the request fails.
+     """
+     async with aiohttp.ClientSession(trust_env=True) as session:
+         async with session.get(PRICES_URL) as response:
+             if response.status == 200:
+                 return await response.json(content_type=None)
+             else:
+                 raise Exception(
+                     f"Failed to fetch token costs, status code: {response.status}"
+                 )
+
+
+ async def update_token_costs():
+     """Update the TOKEN_COSTS dictionary with the latest costs from the LiteLLM cost tracker asynchronously."""
+     global TOKEN_COSTS
+     try:
+         fetched_costs = await fetch_costs()
+         TOKEN_COSTS.update(fetched_costs)
+         # Safely remove LiteLLM's 'sample_spec' placeholder entry if it exists
+         TOKEN_COSTS.pop("sample_spec", None)
+         return TOKEN_COSTS
+     except Exception as e:
+         logger.error(f"Failed to update TOKEN_COSTS: {e}")
+         raise
+
+
+ def refresh_prices(write_file=True):
+     """Synchronous wrapper for update_token_costs that optionally writes to model_prices.json."""
+     try:
+         # Run the async function in a new event loop
+         updated_costs = asyncio.run(update_token_costs())
+
+         # Write to file if requested
+         if write_file:
+             file_path = os.path.join(os.path.dirname(__file__), "model_prices.json")
+             with open(file_path, "w") as f:
+                 json.dump(TOKEN_COSTS, f, indent=4)
+             logger.info(f"Updated prices written to {file_path}")
+
+         return updated_costs
+     except Exception as e:
+         logger.error(f"Failed to refresh prices: {e}")
+         # Return the static prices as fallback
+         return TOKEN_COSTS
+
+
+ with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
+     TOKEN_COSTS_STATIC = json.load(f)
+
+
+ # Set initial TOKEN_COSTS to the static values
+ TOKEN_COSTS = TOKEN_COSTS_STATIC.copy()
+
+ # Only run in a non-async context
+ if __name__ == "__main__":
+     try:
+         asyncio.run(update_token_costs())
+         print("Token costs updated successfully")
+     except Exception:
+         logger.error("Failed to update token costs. Using static costs.")
tokencostauto/costs.py ADDED
@@ -0,0 +1,310 @@
+ """
+ Costs dictionary and utility tool for counting tokens
+ """
+
+ import tiktoken
+ import anthropic
+ from typing import Union, List, Dict
+ from .constants import TOKEN_COSTS
+ from decimal import Decimal
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ # Note: cl100k_base is the OpenAI base tokenizer. It has nothing to do with Claude; tiktoken doesn't support Claude yet.
+ # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
+
+
+ def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int:
+     """Count tokens for supported Claude models via Anthropic's token counting (beta) API."""
+     if not any(
+         supported_model in model
+         for supported_model in [
+             "claude-3-7-sonnet",
+             "claude-3-5-sonnet",
+             "claude-3-5-haiku",
+             "claude-3-haiku",
+             "claude-3-opus",
+         ]
+     ):
+         raise ValueError(
+             f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
+         )
+     return (
+         anthropic.Anthropic()
+         .beta.messages.count_tokens(
+             model=model,
+             messages=messages,
+         )
+         .input_tokens
+     )
+
+
+ def strip_ft_model_name(model: str) -> str:
+     """
+     Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id
+     We only need the base model name to get cost info.
+     """
+     if model.startswith("ft:gpt-3.5-turbo"):
+         model = "ft:gpt-3.5-turbo"
+     return model
+
+
+ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
+     """
+     Return the total number of tokens in a prompt's messages.
+     Args:
+         messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
+             [{ "role": "user", "content": "Hello world"},
+              { "role": "assistant", "content": "How may I assist you today?"}]
+         model (str): Name of LLM to choose encoding for.
+     Returns:
+         Total number of tokens in messages.
+     """
+     model = model.lower()
+     model = strip_ft_model_name(model)
+
+     # Anthropic token counting requires a valid API key
+     if "claude-" in model:
+         logger.warning(
+             "Anthropic token counting API is currently in beta. Please expect differences in costs!"
+         )
+         return get_anthropic_token_count(messages, model)
+
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         logger.warning("Model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+     if model in {
+         "gpt-3.5-turbo-0613",
+         "gpt-3.5-turbo-16k-0613",
+         "gpt-4-0314",
+         "gpt-4-32k-0314",
+         "gpt-4-0613",
+         "gpt-4-32k-0613",
+         "gpt-4-turbo",
+         "gpt-4-turbo-2024-04-09",
+         "gpt-4o",
+         "gpt-4o-2024-05-13",
+     } or model.startswith("o"):
+         tokens_per_message = 3
+         tokens_per_name = 1
+     elif model == "gpt-3.5-turbo-0301":
+         # every message follows <|start|>{role/name}\n{content}<|end|>\n
+         tokens_per_message = 4
+         tokens_per_name = -1  # if there's a name, the role is omitted
+     elif "gpt-3.5-turbo" in model:
+         logger.warning(
+             "gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
+         )
+         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
+     elif "gpt-4o" in model:
+         logger.warning(
+             "gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
+         )
+         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
+     elif "gpt-4" in model:
+         logger.warning(
+             "gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
+         )
+         return count_message_tokens(messages, model="gpt-4-0613")
+     else:
+         raise KeyError(
+             f"""num_tokens_from_messages() is not implemented for model {model}.
+ See https://github.com/openai/openai-python/blob/main/chatml.md for how messages are converted to tokens."""
+         )
+     num_tokens = 0
+     for message in messages:
+         num_tokens += tokens_per_message
+         for key, value in message.items():
+             num_tokens += len(encoding.encode(value))
+             if key == "name":
+                 num_tokens += tokens_per_name
+     num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
+     return num_tokens
+
+
+ def count_string_tokens(prompt: str, model: str) -> int:
+     """
+     Returns the number of tokens in a (prompt or completion) text string.
+
+     Args:
+         prompt (str): The text string.
+         model (str): The model whose encoding to use (e.g., "gpt-3.5-turbo").
+
+     Returns:
+         int: The number of tokens in the text string.
+     """
+     model = model.lower()
+
+     if "/" in model:
+         model = model.split("/")[-1]
+
+     if "claude-" in model:
+         raise ValueError(
+             "Anthropic does not support this method. Please use the `count_message_tokens` function for exact counts."
+         )
+
+     try:
+         encoding = tiktoken.encoding_for_model(model)
+     except KeyError:
+         logger.warning("Model not found. Using cl100k_base encoding.")
+         encoding = tiktoken.get_encoding("cl100k_base")
+
+     return len(encoding.encode(prompt))
+
+
+ def calculate_cost_by_tokens(num_tokens: int, model: str, token_type: str) -> Decimal:
+     """
+     Calculate the cost based on the number of tokens and the model.
+
+     Args:
+         num_tokens (int): The number of tokens.
+         model (str): The model name.
+         token_type (str): Type of token ('input' or 'output').
+
+     Returns:
+         Decimal: The calculated cost in USD.
+     """
+     model = model.lower()
+     if model not in TOKEN_COSTS:
+         raise KeyError(
+             f"""Model {model} is not implemented.
+ Double-check your spelling, or submit an issue/PR"""
+         )
+
+     cost_per_token_key = (
+         "input_cost_per_token" if token_type == "input" else "output_cost_per_token"
+     )
+     cost_per_token = TOKEN_COSTS[model][cost_per_token_key]
+
+     return Decimal(str(cost_per_token)) * Decimal(num_tokens)
+
+
+ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
+     """
+     Calculate the prompt's cost in USD.
+
+     Args:
+         prompt (Union[List[dict], str]): List of message objects or single string prompt.
+         model (str): The model name.
+
+     Returns:
+         Decimal: The calculated cost in USD.
+
+     e.g.:
+     >>> prompt = [{ "role": "user", "content": "Hello world"},
+     ...           { "role": "assistant", "content": "How may I assist you today?"}]
+     >>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
+     Decimal('0.0000300')
+     # or
+     >>> prompt = "Hello world"
+     >>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
+     Decimal('0.0000030')
+     """
+     model = model.lower()
+     model = strip_ft_model_name(model)
+     if model not in TOKEN_COSTS:
+         raise KeyError(
+             f"""Model {model} is not implemented.
+ Double-check your spelling, or submit an issue/PR"""
+         )
+     if not isinstance(prompt, (list, str)):
+         raise TypeError(
+             f"Prompt must be either a string or list of message objects but found {type(prompt)} instead."
+         )
+     prompt_tokens = (
+         count_string_tokens(prompt, model)
+         if isinstance(prompt, str) and "claude-" not in model
+         else count_message_tokens(prompt, model)
+     )
+
+     return calculate_cost_by_tokens(prompt_tokens, model, "input")
+
+
+ def calculate_completion_cost(completion: str, model: str) -> Decimal:
+     """
+     Calculate the completion's cost in USD.
+
+     Args:
+         completion (str): Completion string.
+         model (str): The model name.
+
+     Returns:
+         Decimal: The calculated cost in USD.
+
+     e.g.:
+     >>> completion = "How may I assist you today?"
+     >>> calculate_completion_cost(completion, "gpt-3.5-turbo")
+     Decimal('0.000014')
+     """
+     model = model.lower()
+     model = strip_ft_model_name(model)
+     if model not in TOKEN_COSTS:
+         raise KeyError(
+             f"""Model {model} is not implemented.
+ Double-check your spelling, or submit an issue/PR"""
+         )
+
+     if not isinstance(completion, str):
+         raise TypeError(
+             f"Completion must be a string but found {type(completion)} instead."
+         )
+
+     if "claude-" in model:
+         completion_list = [{"role": "assistant", "content": completion}]
+         # Anthropic adds roughly 13 tokens of message framing on top of the completion itself
+         completion_tokens = count_message_tokens(completion_list, model) - 13
+     else:
+         completion_tokens = count_string_tokens(completion, model)
+
+     return calculate_cost_by_tokens(completion_tokens, model, "output")
+
+
+ def calculate_all_costs_and_tokens(
+     prompt: Union[List[dict], str], completion: str, model: str
+ ) -> dict:
+     """
+     Calculate the prompt and completion costs (in USD) and token counts.
+
+     Args:
+         prompt (Union[List[dict], str]): List of message objects or single string prompt.
+         completion (str): Completion string.
+         model (str): The model name.
+
+     Returns:
+         dict: The prompt and completion costs (USD) and token counts.
+
+     e.g.:
+     >>> prompt = "Hello world"
+     >>> completion = "How may I assist you today?"
+     >>> calculate_all_costs_and_tokens(prompt, completion, "gpt-3.5-turbo")
+     {'prompt_cost': Decimal('0.0000030'), 'prompt_tokens': 2, 'completion_cost': Decimal('0.000014'), 'completion_tokens': 7}
+     """
+     prompt_cost = calculate_prompt_cost(prompt, model)
+     completion_cost = calculate_completion_cost(completion, model)
+     prompt_tokens = (
+         count_string_tokens(prompt, model)
+         if isinstance(prompt, str) and "claude-" not in model
+         else count_message_tokens(prompt, model)
+     )
+
+     if "claude-" in model:
+         logger.warning("Token counting is approximate for Claude models.")
+         completion_list = [{"role": "assistant", "content": completion}]
+         # Anthropic adds roughly 13 tokens of message framing on top of the completion itself
+         completion_tokens = count_message_tokens(completion_list, model) - 13
+     else:
+         completion_tokens = count_string_tokens(completion, model)
+
+     return {
+         "prompt_cost": prompt_cost,
+         "prompt_tokens": prompt_tokens,
+         "completion_cost": completion_cost,
+         "completion_tokens": completion_tokens,
+     }
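
Since calculate_cost_by_tokens is just Decimal(str(cost_per_token)) * Decimal(num_tokens), prompt and completion costs compose linearly from the token counts. An end-to-end sketch mirroring the docstring examples (the exact Decimal values depend on the prices currently loaded into TOKEN_COSTS, so they may differ from those shown):

    from tokencostauto import (
        calculate_all_costs_and_tokens,
        calculate_cost_by_tokens,
    )

    result = calculate_all_costs_and_tokens(
        prompt="Hello world",
        completion="How may I assist you today?",
        model="gpt-3.5-turbo",
    )
    # e.g. {'prompt_cost': Decimal('0.0000030'), 'prompt_tokens': 2,
    #       'completion_cost': Decimal('0.000014'), 'completion_tokens': 7}

    # The same figure can be rebuilt from the raw token count:
    assert result["prompt_cost"] == calculate_cost_by_tokens(
        result["prompt_tokens"], "gpt-3.5-turbo", "input"
    )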