vectorvein 0.1.7__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {vectorvein-0.1.7 → vectorvein-0.1.8}/PKG-INFO +3 -1
  2. {vectorvein-0.1.7 → vectorvein-0.1.8}/pyproject.toml +3 -1
  3. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/anthropic_client.py +30 -5
  4. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/base_client.py +4 -4
  5. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/gemini_client.py +22 -4
  6. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/minimax_client.py +29 -5
  7. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/openai_compatible_client.py +33 -4
  8. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/utils.py +86 -2
  9. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/types/defaults.py +48 -3
  10. vectorvein-0.1.8/src/vectorvein/utilities/retry.py +36 -0
  11. vectorvein-0.1.8/tests/sample_settings.py +597 -0
  12. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/test_create_chat_client.py +36 -12
  13. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/test_image_input_chat_client.py +1 -1
  14. vectorvein-0.1.8/tests/test_tokens_count.py +46 -0
  15. vectorvein-0.1.7/tests/sample_settings.py +0 -947
  16. {vectorvein-0.1.7 → vectorvein-0.1.8}/README.md +0 -0
  17. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/__init__.py +0 -0
  18. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/__init__.py +0 -0
  19. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  20. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/groq_client.py +0 -0
  21. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/local_client.py +0 -0
  22. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  23. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  24. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/openai_client.py +0 -0
  25. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  26. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/yi_client.py +0 -0
  27. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  28. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/types/enums.py +0 -0
  30. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/types/llm_parameters.py +0 -0
  31. {vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/utilities/media_processing.py +0 -0
  32. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/__init__.py +0 -0
  33. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/cat.png +0 -0
  34. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/test_format_messages.py +0 -0
  35. {vectorvein-0.1.7 → vectorvein-0.1.8}/tests/test_tool_use_multi_turns.py +0 -0
{vectorvein-0.1.7 → vectorvein-0.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.7
+Version: 0.1.8
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: anthropic[vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
+Requires-Dist: deepseek-tokenizer>=0.1.0
+Requires-Dist: qwen-tokenizer>=0.1.0
 Description-Content-Type: text/markdown
 
 # vectorvein
{vectorvein-0.1.7 → vectorvein-0.1.8}/pyproject.toml

@@ -9,12 +9,14 @@ dependencies = [
     "anthropic[vertex]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
+    "deepseek-tokenizer>=0.1.0",
+    "qwen-tokenizer>=0.1.0",
 ]
 description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.7"
+version = "0.1.8"
 
 [project.license]
 text = "MIT"
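
The two new dependencies back the token counting added in chat_clients/utils.py further down. A minimal sketch of how they are used there, based on the imports and encode() calls visible in that diff (the sample string is illustrative):

from deepseek_tokenizer import deepseek_tokenizer
from qwen_tokenizer import qwen_tokenizer

sample = "count my tokens"
# Both packages expose a ready-made tokenizer object; the token count is the length of the encoding.
print(len(deepseek_tokenizer.encode(sample)))
print(len(qwen_tokenizer.encode(sample)))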
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/anthropic_client.py

@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
 from google.auth import _helpers
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
                 base_url=self.endpoint.api_base,
             )
 
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=refactor_tool_use_params(tools) if tools else tools,
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
             )
+
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = await self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=refactor_tool_use_params(tools) if tools else tools,
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
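
The added block above is the main behavioral change of this release: when max_tokens is left unset, the client derives an output budget from the model settings and the estimated prompt size. A standalone sketch of that computation (context_length and max_output_tokens are the model-setting fields used in the diff; the numbers below are illustrative):

def compute_max_tokens(prompt_tokens: int, context_length: int, max_output_tokens: int | None) -> int:
    # Remaining room in the context window after the prompt.
    remaining = context_length - prompt_tokens
    if max_output_tokens is None:
        return remaining
    # Clamp between 1 token and the model's output cap.
    return min(max(remaining, 1), max_output_tokens)


# e.g. a 200,000-token context, a 4,096-token output cap, and a 1,500-token prompt:
print(compute_max_tokens(1500, 200_000, 4096))  # -> 4096

The same block is repeated in the MiniMax and OpenAI-compatible clients below.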
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/base_client.py

@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/gemini_client.py

@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
                 if "text" in part:
                     result["content"] += part["text"]
                 elif "functionCall" in part:
-                    tool_calls.append(part["functionCall"])
+                    tool_call = {
+                        "index": 0,
+                        "id": "call_0",
+                        "function": {
+                            "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                            "name": part["functionCall"]["name"],
+                        },
+                        "type": "function",
+                    }
+                    tool_calls.append(tool_call)
 
             if tool_calls:
                 result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
                 if "text" in part:
                     result["content"] += part["text"]
                 elif "functionCall" in part:
-                    tool_calls.append(part["functionCall"])
+                    tool_call = {
+                        "index": 0,
+                        "id": "call_0",
+                        "function": {
+                            "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                            "name": part["functionCall"]["name"],
+                        },
+                        "type": "function",
+                    }
+                    tool_calls.append(tool_call)
 
             if tool_calls:
                 result["tool_calls"] = tool_calls
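
Gemini functionCall parts are now rewritten into the OpenAI-style tool-call shape used by the rest of the library instead of being appended raw. A sketch of the conversion on a made-up response part:

import json

part = {"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}

tool_call = {
    "index": 0,
    "id": "call_0",
    "function": {
        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
        "name": part["functionCall"]["name"],
    },
    "type": "function",
}
# -> "arguments" becomes the JSON string '{"city": "Paris"}', matching the OpenAI tool_calls layout.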
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/minimax_client.py

@@ -7,8 +7,8 @@ import httpx
 from openai._types import NotGiven
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2048,
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -145,9 +155,11 @@ class MiniMaxChatClient(BaseChatClient):
                     if "usage" not in chunk_data:
                         continue
                     else:
+                        if chunk_data["object"] != "chat.completion.chunk":
+                            continue
                         yield ChatCompletionDeltaMessage(
                             **{
-                                "content": chunk_data["choices"][0]["message"].get("content"),
+                                "content": chunk_data["choices"][0]["delta"].get("content"),
                                 "role": "assistant",
                                 **tool_calls_params,
                             }
@@ -211,7 +223,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2048,
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -256,6 +268,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -289,9 +311,11 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
                    if "usage" not in chunk_data:
                         continue
                     else:
+                        if chunk_data["object"] != "chat.completion.chunk":
+                            continue
                         yield ChatCompletionDeltaMessage(
                             **{
-                                "content": chunk_data["choices"][0]["message"].get("content"),
+                                "content": chunk_data["choices"][0]["delta"].get("content"),
                                 "role": "assistant",
                                 **tool_calls_params,
                             }
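
The streaming fix reads incremental text from the delta field and skips events whose object is not "chat.completion.chunk". A standalone illustration on made-up payloads (the real handler also gates on the usage field, as shown above):

chunks = [
    {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hel"}}], "usage": {"total_tokens": 3}},
    {"object": "chat.completion", "choices": [{"message": {"content": "Hello"}}], "usage": {"total_tokens": 12}},
]

for chunk_data in chunks:
    if chunk_data["object"] != "chat.completion.chunk":
        continue  # the final aggregate event carries the full message, not a delta
    print(chunk_data["choices"][0]["delta"].get("content"))  # -> Hel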
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/openai_compatible_client.py

@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from .utils import (
     cutoff_messages,
+    get_token_counts,
     ToolCallContentProcessor,
     generate_tool_use_system_prompt,
 )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int = 2000,
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
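
Two tool-call normalizations are added here: streamed deltas get their index reassigned from the chunk position, and non-streamed tool calls are tagged with "type": "function" so they match the shape expected by ChatCompletionMessage. A sketch of the non-streaming half on an illustrative model_dump() result:

dumped = {"id": "call_abc123", "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}

result_tool_calls = [{**dumped, "type": "function"}]
# -> the dict is copied unchanged, with "type": "function" merged in.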
{vectorvein-0.1.7 → vectorvein-0.1.8}/src/vectorvein/chat_clients/utils.py

@@ -3,8 +3,14 @@
 import re
 import json
 
+import httpx
 import tiktoken
+from anthropic import Anthropic
+from qwen_tokenizer import qwen_tokenizer
+from deepseek_tokenizer import deepseek_tokenizer
 
+from ..settings import settings
+from ..utilities.retry import Retry
 from ..types.enums import BackendType
 from ..utilities.media_processing import ImageProcessor
 
@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
         text = str(text)
     if model == "gpt-3.5-turbo":
         return len(chatgpt_encoding.encode(text))
-    elif model == "gpt-4o":
+    elif model in ("gpt-4o", "gpt-4o-mini"):
         return len(gpt_4o_encoding.encode(text))
     elif model.startswith("abab"):
-        return int(len(text) / 1.33)
+        model_setting = settings.minimax.models[model]
+        if len(model_setting.endpoints) == 0:
+            return int(len(text) / 1.33)
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.minimax.chat/v1/tokenize"
+        headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+        request_body = {
+            "model": model,
+            "tokens_to_generate": 128,
+            "temperature": 0.2,
+            "messages": [
+                {"sender_type": "USER", "text": text},
+            ],
+        }
+
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["segments_num"]
+    elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+        model_setting = settings.moonshot.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+        request_body = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": text},
+            ],
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["data"]["total_tokens"]
+    elif model.startswith("gemini"):
+        model_setting = settings.gemini.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        params = {"key": endpoint.api_key}
+        request_body = {
+            "contents": {
+                "role": "USER",
+                "parts": [
+                    {"text": "TEXT"},
+                ],
+            },
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url, json=request_body, params=params, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        result = response.json()
+        return result["totalTokens"]
+    elif model.startswith("claude"):
+        return Anthropic().count_tokens(text)
+    elif model.startswith("deepseek"):
+        return len(deepseek_tokenizer.encode(text))
+    elif model.startswith("qwen"):
+        return len(qwen_tokenizer.encode(text))
     else:
         return len(chatgpt_encoding.encode(text))
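
All of the remote token counters above go through the new src/vectorvein/utilities/retry.py helper (added in this release, +36 lines, body not shown in this diff). Only its call pattern is visible here: wrap a callable, attach arguments, set the retry count and sleep interval, then run() and unpack a (success, result) tuple. A minimal sketch of an implementation consistent with that usage; the actual module may differ:

import time
from typing import Any, Callable


class Retry:
    # Fluent retry wrapper matching the Retry(...).args(...).retry_times(...).sleep_time(...).run() calls above.
    def __init__(self, func: Callable):
        self.func = func
        self._args: tuple = ()
        self._kwargs: dict = {}
        self._times = 3
        self._sleep = 1.0

    def args(self, *args, **kwargs) -> "Retry":
        self._args, self._kwargs = args, kwargs
        return self

    def retry_times(self, times: int) -> "Retry":
        self._times = times
        return self

    def sleep_time(self, seconds: float) -> "Retry":
        self._sleep = seconds
        return self

    def run(self) -> tuple[bool, Any]:
        for attempt in range(self._times):
            try:
                return True, self.func(*self._args, **self._kwargs)
            except Exception:
                if attempt < self._times - 1:
                    time.sleep(self._sleep)
        return False, None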