vectorvein 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.7 → vectorvein-0.1.9}/PKG-INFO +3 -1
- {vectorvein-0.1.7 → vectorvein-0.1.9}/pyproject.toml +3 -1
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/anthropic_client.py +30 -5
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/base_client.py +4 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/gemini_client.py +22 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/minimax_client.py +43 -29
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_compatible_client.py +33 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/utils.py +86 -2
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/defaults.py +48 -3
- vectorvein-0.1.9/src/vectorvein/utilities/retry.py +36 -0
- vectorvein-0.1.9/tests/sample_settings.py +596 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_create_chat_client.py +37 -16
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_image_input_chat_client.py +1 -1
- vectorvein-0.1.9/tests/test_tokens_count.py +46 -0
- vectorvein-0.1.7/tests/sample_settings.py +0 -947
- {vectorvein-0.1.7 → vectorvein-0.1.9}/README.md +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/llm_parameters.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/cat.png +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_format_messages.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_tool_use_multi_turns.py +0 -0
{vectorvein-0.1.7 → vectorvein-0.1.9}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.7
+Version: 0.1.9
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: anthropic[vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
+Requires-Dist: deepseek-tokenizer>=0.1.0
+Requires-Dist: qwen-tokenizer>=0.1.0
 Description-Content-Type: text/markdown
 
 # vectorvein
```
{vectorvein-0.1.7 → vectorvein-0.1.9}/pyproject.toml

```diff
@@ -9,12 +9,14 @@ dependencies = [
     "anthropic[vertex]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
+    "deepseek-tokenizer>=0.1.0",
+    "qwen-tokenizer>=0.1.0",
 ]
 description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.7"
+version = "0.1.9"
 
 [project.license]
 text = "MIT"
```
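The two new runtime dependencies support the offline token counting added to `chat_clients/utils.py` later in this diff. A minimal illustration of the encode calls that file relies on (the sample string is arbitrary):

```python
# Illustrative only: mirrors the tokenizer calls added in chat_clients/utils.py.
from deepseek_tokenizer import deepseek_tokenizer
from qwen_tokenizer import qwen_tokenizer

deepseek_tokens = len(deepseek_tokenizer.encode("Hello, world!"))
qwen_tokens = len(qwen_tokenizer.encode("Hello, world!"))
```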
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/anthropic_client.py

```diff
@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
 from google.auth import _helpers
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
             base_url=self.endpoint.api_base,
         )
 
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             api_key=self.endpoint.api_key,
             base_url=self.endpoint.api_base,
         )
+
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = await self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
```
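The block added to both the sync and async Anthropic clients (and repeated in the MiniMax and OpenAI-compatible clients below) derives `max_tokens` when the caller leaves it as `None`: it counts the prompt and tool tokens, subtracts them from the model's context window, and clamps the result to the model's output limit when one is configured. Restated below as a standalone helper for readability only; this function does not exist in the package, and `model_setting` stands in for the per-model settings object used in the diff:

```python
# A readability restatement of the inlined logic above, not part of vectorvein.
def resolve_max_tokens(model_setting, messages, tools_params, get_token_counts) -> int:
    """Derive max_tokens from the remaining context window when the caller passes None."""
    token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
    max_tokens = model_setting.context_length - token_counts
    if model_setting.max_output_tokens is not None:
        # Clamp to at least 1 token and at most the model's output limit.
        max_tokens = min(max(max_tokens, 1), model_setting.max_output_tokens)
    return max_tokens
```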
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/base_client.py

```diff
@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
```
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/gemini_client.py

```diff
@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
             if "text" in part:
                 result["content"] += part["text"]
             elif "functionCall" in part:
-
+                tool_call = {
+                    "index": 0,
+                    "id": "call_0",
+                    "function": {
+                        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                        "name": part["functionCall"]["name"],
+                    },
+                    "type": "function",
+                }
+                tool_calls.append(tool_call)
 
         if tool_calls:
             result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             if "text" in part:
                 result["content"] += part["text"]
             elif "functionCall" in part:
-
+                tool_call = {
+                    "index": 0,
+                    "id": "call_0",
+                    "function": {
+                        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                        "name": part["functionCall"]["name"],
+                    },
+                    "type": "function",
+                }
+                tool_calls.append(tool_call)
 
         if tool_calls:
             result["tool_calls"] = tool_calls
```
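The new branch converts Gemini `functionCall` parts into OpenAI-style tool calls so downstream consumers see a uniform shape. An illustrative input/output pair (the part's values are made up; the field names match what the code reads):

```python
import json

# Hypothetical Gemini response part containing a function call.
part = {"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}

tool_call = {
    "index": 0,
    "id": "call_0",
    "function": {
        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
        "name": part["functionCall"]["name"],
    },
    "type": "function",
}
# tool_call["function"]["arguments"] == '{"city": "Paris"}'
```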
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/minimax_client.py

```diff
@@ -7,8 +7,8 @@ import httpx
 from openai._types import NotGiven
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -93,7 +93,7 @@ class MiniMaxChatClient(BaseChatClient):
             model=self.model_setting.id,
         )
 
-        if tools
+        if tools:
             tools_params = {
                 "tools": [
                     {
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -139,22 +149,19 @@ class MiniMaxChatClient(BaseChatClient):
         for chunk in response.iter_lines():
             if chunk:
                 chunk_data = json.loads(chunk[6:])
+                if chunk_data["object"] != "chat.completion.chunk":
+                    continue
                 tool_calls_params = extract_tool_calls(chunk_data)
                 has_tool_calls = True if tool_calls_params else False
                 if has_tool_calls:
-
-
-
-
-                    **
-
-
-                    **tool_calls_params,
-                    }
-                )
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
+                            "role": "assistant",
+                            **tool_calls_params,
+                        }
+                    )
                 else:
-                    if "usage" in chunk_data:
-                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
                             "content": chunk_data["choices"][0]["delta"]["content"],
@@ -211,7 +218,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -238,7 +245,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             model=self.model_setting.id,
         )
 
-        if tools
+        if tools:
             tools_params = {
                 "tools": [
                     {
@@ -256,6 +263,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -283,22 +300,19 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         async for chunk in response.aiter_lines():
             if chunk:
                 chunk_data = json.loads(chunk[6:])
+                if chunk_data["object"] != "chat.completion.chunk":
+                    continue
                 tool_calls_params = extract_tool_calls(chunk_data)
                 has_tool_calls = True if tool_calls_params else False
                 if has_tool_calls:
-
-
-
-
-                    **
-
-
-                    **tool_calls_params,
-                    }
-                )
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
+                            "role": "assistant",
+                            **tool_calls_params,
+                        }
+                    )
                 else:
-                    if "usage" in chunk_data:
-                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
                             "content": chunk_data["choices"][0]["delta"]["content"],
```
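In the MiniMax streaming loops, `chunk[6:]` strips the `data: ` prefix of each server-sent-events line, and the new `object != "chat.completion.chunk"` check takes over from the old `"usage" in chunk_data` guard that previously sat in the else branch, skipping any non-delta event (presumably the final usage-bearing completion object) before its content is read. A sketch of that parsing, with a made-up payload:

```python
import json

# Hypothetical SSE line; the field names mirror what the loop above reads.
line = 'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hi"}}]}'

chunk_data = json.loads(line[6:])  # drop the leading 'data: '
if chunk_data["object"] == "chat.completion.chunk":
    print(chunk_data["choices"][0]["delta"].get("content"))  # -> Hi
# Events with any other object type are skipped by the new guard.
```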
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED
```diff
@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from .utils import (
     cutoff_messages,
+    get_token_counts,
     ToolCallContentProcessor,
     generate_tool_use_system_prompt,
 )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump()
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump()
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
```
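For streamed responses, the OpenAI-compatible client now rewrites each tool-call fragment's `index` to its position within the chunk, and for non-streamed responses it adds `"type": "function"` to every dumped tool call; both changes presumably normalize backends that omit these fields. A toy illustration (data made up) of the per-index accumulation a consumer typically performs, which relies on those indexes being present and consistent:

```python
# Hypothetical streamed fragments of a single tool call, merged by index.
fragments = [
    {"index": 0, "function": {"name": "get_weather", "arguments": '{"ci'}},
    {"index": 0, "function": {"arguments": 'ty": "Paris"}'}},
]
merged_arguments: dict[int, str] = {}
for fragment in fragments:
    merged_arguments[fragment["index"]] = (
        merged_arguments.get(fragment["index"], "") + fragment["function"].get("arguments", "")
    )
print(merged_arguments[0])  # {"city": "Paris"}
```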
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/utils.py

```diff
@@ -3,8 +3,14 @@
 import re
 import json
 
+import httpx
 import tiktoken
+from anthropic import Anthropic
+from qwen_tokenizer import qwen_tokenizer
+from deepseek_tokenizer import deepseek_tokenizer
 
+from ..settings import settings
+from ..utilities.retry import Retry
 from ..types.enums import BackendType
 from ..utilities.media_processing import ImageProcessor
 
@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
     text = str(text)
     if model == "gpt-3.5-turbo":
         return len(chatgpt_encoding.encode(text))
-    elif model
+    elif model in ("gpt-4o", "gpt-4o-mini"):
         return len(gpt_4o_encoding.encode(text))
     elif model.startswith("abab"):
-
+        model_setting = settings.minimax.models[model]
+        if len(model_setting.endpoints) == 0:
+            return int(len(text) / 1.33)
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.minimax.chat/v1/tokenize"
+        headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+        request_body = {
+            "model": model,
+            "tokens_to_generate": 128,
+            "temperature": 0.2,
+            "messages": [
+                {"sender_type": "USER", "text": text},
+            ],
+        }
+
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["segments_num"]
+    elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+        model_setting = settings.moonshot.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+        request_body = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": text},
+            ],
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["data"]["total_tokens"]
+    elif model.startswith("gemini"):
+        model_setting = settings.gemini.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        params = {"key": endpoint.api_key}
+        request_body = {
+            "contents": {
+                "role": "USER",
+                "parts": [
+                    {"text": "TEXT"},
+                ],
+            },
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url, json=request_body, params=params, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        result = response.json()
+        return result["totalTokens"]
+    elif model.startswith("claude"):
+        return Anthropic().count_tokens(text)
+    elif model.startswith("deepseek"):
+        return len(deepseek_tokenizer.encode(text))
+    elif model.startswith("qwen"):
+        return len(qwen_tokenizer.encode(text))
     else:
         return len(chatgpt_encoding.encode(text))
 
```
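`get_token_counts` now calls remote tokenize endpoints (MiniMax, Moonshot, Gemini) through the chained `Retry` helper introduced in the new `src/vectorvein/utilities/retry.py`, whose body is not included in this section. Its actual implementation may differ; the sketch below only matches how it is called here, `Retry(fn).args(...).retry_times(n).sleep_time(s).run()` returning a `(success, result)` pair:

```python
import time


class Retry:
    """Minimal sketch compatible with the call sites above; not the real vectorvein code."""

    def __init__(self, func):
        self._func = func
        self._args: tuple = ()
        self._kwargs: dict = {}
        self._retry_times = 3
        self._sleep_time = 1.0

    def args(self, *args, **kwargs):
        self._args, self._kwargs = args, kwargs
        return self

    def retry_times(self, times: int):
        self._retry_times = times
        return self

    def sleep_time(self, seconds: float):
        self._sleep_time = seconds
        return self

    def run(self):
        # Returns (success, result) so callers can unpack `_, response = ...`.
        for _ in range(self._retry_times):
            try:
                return True, self._func(*self._args, **self._kwargs)
            except Exception:
                time.sleep(self._sleep_time)
        return False, None
```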