vectorvein 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.3 → vectorvein-0.1.5}/PKG-INFO +1 -1
- {vectorvein-0.1.3 → vectorvein-0.1.5}/pyproject.toml +1 -1
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/__init__.py +2 -2
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/anthropic_client.py +14 -8
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/base_client.py +52 -10
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/gemini_client.py +7 -6
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/minimax_client.py +56 -38
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/openai_compatible_client.py +13 -12
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/utils.py +1 -1
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/types/llm_parameters.py +12 -1
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/sample_settings.py +34 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/test_create_chat_client.py +32 -19
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/test_tool_use_multi_turns.py +0 -1
- {vectorvein-0.1.3 → vectorvein-0.1.5}/README.md +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/types/defaults.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/__init__.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/cat.png +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/test_format_messages.py +0 -0
- {vectorvein-0.1.3 → vectorvein-0.1.5}/tests/test_image_input_chat_client.py +0 -0
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/__init__.py

@@ -55,7 +55,7 @@ BackendMap = {
 def create_chat_client(
     backend: BackendType,
     model: str | None = None,
-    stream: bool =
+    stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
     **kwargs,

@@ -80,7 +80,7 @@ def create_chat_client(
 def create_async_chat_client(
     backend: BackendType,
     model: str | None = None,
-    stream: bool =
+    stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
     **kwargs,
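For context, a minimal usage sketch of the factory with the new default. The import paths follow this package layout and the backend/model names come from the test file further down; configuring endpoints and API keys via the settings module is assumed and omitted here.

```python
# Sketch only: assumes backend endpoints/keys are already loaded via vectorvein's settings.
from vectorvein.chat_clients import create_chat_client
from vectorvein.types.enums import BackendType

# As of 0.1.5 the factory defaults to stream=False; pass stream=True to opt in to streaming.
client = create_chat_client(BackendType.OpenAI, model="gpt-4o")
```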
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/anthropic_client.py

@@ -3,6 +3,7 @@
 import json
 import random
 
+from openai._types import NotGiven as OpenAINotGiven
 from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
 from anthropic._types import NotGiven, NOT_GIVEN
 from anthropic.types import (

@@ -22,6 +23,7 @@ from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
+from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
 
 
 def refactor_tool_use_params(tools: list):

@@ -123,6 +125,8 @@ class AnthropicChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
 
         self.model_setting = self.backend_settings.models[self.model]
 

@@ -218,7 +222,7 @@ class AnthropicChatClient(BaseChatClient):
                         ]
                     elif chunk.content_block.type == "text":
                         message["content"] = chunk.content_block.text
-                        yield message
+                        yield ChatCompletionDeltaMessage(**message)
                 elif isinstance(chunk, RawContentBlockDeltaEvent):
                     if chunk.delta.type == "text_delta":
                         message["content"] = chunk.delta.text

@@ -236,13 +240,13 @@ class AnthropicChatClient(BaseChatClient):
                             "type": "function",
                         }
                     ]
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
                 elif isinstance(chunk, RawMessageDeltaEvent):
                     result["usage"]["completion_tokens"] = chunk.usage.output_tokens
                     result["usage"]["total_tokens"] = (
                         result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                     )
-                    yield
+                    yield ChatCompletionDeltaMessage(usage=result["usage"])
 
             return generator()
         else:

@@ -264,7 +268,7 @@ class AnthropicChatClient(BaseChatClient):
             if tool_calls:
                 result["tool_calls"] = refactor_tool_calls(tool_calls)
 
-            return result
+            return ChatCompletionMessage(**result)
 
 
 class AsyncAnthropicChatClient(BaseAsyncChatClient):

@@ -307,6 +311,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
 
         self.model_setting = self.backend_settings.models[self.model]
 

@@ -401,7 +407,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                         ]
                     elif chunk.content_block.type == "text":
                         message["content"] = chunk.content_block.text
-                        yield message
+                        yield ChatCompletionDeltaMessage(**message)
                 elif isinstance(chunk, RawContentBlockDeltaEvent):
                     if chunk.delta.type == "text_delta":
                         message["content"] = chunk.delta.text

@@ -419,13 +425,13 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                             "type": "function",
                         }
                     ]
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
                 elif isinstance(chunk, RawMessageDeltaEvent):
                     result["usage"]["completion_tokens"] = chunk.usage.output_tokens
                     result["usage"]["total_tokens"] = (
                         result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                     )
-                    yield
+                    yield ChatCompletionDeltaMessage(usage=result["usage"])
 
             return generator()
         else:

@@ -447,4 +453,4 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             if tool_calls:
                 result["tool_calls"] = refactor_tool_calls(tool_calls)
 
-            return result
+            return ChatCompletionMessage(**result)
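The practical effect across the clients is that non-streaming calls now return a `ChatCompletionMessage` pydantic model instead of a plain dict, so callers switch from key access to attribute access. A hedged sketch of the calling side (the `client` variable is assumed to be one of the chat clients above; the role/content message format mirrors what the tests pass through `format_messages`):

```python
# Sketch only: `client` is any chat client created via create_chat_client(...).
messages = [{"role": "user", "content": "What's the weather in Paris?"}]

reply = client.create_completion(messages=messages)
# 0.1.3 returned a dict (reply["content"]); 0.1.5 returns a ChatCompletionMessage.
print(reply.content)
if reply.tool_calls:
    for tool_call in reply.tool_calls:
        print(tool_call.function.name, tool_call.function.arguments)
```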
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/base_client.py

@@ -1,10 +1,14 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 14:48:55
 from abc import ABC, abstractmethod
+from typing import Generator, AsyncGenerator, Any
+
+from openai._types import NotGiven, NOT_GIVEN
 
 from ..settings import settings
 from ..types import defaults as defs
 from ..types.enums import ContextLengthControlType, BackendType
+from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
 
 
 class BaseChatClient(ABC):

@@ -14,7 +18,7 @@ class BaseChatClient(ABC):
     def __init__(
         self,
         model: str = "",
-        stream: bool =
+        stream: bool = False,
         temperature: float = 0.7,
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,

@@ -40,14 +44,33 @@ class BaseChatClient(ABC):
         self,
         messages: list,
         model: str | None = None,
-        stream: bool =
+        stream: bool = False,
         temperature: float = 0.7,
         max_tokens: int = 2000,
-        tools: list |
-        tool_choice: str |
-    ):
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
 
+    def create_stream(
+        self,
+        messages: list,
+        model: str | None = None,
+        temperature: float = 0.7,
+        max_tokens: int = 2000,
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+    ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
+        return self.create_completion(
+            messages=messages,
+            model=model,
+            stream=True,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+
 
 class BaseAsyncChatClient(ABC):
     DEFAULT_MODEL: str | None = None

@@ -56,7 +79,7 @@ class BaseAsyncChatClient(ABC):
     def __init__(
         self,
         model: str = "",
-        stream: bool =
+        stream: bool = False,
         temperature: float = 0.7,
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,

@@ -82,10 +105,29 @@ class BaseAsyncChatClient(ABC):
         self,
         messages: list,
         model: str | None = None,
-        stream: bool =
+        stream: bool = False,
         temperature: float = 0.7,
         max_tokens: int = 2000,
-        tools: list |
-        tool_choice: str |
-    ):
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
+
+    async def create_stream(
+        self,
+        messages: list,
+        model: str | None = None,
+        temperature: float = 0.7,
+        max_tokens: int = 2000,
+        tools: list | NotGiven = NOT_GIVEN,
+        tool_choice: str | NotGiven = NOT_GIVEN,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
+        return await self.create_completion(
+            messages=messages,
+            model=model,
+            stream=True,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
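The new `create_stream` helpers simply delegate to `create_completion(..., stream=True)` and are typed to yield `ChatCompletionDeltaMessage` chunks. A minimal consumption sketch mirroring the updated tests (client construction and message formatting assumed as above):

```python
# Sync client: create_stream returns a generator of ChatCompletionDeltaMessage chunks.
for chunk in client.create_stream(messages=messages):
    if chunk.content:
        print(chunk.content, end="", flush=True)
    if chunk.usage:  # some backends emit a final usage-only chunk
        print("\ntotal tokens:", chunk.usage.total_tokens)

# Async client: await create_stream, then iterate with `async for`
# (as in tests/test_create_chat_client.py).
# response = await async_client.create_stream(messages=messages)
# async for chunk in response:
#     print(chunk.content or "", end="")
```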
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/gemini_client.py

@@ -10,6 +10,7 @@ from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
+from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
 
 
 class GeminiChatClient(BaseChatClient):

@@ -120,7 +121,7 @@ class GeminiChatClient(BaseChatClient):
                     message["tool_calls"] = [
                         {
                             "index": 0,
-                            "id":
+                            "id": "call_0",
                             "function": {
                                 "arguments": json.dumps(
                                     chunk_content["functionCall"]["args"], ensure_ascii=False

@@ -136,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
                         "completion_tokens": data["usageMetadata"]["candidatesTokenCount"],
                         "total_tokens": data["usageMetadata"]["totalTokenCount"],
                     }
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
 
             return generator()
         else:

@@ -160,7 +161,7 @@ class GeminiChatClient(BaseChatClient):
             if tool_calls:
                 result["tool_calls"] = tool_calls
 
-            return result
+            return ChatCompletionMessage(**result)
 
 
 class AsyncGeminiChatClient(BaseAsyncChatClient):

@@ -272,7 +273,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
                     message["tool_calls"] = [
                         {
                             "index": 0,
-                            "id":
+                            "id": "call_0",
                             "function": {
                                 "arguments": json.dumps(
                                     chunk_content["functionCall"]["args"], ensure_ascii=False

@@ -288,7 +289,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
                         "completion_tokens": data["usageMetadata"]["candidatesTokenCount"],
                         "total_tokens": data["usageMetadata"]["totalTokenCount"],
                     }
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
 
             return generator()
         else:

@@ -314,4 +315,4 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             if tool_calls:
                 result["tool_calls"] = tool_calls
 
-            return result
+            return ChatCompletionMessage(**result)
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/minimax_client.py

@@ -4,12 +4,14 @@ import json
 import random
 
 import httpx
+from openai._types import NotGiven
 
 from ..settings import settings
 from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
+from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
 
 
 def extract_tool_calls(response):

@@ -74,6 +76,8 @@ class MiniMaxChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(tool_choice, NotGiven):
+            tool_choice = "auto"
 
         self.model_setting = self.backend_settings.models[self.model]
         if self.random_endpoint:

@@ -141,33 +145,39 @@ class MiniMaxChatClient(BaseChatClient):
                     if "usage" not in chunk_data:
                         continue
                     else:
-                        yield
-
-
-
-
+                        yield ChatCompletionDeltaMessage(
+                            **{
+                                "content": chunk_data["choices"][0]["message"].get("content"),
+                                "role": "assistant",
+                                **tool_calls_params,
+                            }
+                        )
                 else:
                     if "usage" in chunk_data:
                         continue
-                    yield
-
-
-
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"]["content"],
+                            "role": "assistant",
+                        }
+                    )
 
             return generator()
         else:
             result = response.json()
             tool_calls_params = extract_tool_calls(result)
-            return
-
-
-            "
-
-
-
-
-
-
+            return ChatCompletionMessage(
+                **{
+                    "content": result["choices"][0]["message"].get("content"),
+                    "usage": {
+                        "prompt_tokens": 0,
+                        "completion_tokens": result["usage"]["total_tokens"],
+                        "total_tokens": result["usage"]["total_tokens"],
+                    },
+                    "role": "assistant",
+                    **tool_calls_params,
+                }
+            )
 
 
 class AsyncMiniMaxChatClient(BaseAsyncChatClient):

@@ -211,6 +221,8 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(tool_choice, NotGiven):
+            tool_choice = "auto"
 
         self.model_setting = self.backend_settings.models[self.model]
         if self.random_endpoint:

@@ -277,18 +289,22 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
                     if "usage" not in chunk_data:
                         continue
                     else:
-                        yield
-
-
-
-
+                        yield ChatCompletionDeltaMessage(
+                            **{
+                                "content": chunk_data["choices"][0]["message"].get("content"),
+                                "role": "assistant",
+                                **tool_calls_params,
+                            }
+                        )
                 else:
                     if "usage" in chunk_data:
                         continue
-                    yield
-
-
-
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"]["content"],
+                            "role": "assistant",
+                        }
+                    )
 
             return generator()
         else:

@@ -300,16 +316,18 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             )
             result = response.json()
             tool_calls_params = extract_tool_calls(result)
-            return
-
-
-            "
-
-
-
-
-
-
+            return ChatCompletionMessage(
+                **{
+                    "content": result["choices"][0]["message"].get("content"),
+                    "usage": {
+                        "prompt_tokens": 0,
+                        "completion_tokens": result["usage"]["total_tokens"],
+                        "total_tokens": result["usage"]["total_tokens"],
+                    },
+                    "role": "assistant",
+                    **tool_calls_params,
+                }
+            )
 
     async def __aexit__(self, exc_type, exc, tb):
         await self.http_client.aclose()
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED

@@ -2,7 +2,6 @@
 # @Date: 2024-07-26 14:48:55
 import json
 import random
-from typing import Union, AsyncGenerator
 
 from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream

@@ -18,6 +17,7 @@ from .utils import (
 from ..settings import settings
 from ..types import defaults as defs
 from ..types.enums import ContextLengthControlType, BackendType
+from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
 
 
 class OpenAICompatibleChatClient(BaseChatClient):

@@ -122,21 +122,21 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
-                    yield chunk.choices[0].delta.model_dump()
+                    yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
                     full_content += message["content"] if message["content"] else ""
                     if tools:
-                        tool_call_data = ToolCallContentProcessor(
+                        tool_call_data = ToolCallContentProcessor(full_content).tool_calls
                         if tool_call_data:
                             message["tool_calls"] = tool_call_data["tool_calls"]
                     if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
                         message["content"] = ""
                         result = message
                         continue
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
             if result:
-                yield result
+                yield ChatCompletionDeltaMessage(**result)
 
             return generator()
         else:

@@ -155,7 +155,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
             if tool_call_data:
                 result["tool_calls"] = tool_call_data["tool_calls"]
                 result["content"] = tool_call_content_processor.non_tool_content
-
+
+            return ChatCompletionMessage(**result)
 
 
 class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):

@@ -191,7 +192,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         max_tokens: int = 2000,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
-    )
+    ):
         if model is not None:
             self.model = model
         if stream is not None:

@@ -260,21 +261,21 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
-                    yield chunk.choices[0].delta.model_dump()
+                    yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
                     full_content += message["content"] if message["content"] else ""
                     if tools:
-                        tool_call_data = ToolCallContentProcessor(
+                        tool_call_data = ToolCallContentProcessor(full_content).tool_calls
                         if tool_call_data:
                             message["tool_calls"] = tool_call_data["tool_calls"]
                     if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
                         message["content"] = ""
                         result = message
                         continue
-                    yield message
+                    yield ChatCompletionDeltaMessage(**message)
             if result:
-                yield result
+                yield ChatCompletionDeltaMessage(**result)
 
             return generator()
         else:

@@ -293,4 +294,4 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             if tool_call_data:
                 result["tool_calls"] = tool_call_data["tool_calls"]
                 result["content"] = tool_call_content_processor.non_tool_content
-            return result
+            return ChatCompletionMessage(**result)
{vectorvein-0.1.3 → vectorvein-0.1.5}/src/vectorvein/types/llm_parameters.py

@@ -4,6 +4,7 @@ from typing import List, Dict, Optional
 
 from pydantic import BaseModel, Field
 from openai.types.chat.chat_completion_message import ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
 
 from . import defaults as defs
 

@@ -58,7 +59,7 @@ class Usage(BaseModel):
     total_tokens: int
 
 
-class
+class ChatCompletionMessage(BaseModel):
     content: Optional[str] = None
 
     tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

@@ -67,3 +68,13 @@ class ModelOutput(BaseModel):
     function_call_arguments: Optional[dict] = None
 
     usage: Optional[Usage] = None
+
+class ChatCompletionDeltaMessage(BaseModel):
+    content: Optional[str] = None
+
+    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
+    """The tool calls generated by the model, such as function calls."""
+
+    function_call_arguments: Optional[dict] = None
+
+    usage: Optional[Usage] = None
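For reference, the two new response models can be constructed directly; a small illustration with made-up values, assuming `Usage` exposes the prompt/completion/total token fields the clients above populate:

```python
from vectorvein.types.llm_parameters import (
    ChatCompletionDeltaMessage,
    ChatCompletionMessage,
    Usage,
)

# Final, non-streaming message: full content plus aggregated usage.
message = ChatCompletionMessage(
    content="Hello!",
    usage=Usage(prompt_tokens=8, completion_tokens=3, total_tokens=11),
)

# Streaming delta: each chunk carries only the incremental content.
delta = ChatCompletionDeltaMessage(content="Hel")

print(message.content, message.usage.total_tokens, delta.content)
```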
{vectorvein-0.1.3 → vectorvein-0.1.5}/tests/sample_settings.py

@@ -464,4 +464,38 @@ sample_settings = {
             "endpoints": ["zhipuai-default"],
         },
     },
+    "qwen_models": {
+        "qwen1.5-1.8b-chat": {
+            "id": "Qwen/Qwen1.5-1.8B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-4b-chat": {
+            "id": "Qwen/Qwen1.5-4B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-7b-chat": {
+            "id": "Qwen/Qwen1.5-7B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-14b-chat": {
+            "id": "Qwen/Qwen1.5-14B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-32b-chat": {
+            "id": "Qwen/Qwen1.5-32B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-72b-chat": {
+            "id": "Qwen/Qwen1.5-72B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen1.5-110b-chat": {
+            "id": "Qwen/Qwen1.5-110B-Chat",
+            "endpoints": ["together-default"],
+        },
+        "qwen2-72b-instruct": {
+            "id": "Qwen/Qwen2-72B-Instruct",
+            "endpoints": ["together-default"],
+        },
+    },
 }
{vectorvein-0.1.3 → vectorvein-0.1.5}/tests/test_create_chat_client.py

@@ -145,30 +145,36 @@ def test_sync(backend, model, stream: bool = False, use_tool: bool = False):
     else:
         messages = messages_simple
         tools_params = {}
-
-    if stream:
+
+    if not stream:
+        response = client.create_completion(messages=format_messages(messages, backend=backend), **tools_params)
+        print(response)
+    else:
+        response = client.create_stream(messages=format_messages(messages, backend=backend), **tools_params)
         for chunk in response:
             print(chunk)
             print("=" * 20)
-    else:
-        print(response)
 
 
 async def test_async(backend, model, stream: bool = False, use_tool: bool = False):
-    client = create_async_chat_client(backend, model=model
+    client = create_async_chat_client(backend, model=model)
     if use_tool:
         messages = messages_for_tools_simple
         tools_params = {"tools": tools_simple}
     else:
         messages = messages_simple
         tools_params = {}
-
-    if stream:
+
+    if not stream:
+        response = await client.create_completion(
+            messages=format_messages(messages, backend=backend), stream=False, **tools_params
+        )
+        print(response)
+    else:
+        response = await client.create_stream(messages=format_messages(messages, backend=backend), **tools_params)
         async for chunk in response:
             print(chunk)
             print("=" * 20)
-    else:
-        print(response)
 
 
 backend = BackendType.Moonshot

@@ -177,22 +183,29 @@ backend = BackendType.OpenAI
 model = "gpt-4o"
 backend = BackendType.Anthropic
 model = "claude-3-5-sonnet-20240620"
-backend = BackendType.MiniMax
-model = "abab6.5s-chat"
-backend = BackendType.Gemini
-model = "gemini-1.5-flash"
-backend = BackendType.OpenAI
-model = "gpt-35-turbo"
-backend = BackendType.
-model = "
+# backend = BackendType.MiniMax
+# model = "abab6.5s-chat"
+# backend = BackendType.Gemini
+# model = "gemini-1.5-flash"
+# backend = BackendType.OpenAI
+# model = "gpt-35-turbo"
+# backend = BackendType.MiniMax
+# model = "abab6.5s-chat"
+# backend = BackendType.Yi
+# model = "yi-large-fc"
+# backend = BackendType.Mistral
+# model = "mixtral-8x7b"
+backend = BackendType.Qwen
+model = "qwen2-72b-instruct"
+
 start_time = time.perf_counter()
 # test_sync(backend=backend, model=model, stream=False, use_tool=False)
-test_sync(backend=backend, model=model, stream=False, use_tool=True)
+# test_sync(backend=backend, model=model, stream=False, use_tool=True)
 # test_sync(backend=backend, model=model, stream=True, use_tool=False)
 # test_sync(backend=backend, model=model, stream=True, use_tool=True)
 # asyncio.run(test_async(backend=backend, model=model, stream=False, use_tool=False))
 # asyncio.run(test_async(backend=backend, model=model, stream=False, use_tool=True))
 # asyncio.run(test_async(backend=backend, model=model, stream=True, use_tool=False))
-
+asyncio.run(test_async(backend=backend, model=model, stream=True, use_tool=True))
 end_time = time.perf_counter()
 print(f"Stream time elapsed: {end_time - start_time} seconds")