vectorvein 0.1.10__tar.gz → 0.1.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.10 → vectorvein-0.1.12}/PKG-INFO +1 -1
- {vectorvein-0.1.10 → vectorvein-0.1.12}/pyproject.toml +1 -1
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/__init__.py +14 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/anthropic_client.py +13 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/base_client.py +5 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/gemini_client.py +30 -11
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/minimax_client.py +16 -2
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_compatible_client.py +15 -2
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/defaults.py +14 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/llm_parameters.py +1 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/sample_settings.py +6 -2
- vectorvein-0.1.12/tests/test_chat_prefix.py +23 -0
- vectorvein-0.1.12/tests/test_http_client.py +24 -0
- vectorvein-0.1.12/tests/test_stop.py +25 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/README.md +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/utils.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/utilities/retry.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/__init__.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/cat.png +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_create_chat_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_format_messages.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_image_input_chat_client.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_tokens_count.py +0 -0
- {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_tool_use_multi_turns.py +0 -0
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/__init__.py

@@ -1,5 +1,7 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 14:48:55
+import httpx
+
 from .base_client import BaseChatClient, BaseAsyncChatClient

 from .yi_client import YiChatClient, AsyncYiChatClient
@@ -58,6 +60,9 @@ def create_chat_client(
     stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
+    random_endpoint: bool = True,
+    endpoint_id: str = "",
+    http_client: httpx.Client | None = None,
     **kwargs,
 ) -> BaseChatClient:
     if backend.lower() not in BackendMap["sync"]:
@@ -73,6 +78,9 @@ def create_chat_client(
         stream=stream,
         temperature=temperature,
         context_length_control=context_length_control,
+        random_endpoint=random_endpoint,
+        endpoint_id=endpoint_id,
+        http_client=http_client,
         **kwargs,
     )

@@ -83,6 +91,9 @@ def create_async_chat_client(
     stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
+    random_endpoint: bool = True,
+    endpoint_id: str = "",
+    http_client: httpx.AsyncClient | None = None,
     **kwargs,
 ) -> BaseAsyncChatClient:
     if backend.lower() not in BackendMap["async"]:
@@ -98,6 +109,9 @@ def create_async_chat_client(
         stream=stream,
         temperature=temperature,
         context_length_control=context_length_control,
+        random_endpoint=random_endpoint,
+        endpoint_id=endpoint_id,
+        http_client=http_client,
         **kwargs,
     )
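Both factory functions now forward `random_endpoint`, `endpoint_id`, and `http_client` to whichever backend client they construct. A minimal usage sketch, mirroring the new tests/test_http_client.py further below; the timeout value is illustrative and a DeepSeek endpoint is assumed to be present in the loaded settings:

```python
import httpx

from vectorvein.settings import settings
from vectorvein.types.enums import BackendType
from vectorvein.chat_clients import create_chat_client

from sample_settings import sample_settings  # test fixture shipped in tests/

settings.load(sample_settings)

# Reuse one pre-configured httpx client (proxies, timeouts, connection pooling, ...).
http_client = httpx.Client(timeout=30.0)
client = create_chat_client(
    backend=BackendType.DeepSeek,
    model="deepseek-chat",
    stream=False,
    http_client=http_client,
)
response = client.create_completion(messages=[{"role": "user", "content": "Hello"}])
print(response)
```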
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/anthropic_client.py

@@ -3,6 +3,7 @@
 import json
 import random

+import httpx
 from openai._types import NotGiven as OpenAINotGiven
 from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
 from anthropic._types import NotGiven, NOT_GIVEN
@@ -97,6 +98,7 @@ class AnthropicChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -106,6 +108,7 @@ class AnthropicChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -118,6 +121,7 @@ class AnthropicChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -182,11 +186,13 @@ class AnthropicChatClient(BaseChatClient):
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
                 access_token=self.creds.token,
+                http_client=self.http_client,
             )
         else:
             self._client = Anthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )

         tools_params = refactor_tool_use_params(tools) if tools else tools
@@ -210,6 +216,7 @@ class AnthropicChatClient(BaseChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice,
+            **kwargs,
         )

         if self.stream:
@@ -297,6 +304,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -306,6 +314,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -318,6 +327,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -382,11 +392,13 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
                 access_token=self.creds.token,
+                http_client=self.http_client,
             )
         else:
             self._client = AsyncAnthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )

         tools_params = refactor_tool_use_params(tools) if tools else tools
@@ -410,6 +422,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice,
+            **kwargs,
         )

         if self.stream:
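Because `**kwargs` is now forwarded from `create_completion` into `messages.create`, Anthropic-specific request options can be passed without a dedicated parameter for each one. A hedged sketch, assuming the loaded settings define an Anthropic endpoint, that `BackendType.Anthropic` is the matching enum member, and that the model id shown is registered in the defaults; `stop_sequences` and `top_k` are standard Anthropic Messages API parameters:

```python
from vectorvein.settings import settings
from vectorvein.types.enums import BackendType
from vectorvein.chat_clients import create_chat_client

from sample_settings import sample_settings  # assumed to contain an Anthropic endpoint

settings.load(sample_settings)

client = create_chat_client(backend=BackendType.Anthropic, model="claude-3-5-sonnet-20240620", stream=False)
# Extra keyword arguments are passed straight through to self._client.messages.create(...).
response = client.create_completion(
    messages=[{"role": "user", "content": "Write quick sort in a single Python code block."}],
    stop_sequences=["\n```\n"],  # Anthropic's name for stop strings
    top_k=40,
)
print(response)
```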
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/base_client.py

@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import Generator, AsyncGenerator, Any

+import httpx
 from openai._types import NotGiven, NOT_GIVEN

 from ..settings import settings
@@ -23,6 +24,7 @@ class BaseChatClient(ABC):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         self.model = model or self.DEFAULT_MODEL
@@ -31,6 +33,7 @@ class BaseChatClient(ABC):
         self.context_length_control = context_length_control
         self.random_endpoint = random_endpoint
         self.endpoint_id = endpoint_id
+        self.http_client = http_client

         self.backend_settings = settings.get_backend(self.BACKEND_NAME)

@@ -84,6 +87,7 @@ class BaseAsyncChatClient(ABC):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         self.model = model or self.DEFAULT_MODEL
@@ -92,6 +96,7 @@ class BaseAsyncChatClient(ABC):
         self.context_length_control = context_length_control
         self.random_endpoint = random_endpoint
         self.endpoint_id = endpoint_id
+        self.http_client = http_client

         self.backend_settings = settings.get_backend(self.BACKEND_NAME)
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/gemini_client.py

@@ -25,6 +25,7 @@ class GeminiChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -34,6 +35,7 @@ class GeminiChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -107,7 +109,11 @@ class GeminiChatClient(BaseChatClient):

             def generator():
                 result = {"content": ""}
-
+                if self.http_client:
+                    client = self.http_client
+                else:
+                    client = httpx.Client()
+                with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     for chunk in response.iter_lines():
                         message = {"content": ""}
                         if not chunk.startswith("data:"):
@@ -142,13 +148,17 @@ class GeminiChatClient(BaseChatClient):
             return generator()
         else:
             url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.Client()
+            response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
             result = {
                 "content": "",
                 "usage": {
-                    "prompt_tokens": response
-                    "completion_tokens": response
-                    "total_tokens": response
+                    "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
+                    "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
+                    "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
                 },
             }
             tool_calls = []
@@ -185,6 +195,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -194,6 +205,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -267,7 +279,10 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):

             async def generator():
                 result = {"content": ""}
-
+                if self.http_client:
+                    client = self.http_client
+                else:
+                    client = httpx.AsyncClient()
                 async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     async for chunk in response.aiter_lines():
                         message = {"content": ""}
@@ -303,15 +318,19 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             return generator()
         else:
             url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-
-
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.AsyncClient()
+            async with client:
+                response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
                 response = response.json()
             result = {
                 "content": "",
                 "usage": {
-                    "prompt_tokens": response
-                    "completion_tokens": response
-                    "total_tokens": response
+                    "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
+                    "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
+                    "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
                 },
             }
             tool_calls = []
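Both Gemini clients now reuse a caller-supplied `http_client` and only fall back to a fresh `httpx.Client()` / `httpx.AsyncClient()`, and the usage block is now read from the response's `usageMetadata` field. A sketch of the async path, assuming `BackendType.Gemini` is the enum member for this backend, that a Gemini endpoint exists in the loaded settings, and that the model id shown is registered in the defaults:

```python
import asyncio

import httpx

from vectorvein.settings import settings
from vectorvein.types.enums import BackendType
from vectorvein.chat_clients import create_async_chat_client

from sample_settings import sample_settings  # assumed to contain a Gemini endpoint

settings.load(sample_settings)


async def main():
    # The supplied AsyncClient is used for the Gemini HTTP requests instead of a throwaway client.
    http_client = httpx.AsyncClient(timeout=60.0)
    client = create_async_chat_client(
        backend=BackendType.Gemini,
        model="gemini-1.5-pro",
        stream=False,
        http_client=http_client,
    )
    response = await client.create_completion(messages=[{"role": "user", "content": "Hello"}])
    print(response["usage"])  # prompt/completion/total tokens taken from usageMetadata


asyncio.run(main())
```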
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/minimax_client.py

@@ -48,6 +48,7 @@ class MiniMaxChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -57,9 +58,13 @@ class MiniMaxChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
-
+        if http_client:
+            self.http_client = http_client
+        else:
+            self.http_client = httpx.Client()

     def create_completion(
         self,
@@ -70,6 +75,7 @@ class MiniMaxChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -135,6 +141,7 @@ class MiniMaxChatClient(BaseChatClient):
             "stream": self.stream,
             "mask_sensitive_info": False,
             **tools_params,
+            **kwargs,
         }

         if self.stream:
@@ -206,6 +213,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -215,9 +223,13 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
-
+        if http_client:
+            self.http_client = http_client
+        else:
+            self.http_client = httpx.AsyncClient()

     async def create_completion(
         self,
@@ -228,6 +240,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -291,6 +304,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             "stream": self.stream,
             "mask_sensitive_info": False,
             **tools_params,
+            **kwargs,
         }

         if self.stream:
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED

@@ -3,6 +3,7 @@
 import json
 import random

+import httpx
 from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
@@ -33,6 +34,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -42,6 +44,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -54,6 +57,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -73,12 +77,14 @@ class OpenAICompatibleChatClient(BaseChatClient):
             self._client = AzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
             )
         else:
             self._client = OpenAI(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -120,6 +126,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             temperature=self.temperature,
             max_tokens=max_tokens,
             **tools_params,
+            **kwargs,
         )

         if self.stream:
@@ -186,6 +193,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -195,6 +203,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )

@@ -207,6 +216,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -226,12 +236,14 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             self._client = AsyncAzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
             )
         else:
             self._client = AsyncOpenAI(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -273,6 +285,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             temperature=self.temperature,
             max_tokens=max_tokens,
             **tools_params,
+            **kwargs,
         )

         if self.stream:
{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/defaults.py

@@ -213,6 +213,13 @@ ZHIPUAI_MODELS = {
         "response_format_available": False,
         "max_output_tokens": 4095,
     },
+    "glm-4-plus": {
+        "id": "glm-4-plus",
+        "context_length": 128000,
+        "function_call_available": True,
+        "response_format_available": False,
+        "max_output_tokens": 4095,
+    },
     "glm-4-0520": {
         "id": "glm-4-0520",
         "context_length": 128000,
@@ -255,6 +262,13 @@ ZHIPUAI_MODELS = {
         "response_format_available": False,
         "max_output_tokens": 1024,
     },
+    "glm-4v-plus": {
+        "id": "glm-4v-plus",
+        "context_length": 2000,
+        "function_call_available": False,
+        "response_format_available": False,
+        "max_output_tokens": 1024,
+    },
 }

 # Mistral models
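With these two entries, `glm-4-plus` and `glm-4v-plus` can be selected by id like any other built-in ZhiPuAI model. A minimal sketch, assuming `BackendType.ZhiPuAI` is the enum member for this backend and that a ZhiPuAI endpoint is configured in the loaded settings:

```python
from vectorvein.settings import settings
from vectorvein.types.enums import BackendType
from vectorvein.chat_clients import create_chat_client

from sample_settings import sample_settings  # assumed to contain a ZhiPuAI endpoint

settings.load(sample_settings)

# glm-4-plus: 128k context, function calling available, max_output_tokens 4095 (per defaults.py above).
client = create_chat_client(backend=BackendType.ZhiPuAI, model="glm-4-plus", stream=False)
response = client.create_completion(messages=[{"role": "user", "content": "Hello"}])
print(response)
```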
{vectorvein-0.1.10 → vectorvein-0.1.12}/tests/sample_settings.py

@@ -39,7 +39,7 @@ sample_settings = {
         },
         {
             "id": "deepseek-default",
-            "api_base": "https://api.deepseek.com/
+            "api_base": "https://api.deepseek.com/beta",
             "api_key": "",
         },
         {
@@ -80,6 +80,10 @@ sample_settings = {
         },
         "openai": {
             "models": {
+                "gpt-4o-mini": {
+                    "id": "gpt-4o-mini",
+                    "endpoints": ["azure-openai"],
+                },
                 "gpt-4o": {
                     "id": "gpt-4o",
                     "endpoints": ["azure-openai"],
@@ -362,7 +366,7 @@ sample_settings = {
         },
         {
             "id": "deepseek-default",
-            "api_base": "https://api.deepseek.com/
+            "api_base": "https://api.deepseek.com/beta",
             "api_key": "sk-6dad42e7154743cd80b77dff5d0ecaaa",
         },
         {
vectorvein-0.1.12/tests/test_chat_prefix.py

@@ -0,0 +1,23 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {"role": "user", "content": "Please write quick sort code"},
+    {"role": "assistant", "content": "```python\n", "prefix": True},
+]
+
+
+start_time = time.perf_counter()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False)
+response = client.create_completion(messages=messages, stop=["\n```"])
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")
vectorvein-0.1.12/tests/test_http_client.py

@@ -0,0 +1,24 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+import httpx
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {"role": "user", "content": "Please write quick sort code"},
+]
+
+
+start_time = time.perf_counter()
+http_client = httpx.Client()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False, http_client=http_client)
+response = client.create_completion(messages=messages)
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")
vectorvein-0.1.12/tests/test_stop.py

@@ -0,0 +1,25 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {
+        "role": "user",
+        "content": "节点名称是 FileLoader,FileLoader 节点连到 OCR 节点,使用 mermaid 语法表示流程图。直接开始补全,不要有任何解释。\n\n```mermaid\n",
+    }
+]
+
+
+start_time = time.perf_counter()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False)
+response = client.create_completion(messages=messages, stop=["\n```"])
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")
|