vectorvein 0.1.10__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {vectorvein-0.1.10 → vectorvein-0.1.12}/PKG-INFO +1 -1
  2. {vectorvein-0.1.10 → vectorvein-0.1.12}/pyproject.toml +1 -1
  3. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/__init__.py +14 -0
  4. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/anthropic_client.py +13 -0
  5. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/base_client.py +5 -0
  6. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/gemini_client.py +30 -11
  7. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/minimax_client.py +16 -2
  8. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_compatible_client.py +15 -2
  9. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/defaults.py +14 -0
  10. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/llm_parameters.py +1 -0
  11. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/sample_settings.py +6 -2
  12. vectorvein-0.1.12/tests/test_chat_prefix.py +23 -0
  13. vectorvein-0.1.12/tests/test_http_client.py +24 -0
  14. vectorvein-0.1.12/tests/test_stop.py +25 -0
  15. {vectorvein-0.1.10 → vectorvein-0.1.12}/README.md +0 -0
  16. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/__init__.py +0 -0
  17. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  18. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/groq_client.py +0 -0
  19. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/local_client.py +0 -0
  20. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  21. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  22. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_client.py +0 -0
  23. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  24. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/utils.py +0 -0
  25. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/yi_client.py +0 -0
  26. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  27. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/settings/__init__.py +0 -0
  28. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/enums.py +0 -0
  29. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/utilities/media_processing.py +0 -0
  30. {vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/utilities/retry.py +0 -0
  31. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/__init__.py +0 -0
  32. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/cat.png +0 -0
  33. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_create_chat_client.py +0 -0
  34. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_format_messages.py +0 -0
  35. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_image_input_chat_client.py +0 -0
  36. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_tokens_count.py +0 -0
  37. {vectorvein-0.1.10 → vectorvein-0.1.12}/tests/test_tool_use_multi_turns.py +0 -0

{vectorvein-0.1.10 → vectorvein-0.1.12}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.10
+Version: 0.1.12
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT

{vectorvein-0.1.10 → vectorvein-0.1.12}/pyproject.toml
@@ -16,7 +16,7 @@ description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.10"
+version = "0.1.12"
 
 [project.license]
 text = "MIT"

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/__init__.py
@@ -1,5 +1,7 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 14:48:55
+import httpx
+
 from .base_client import BaseChatClient, BaseAsyncChatClient
 
 from .yi_client import YiChatClient, AsyncYiChatClient
@@ -58,6 +60,9 @@ def create_chat_client(
     stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
+    random_endpoint: bool = True,
+    endpoint_id: str = "",
+    http_client: httpx.Client | None = None,
     **kwargs,
 ) -> BaseChatClient:
     if backend.lower() not in BackendMap["sync"]:
@@ -73,6 +78,9 @@ def create_chat_client(
         stream=stream,
         temperature=temperature,
         context_length_control=context_length_control,
+        random_endpoint=random_endpoint,
+        endpoint_id=endpoint_id,
+        http_client=http_client,
         **kwargs,
     )
 
@@ -83,6 +91,9 @@ def create_async_chat_client(
     stream: bool = False,
     temperature: float = 0.7,
     context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
+    random_endpoint: bool = True,
+    endpoint_id: str = "",
+    http_client: httpx.AsyncClient | None = None,
     **kwargs,
 ) -> BaseAsyncChatClient:
     if backend.lower() not in BackendMap["async"]:
@@ -98,6 +109,9 @@ def create_async_chat_client(
         stream=stream,
         temperature=temperature,
         context_length_control=context_length_control,
+        random_endpoint=random_endpoint,
+        endpoint_id=endpoint_id,
+        http_client=http_client,
         **kwargs,
     )
 

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/anthropic_client.py
@@ -3,6 +3,7 @@
 import json
 import random
 
+import httpx
 from openai._types import NotGiven as OpenAINotGiven
 from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
 from anthropic._types import NotGiven, NOT_GIVEN
@@ -97,6 +98,7 @@ class AnthropicChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -106,6 +108,7 @@ class AnthropicChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -118,6 +121,7 @@ class AnthropicChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -182,11 +186,13 @@ class AnthropicChatClient(BaseChatClient):
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
                 access_token=self.creds.token,
+                http_client=self.http_client,
             )
         else:
             self._client = Anthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )
 
         tools_params = refactor_tool_use_params(tools) if tools else tools
@@ -210,6 +216,7 @@ class AnthropicChatClient(BaseChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice,
+            **kwargs,
         )
 
         if self.stream:
@@ -297,6 +304,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -306,6 +314,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -318,6 +327,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -382,11 +392,13 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 base_url=base_url,
                 project_id=self.endpoint.credentials.get("quota_project_id"),
                 access_token=self.creds.token,
+                http_client=self.http_client,
             )
         else:
             self._client = AsyncAnthropic(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )
 
         tools_params = refactor_tool_use_params(tools) if tools else tools
@@ -410,6 +422,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice,
+            **kwargs,
         )
 
         if self.stream:

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/base_client.py
@@ -3,6 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import Generator, AsyncGenerator, Any
 
+import httpx
 from openai._types import NotGiven, NOT_GIVEN
 
 from ..settings import settings
@@ -23,6 +24,7 @@ class BaseChatClient(ABC):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         self.model = model or self.DEFAULT_MODEL
@@ -31,6 +33,7 @@ class BaseChatClient(ABC):
         self.context_length_control = context_length_control
         self.random_endpoint = random_endpoint
         self.endpoint_id = endpoint_id
+        self.http_client = http_client
 
         self.backend_settings = settings.get_backend(self.BACKEND_NAME)
 
@@ -84,6 +87,7 @@ class BaseAsyncChatClient(ABC):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         self.model = model or self.DEFAULT_MODEL
@@ -92,6 +96,7 @@ class BaseAsyncChatClient(ABC):
         self.context_length_control = context_length_control
         self.random_endpoint = random_endpoint
         self.endpoint_id = endpoint_id
+        self.http_client = http_client
 
         self.backend_settings = settings.get_backend(self.BACKEND_NAME)
 

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/gemini_client.py
@@ -25,6 +25,7 @@ class GeminiChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -34,6 +35,7 @@ class GeminiChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -107,7 +109,11 @@ class GeminiChatClient(BaseChatClient):
 
         def generator():
             result = {"content": ""}
-            with httpx.stream("POST", url, headers=headers, params=params, json=request_body) as response:
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.Client()
+            with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                 for chunk in response.iter_lines():
                     message = {"content": ""}
                     if not chunk.startswith("data:"):
@@ -142,13 +148,17 @@ class GeminiChatClient(BaseChatClient):
             return generator()
         else:
             url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-            response = httpx.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.Client()
+            response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
             result = {
                 "content": "",
                 "usage": {
-                    "prompt_tokens": response["usageMetadata"]["promptTokenCount"],
-                    "completion_tokens": response["usageMetadata"]["candidatesTokenCount"],
-                    "total_tokens": response["usageMetadata"]["totalTokenCount"],
+                    "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
+                    "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
+                    "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
                 },
             }
             tool_calls = []
@@ -185,6 +195,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -194,6 +205,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -267,7 +279,10 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
 
         async def generator():
             result = {"content": ""}
-            client = httpx.AsyncClient()
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.AsyncClient()
             async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                 async for chunk in response.aiter_lines():
                     message = {"content": ""}
@@ -303,15 +318,19 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             return generator()
         else:
             url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-            async with httpx.AsyncClient(headers=headers, params=params, timeout=None) as client:
-                response = await client.post(url, json=request_body)
+            if self.http_client:
+                client = self.http_client
+            else:
+                client = httpx.AsyncClient()
+            async with client:
+                response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
                 response = response.json()
             result = {
                 "content": "",
                 "usage": {
-                    "prompt_tokens": response["usageMetadata"]["promptTokenCount"],
-                    "completion_tokens": response["usageMetadata"]["candidatesTokenCount"],
-                    "total_tokens": response["usageMetadata"]["totalTokenCount"],
+                    "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
+                    "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
+                    "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
                 },
             }
             tool_calls = []
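
The gemini_client.py hunks above make an injected httpx client authoritative: when http_client is supplied it is reused for streaming and non-streaming requests, and only otherwise does the code fall back to a throwaway httpx.Client / httpx.AsyncClient; the usage accounting also switches to .get() lookups so a missing usageMetadata block no longer raises KeyError. As a rough illustration of what the injection enables, the sketch below shares one preconfigured httpx.Client across calls. It is not part of the package; the BackendType.Gemini member name and the "gemini-1.5-pro" model id are assumptions made for illustration only.

# Hypothetical sketch (not shipped with vectorvein): reuse one preconfigured httpx.Client
# so GeminiChatClient uses it instead of building a new client per request.
import httpx

from vectorvein.settings import settings
from vectorvein.types.enums import BackendType  # BackendType.Gemini is assumed here
from vectorvein.chat_clients import create_chat_client

from sample_settings import sample_settings  # assumes a gemini backend is configured

settings.load(sample_settings)

# Explicit timeouts and connection limits, applied to every completion call.
shared_http_client = httpx.Client(
    timeout=httpx.Timeout(30.0, connect=5.0),
    limits=httpx.Limits(max_connections=10),
)

client = create_chat_client(
    backend=BackendType.Gemini,  # assumption: enum member name
    model="gemini-1.5-pro",      # assumption: model id present in the loaded settings
    stream=False,
    http_client=shared_http_client,
)
response = client.create_completion(messages=[{"role": "user", "content": "Hello"}])
print(response)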

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/minimax_client.py
@@ -48,6 +48,7 @@ class MiniMaxChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -57,9 +58,13 @@ class MiniMaxChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
-        self.http_client = httpx.Client()
+        if http_client:
+            self.http_client = http_client
+        else:
+            self.http_client = httpx.Client()
 
     def create_completion(
         self,
@@ -70,6 +75,7 @@ class MiniMaxChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -135,6 +141,7 @@ class MiniMaxChatClient(BaseChatClient):
             "stream": self.stream,
             "mask_sensitive_info": False,
             **tools_params,
+            **kwargs,
         }
 
         if self.stream:
@@ -206,6 +213,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -215,9 +223,13 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
-        self.http_client = httpx.AsyncClient()
+        if http_client:
+            self.http_client = http_client
+        else:
+            self.http_client = httpx.AsyncClient()
 
     async def create_completion(
         self,
@@ -228,6 +240,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -291,6 +304,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             "stream": self.stream,
             "mask_sensitive_info": False,
             **tools_params,
+            **kwargs,
         }
 
         if self.stream:

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -3,6 +3,7 @@
 import json
 import random
 
+import httpx
 from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
@@ -33,6 +34,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.Client | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -42,6 +44,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -54,6 +57,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -73,12 +77,14 @@ class OpenAICompatibleChatClient(BaseChatClient):
             self._client = AzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-05-01-preview",
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
             )
         else:
             self._client = OpenAI(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
             )
 
         if self.context_length_control == ContextLengthControlType.Latest:
@@ -120,6 +126,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             temperature=self.temperature,
             max_tokens=max_tokens,
             **tools_params,
+            **kwargs,
         )
 
         if self.stream:
@@ -186,6 +193,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
         random_endpoint: bool = True,
         endpoint_id: str = "",
+        http_client: httpx.AsyncClient | None = None,
         **kwargs,
     ):
         super().__init__(
@@ -195,6 +203,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             context_length_control,
             random_endpoint,
             endpoint_id,
+            http_client,
             **kwargs,
         )
 
@@ -207,6 +216,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
+        **kwargs,
     ):
         if model is not None:
             self.model = model
@@ -226,12 +236,14 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             self._client = AsyncAzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-05-01-preview",
+                api_version="2024-08-01-preview",
+                http_client=self.http_client,
             )
         else:
             self._client = AsyncOpenAI(
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
+                http_client=self.http_client,
            )
 
         if self.context_length_control == ContextLengthControlType.Latest:
@@ -273,6 +285,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             temperature=self.temperature,
             max_tokens=max_tokens,
             **tools_params,
+            **kwargs,
         )
 
         if self.stream:

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/defaults.py
@@ -213,6 +213,13 @@ ZHIPUAI_MODELS = {
         "response_format_available": False,
         "max_output_tokens": 4095,
     },
+    "glm-4-plus": {
+        "id": "glm-4-plus",
+        "context_length": 128000,
+        "function_call_available": True,
+        "response_format_available": False,
+        "max_output_tokens": 4095,
+    },
     "glm-4-0520": {
         "id": "glm-4-0520",
         "context_length": 128000,
@@ -255,6 +262,13 @@ ZHIPUAI_MODELS = {
         "response_format_available": False,
         "max_output_tokens": 1024,
     },
+    "glm-4v-plus": {
+        "id": "glm-4v-plus",
+        "context_length": 2000,
+        "function_call_available": False,
+        "response_format_available": False,
+        "max_output_tokens": 1024,
+    },
 }
 
 # Mistral models

{vectorvein-0.1.10 → vectorvein-0.1.12}/src/vectorvein/types/llm_parameters.py
@@ -69,6 +69,7 @@ class ChatCompletionMessage(BaseModel):
 
     usage: Optional[Usage] = None
 
+
 class ChatCompletionDeltaMessage(BaseModel):
     content: Optional[str] = None
 

{vectorvein-0.1.10 → vectorvein-0.1.12}/tests/sample_settings.py
@@ -39,7 +39,7 @@ sample_settings = {
         },
         {
             "id": "deepseek-default",
-            "api_base": "https://api.deepseek.com/v1",
+            "api_base": "https://api.deepseek.com/beta",
             "api_key": "",
         },
         {
@@ -80,6 +80,10 @@ sample_settings = {
     },
     "openai": {
         "models": {
+            "gpt-4o-mini": {
+                "id": "gpt-4o-mini",
+                "endpoints": ["azure-openai"],
+            },
             "gpt-4o": {
                 "id": "gpt-4o",
                 "endpoints": ["azure-openai"],
@@ -362,7 +366,7 @@ sample_settings = {
         },
         {
             "id": "deepseek-default",
-            "api_base": "https://api.deepseek.com/v1",
+            "api_base": "https://api.deepseek.com/beta",
             "api_key": "sk-6dad42e7154743cd80b77dff5d0ecaaa",
         },
         {

vectorvein-0.1.12/tests/test_chat_prefix.py
@@ -0,0 +1,23 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {"role": "user", "content": "Please write quick sort code"},
+    {"role": "assistant", "content": "```python\n", "prefix": True},
+]
+
+
+start_time = time.perf_counter()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False)
+response = client.create_completion(messages=messages, stop=["\n```"])
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")

vectorvein-0.1.12/tests/test_http_client.py
@@ -0,0 +1,24 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+import httpx
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {"role": "user", "content": "Please write quick sort code"},
+]
+
+
+start_time = time.perf_counter()
+http_client = httpx.Client()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False, http_client=http_client)
+response = client.create_completion(messages=messages)
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")
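
test_http_client.py above exercises the synchronous path: a caller-supplied httpx.Client is passed to create_chat_client and, per the earlier hunks, forwarded into the underlying SDK clients. A minimal async counterpart is sketched below; it is not part of the package and assumes the async clients expose an awaitable create_completion, as the AsyncMiniMaxChatClient hunk suggests.

# Hypothetical async counterpart to test_http_client.py (not shipped with the package).
import asyncio

import httpx

from vectorvein.settings import settings
from vectorvein.types.enums import BackendType
from vectorvein.chat_clients import create_async_chat_client

from sample_settings import sample_settings


async def main():
    settings.load(sample_settings)
    # Reuse one AsyncClient with an explicit timeout instead of letting the
    # chat client construct its own.
    async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as http_client:
        client = create_async_chat_client(
            backend=BackendType.DeepSeek,
            model="deepseek-chat",
            stream=False,
            http_client=http_client,
        )
        # Assumption: create_completion on the async clients is awaitable.
        response = await client.create_completion(
            messages=[{"role": "user", "content": "Please write quick sort code"}]
        )
        print(response)


asyncio.run(main())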

vectorvein-0.1.12/tests/test_stop.py
@@ -0,0 +1,25 @@
+# @Author: Bi Ying
+# @Date: 2024-07-27 11:51:28
+import time
+
+from vectorvein.settings import settings
+from vectorvein.types.enums import BackendType
+from vectorvein.chat_clients import create_chat_client
+
+from sample_settings import sample_settings
+
+settings.load(sample_settings)
+messages = [
+    {
+        "role": "user",
+        "content": "节点名称是 FileLoader,FileLoader 节点连到 OCR 节点,使用 mermaid 语法表示流程图。直接开始补全,不要有任何解释。\n\n```mermaid\n",
+    }
+]
+
+
+start_time = time.perf_counter()
+client = create_chat_client(backend=BackendType.DeepSeek, model="deepseek-chat", stream=False)
+response = client.create_completion(messages=messages, stop=["\n```"])
+print(response)
+end_time = time.perf_counter()
+print(f"Stream time elapsed: {end_time - start_time} seconds")