vectorvein 0.1.45__tar.gz → 0.1.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {vectorvein-0.1.45 → vectorvein-0.1.47}/PKG-INFO +1 -1
  2. {vectorvein-0.1.45 → vectorvein-0.1.47}/pyproject.toml +1 -1
  3. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/anthropic_client.py +29 -2
  4. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/base_client.py +13 -0
  5. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/gemini_client.py +21 -2
  6. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/minimax_client.py +29 -4
  7. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_compatible_client.py +53 -14
  8. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/defaults.py +1 -1
  9. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/llm_parameters.py +2 -0
  10. {vectorvein-0.1.45 → vectorvein-0.1.47}/README.md +0 -0
  11. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/utils.py +0 -0
  24. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/yi_client.py +0 -0
  25. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  26. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/py.typed +0 -0
  27. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/server/token_server.py +0 -0
  28. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/settings/py.typed +0 -0
  30. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/py.typed +0 -0
  33. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/utilities/media_processing.py +0 -0
  34. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/utilities/retry.py +0 -0
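
Note: the substantive changes in 0.1.47 fall into three groups: (1) a new stream_options parameter (the OpenAI SDK's ChatCompletionStreamOptionsParam) is threaded through every chat client, and streamed delta messages can now carry token usage; (2) an endpoint entry in a model's settings may now pin a per-endpoint model_id; (3) the default Anthropic model and the Azure OpenAI api_version are bumped. Hedged usage sketches appear after the relevant hunks below.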
{vectorvein-0.1.45 → vectorvein-0.1.47}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.45
+Version: 0.1.47
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
{vectorvein-0.1.45 → vectorvein-0.1.47}/pyproject.toml
@@ -17,7 +17,7 @@ description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.45"
+version = "0.1.47"

 [project.license]
 text = "MIT"
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/anthropic_client.py
@@ -38,6 +38,7 @@ from ..types.llm_parameters import (
     ChatCompletionMessage,
     ChatCompletionToolParam,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -214,6 +215,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -229,6 +231,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -244,6 +247,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -258,6 +262,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -315,7 +320,16 @@ class AnthropicChatClient(BaseChatClient):
                 http_client=self.http_client,
                 backend_name=self.BACKEND_NAME,
             ).create_completion(
-                messages, model, False, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=False,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )

         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
@@ -554,6 +568,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -569,6 +584,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -584,6 +600,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -598,6 +615,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -639,7 +657,16 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 backend_name=self.BACKEND_NAME,
             )
             response = await client.create_completion(
-                messages, model, True, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=True,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )
             async for chunk in response:
                 yield chunk
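
Note: besides threading stream_options through, both Anthropic clients rewrite the delegated create_completion call from a long positional argument list to explicit keyword arguments, so the call site stays correct as the shared signature grows.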
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/base_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -71,6 +72,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -87,6 +89,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -103,6 +106,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -118,6 +122,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -131,6 +136,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -142,6 +148,7 @@ class BaseChatClient(ABC):
             tools=tools,
             tool_choice=tool_choice,
             response_format=response_format,
+            stream_options=stream_options,
             **kwargs,
         )

@@ -198,6 +205,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -214,6 +222,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -230,6 +239,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -245,6 +255,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -258,6 +269,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
@@ -269,5 +281,6 @@ class BaseAsyncChatClient(ABC):
             tools=tools,
             tool_choice=tool_choice,
             response_format=response_format,
+            stream_options=stream_options,
             **kwargs,
         )
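
Note: these are the abstract signatures every concrete client must mirror; the streaming wrapper methods at the end of each base class forward stream_options into create_completion unchanged, so the streaming and non-streaming paths accept the same options.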
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/gemini_client.py
@@ -19,6 +19,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -63,6 +64,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -78,6 +80,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -93,6 +96,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +111,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,7 +149,12 @@ class GeminiChatClient(BaseChatClient):

         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         request_body = {
@@ -291,6 +301,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -306,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -321,6 +333,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -335,6 +348,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -372,7 +386,12 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):

         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         request_body = {
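
Note: with the endpoint-selection change above (mirrored in the MiniMax client below, whose request body now sends self.model_id rather than the user-facing model name), an entry in a model's endpoints list may be either a plain endpoint-ID string or a mapping that also pins a per-endpoint model ID. A hypothetical settings fragment showing both accepted shapes; the endpoint_id/model_id keys come from the diff, while the model names and surrounding structure are assumed:

    # Hypothetical configuration sketch; only the two entry shapes are grounded in the diff.
    model_settings = {
        "gemini-1.5-pro": {
            "endpoints": [
                "gemini-endpoint-1",                   # old shape: endpoint ID only
                {
                    "endpoint_id": "vertex-endpoint",  # new shape: dict with a
                    "model_id": "gemini-1.5-pro-002",  # per-endpoint model identifier
                },
            ],
        },
    }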
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/minimax_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -72,6 +73,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.Client()
+        self.model_id = None

     @cached_property
     def raw_client(self):
@@ -88,6 +90,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -103,6 +106,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -118,6 +122,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -132,6 +137,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,9 +150,16 @@ class MiniMaxChatClient(BaseChatClient):
             tool_choice = "auto"

         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -191,7 +204,7 @@ class MiniMaxChatClient(BaseChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

         request_body = {
-            "model": self.model,
+            "model": self.model_id,
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
@@ -287,6 +300,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.AsyncClient()
+        self.model_id = None

     @cached_property
     def raw_client(self):
@@ -303,6 +317,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +333,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -333,6 +349,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +364,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -359,9 +377,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             tool_choice = "auto"

         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -404,7 +429,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

         request_body = {
-            "model": self.model,
+            "model": self.model_id,
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -6,8 +6,6 @@ from functools import cached_property
 from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable

 import httpx
-from openai._streaming import Stream, AsyncStream
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI

 from .base_client import BaseChatClient, BaseAsyncChatClient
@@ -25,8 +23,11 @@ from ..types.llm_parameters import (
     NOT_GIVEN,
     ToolParam,
     ToolChoice,
+    OpenAINotGiven,
+    Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -74,7 +75,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             return AzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -95,6 +96,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -110,6 +112,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -125,6 +128,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -139,6 +143,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -189,13 +194,19 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             self.response_format = {}

+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if self.stream:
-            stream_response: Stream[ChatCompletionChunk] = raw_client.chat.completions.create(
+            stream_response = raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=True,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -204,7 +215,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
             def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -213,7 +233,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -225,13 +245,13 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)

             return generator()
         else:
-            response: ChatCompletion = raw_client.chat.completions.create(
+            response = raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=False,
@@ -306,7 +326,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             return AsyncAzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -327,6 +347,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +363,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -357,6 +379,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +394,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -412,6 +436,11 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             self.response_format = {}

+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model)
@@ -422,12 +451,13 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 max_tokens = self.model_setting.context_length - token_counts - 64

         if self.stream:
-            stream_response: AsyncStream[ChatCompletionChunk] = await raw_client.chat.completions.create(
+            stream_response = await raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -436,7 +466,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             async def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 async for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -445,7 +484,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -457,13 +496,13 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)

             return generator()
         else:
-            response: ChatCompletion = await raw_client.chat.completions.create(
+            response = await raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=self.stream,
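
Note: the generator changes above attach a Usage object to a yielded ChatCompletionDeltaMessage whenever the current chunk reports token counts (with OpenAI-style backends, the final chunk does when include_usage is requested). A hedged consumption sketch, assuming `client` is a configured OpenAICompatibleChatClient; construction details and message content are illustrative, not from the diff:

    # Sketch under the assumptions above; attribute names follow the diff
    # (delta.usage from the new Usage plumbing) and the OpenAI delta shape (delta.content).
    deltas = client.create_completion(
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    total_tokens = None
    for delta in deltas:
        if delta.content:
            print(delta.content, end="")
        if delta.usage is not None:  # set on chunks that carry usage stats
            total_tokens = delta.usage.total_tokens
    print(f"\ntotal tokens: {total_tokens}")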
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/defaults.py
@@ -526,7 +526,7 @@ OPENAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }

 # Anthropic models
-ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20240620"
+ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20241022"
 ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "claude-3-opus-20240229": {
         "id": "claude-3-opus-20240229",
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/llm_parameters.py
@@ -14,6 +14,7 @@ from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
 from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from openai.types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

 from . import defaults as defs
@@ -122,6 +123,7 @@ __all__ = [
     "Usage",
     "ChatCompletionMessage",
     "ChatCompletionDeltaMessage",
+    "ChatCompletionStreamOptionsParam",
     "NotGiven",
     "NOT_GIVEN",
     "OpenAIToolParam",
  "OpenAIToolParam",
File without changes