vectorvein 0.1.45__tar.gz → 0.1.46__tar.gz

This diff shows the differences between two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes exactly as they appear in the public registry.
Files changed (34)
  1. {vectorvein-0.1.45 → vectorvein-0.1.46}/PKG-INFO +1 -1
  2. {vectorvein-0.1.45 → vectorvein-0.1.46}/pyproject.toml +1 -1
  3. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/anthropic_client.py +29 -2
  4. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/base_client.py +12 -0
  5. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/gemini_client.py +21 -2
  6. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/minimax_client.py +29 -4
  7. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/openai_compatible_client.py +49 -8
  8. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/defaults.py +1 -1
  9. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/llm_parameters.py +2 -0
  10. {vectorvein-0.1.45 → vectorvein-0.1.46}/README.md +0 -0
  11. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/utils.py +0 -0
  24. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/yi_client.py +0 -0
  25. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  26. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/py.typed +0 -0
  27. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/server/token_server.py +0 -0
  28. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/settings/py.typed +0 -0
  30. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/py.typed +0 -0
  33. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/utilities/media_processing.py +0 -0
  34. {vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/utilities/retry.py +0 -0
{vectorvein-0.1.45 → vectorvein-0.1.46}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.45
+Version: 0.1.46
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
{vectorvein-0.1.45 → vectorvein-0.1.46}/pyproject.toml
@@ -17,7 +17,7 @@ description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.45"
+version = "0.1.46"
 
 [project.license]
 text = "MIT"
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/anthropic_client.py
@@ -38,6 +38,7 @@ from ..types.llm_parameters import (
     ChatCompletionMessage,
     ChatCompletionToolParam,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )
 
 
@@ -214,6 +215,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -229,6 +231,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -244,6 +247,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -258,6 +262,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -315,7 +320,16 @@ class AnthropicChatClient(BaseChatClient):
                 http_client=self.http_client,
                 backend_name=self.BACKEND_NAME,
             ).create_completion(
-                messages, model, False, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=False,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )
 
         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
@@ -554,6 +568,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -569,6 +584,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -584,6 +600,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -598,6 +615,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -639,7 +657,16 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 backend_name=self.BACKEND_NAME,
             )
             response = await client.create_completion(
-                messages, model, True, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=True,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )
             async for chunk in response:
                 yield chunk
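Beyond adding `stream_options`, the hunks above replace the positional `create_completion` delegation with keyword arguments, so the forwarded call can no longer break silently if the parameter order ever changes. A hedged usage sketch follows; the constructor arguments are elided because they depend on your configured settings, and `{"include_usage": True}` is the standard OpenAI stream option, which only takes effect when the client delegates to an OpenAI-compatible backend:

```python
from vectorvein.chat_clients.anthropic_client import AnthropicChatClient

# Constructor arguments elided; they depend on your endpoint settings.
client = AnthropicChatClient()

# stream_options is forwarded when the client delegates to an
# OpenAI-compatible backend; {"include_usage": True} is the standard
# OpenAI option for appending a usage chunk to the stream.
for delta in client.create_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},
):
    print(delta.content or "", end="")
```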
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/base_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )
 
 
@@ -71,6 +72,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -87,6 +89,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -103,6 +106,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -118,6 +122,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -131,6 +136,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -198,6 +204,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -214,6 +221,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -230,6 +238,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -245,6 +254,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -258,6 +268,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
@@ -269,5 +280,6 @@ class BaseAsyncChatClient(ABC):
             tools=tools,
             tool_choice=tool_choice,
             response_format=response_format,
+            stream_options=stream_options,
             **kwargs,
         )
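Both abstract bases now declare `stream_options`, and the async `create_stream` forwards it explicitly (last hunk above), so every backend override has to accept the parameter even if it ignores the value. A minimal sketch of a conforming subclass; the class name, default values, and ellipsis body are placeholders, not package code:

```python
from collections.abc import Iterable
from typing import Any

from vectorvein.chat_clients.base_client import BaseChatClient
from vectorvein.types.llm_parameters import NOT_GIVEN, ChatCompletionStreamOptionsParam


class EchoChatClient(BaseChatClient):  # illustrative subclass, not part of the package
    def create_completion(
        self,
        messages: list,
        model: str | None = None,
        stream: bool = False,
        temperature: float | None = None,
        max_tokens: int | None = None,
        tools: Iterable | Any = NOT_GIVEN,
        tool_choice: Any = NOT_GIVEN,
        response_format: dict | None = None,
        stream_options: ChatCompletionStreamOptionsParam | None = None,  # new in 0.1.46
        **kwargs,
    ):
        # A backend whose stream carries no usage data can simply ignore stream_options.
        ...
```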
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/gemini_client.py
@@ -19,6 +19,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )
 
 
@@ -63,6 +64,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -78,6 +80,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -93,6 +96,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +111,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,7 +149,12 @@ class GeminiChatClient(BaseChatClient):
 
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
         request_body = {
@@ -291,6 +301,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -306,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -321,6 +333,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -335,6 +348,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -372,7 +386,12 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
 
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
         request_body = {
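The endpoint entries for a model may now take two shapes: a bare endpoint ID, or a mapping that also pins the upstream `model_id` to that endpoint, and the selection logic above branches on which shape `random.choice` drew. The MiniMax hunks below apply the same pattern and additionally send `self.model_id` in the request body. A sketch of the two config shapes, with made-up IDs for illustration:

```python
import random

# Two shapes the endpoint picker now accepts in models[...].endpoints.
endpoints = [
    "my-gemini-endpoint",                   # bare endpoint ID: model_id is left alone
    {"endpoint_id": "my-vertex-endpoint",   # dict form: overrides model_id as well
     "model_id": "gemini-1.5-pro-002"},
]

choice = random.choice(endpoints)
if isinstance(choice, dict):
    endpoint_id, model_id = choice["endpoint_id"], choice["model_id"]
else:
    endpoint_id, model_id = choice, None    # keep the configured default model ID
```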
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/minimax_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )
 
 
@@ -72,6 +73,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.Client()
+        self.model_id = None
 
     @cached_property
     def raw_client(self):
@@ -88,6 +90,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -103,6 +106,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -118,6 +122,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -132,6 +137,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,9 +150,16 @@ class MiniMaxChatClient(BaseChatClient):
             tool_choice = "auto"
 
         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
         if self.context_length_control == ContextLengthControlType.Latest:
@@ -191,7 +204,7 @@ class MiniMaxChatClient(BaseChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
         request_body = {
-            "model": self.model,
+            "model": self.model_id,
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
@@ -287,6 +300,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.AsyncClient()
+        self.model_id = None
 
     @cached_property
     def raw_client(self):
@@ -303,6 +317,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +333,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -333,6 +349,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +364,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -359,9 +377,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             tool_choice = "auto"
 
         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
         if self.context_length_control == ContextLengthControlType.Latest:
@@ -404,7 +429,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
         request_body = {
-            "model": self.model,
+            "model": self.model_id,
            "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -25,8 +25,11 @@ from ..types.llm_parameters import (
     NOT_GIVEN,
     ToolParam,
     ToolChoice,
+    OpenAINotGiven,
+    Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )
 
 
@@ -74,7 +77,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             return AzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -95,6 +98,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -110,6 +114,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -125,6 +130,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -139,6 +145,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -189,6 +196,11 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             self.response_format = {}
 
+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if self.stream:
             stream_response: Stream[ChatCompletionChunk] = raw_client.chat.completions.create(
                 model=self.model_id,
@@ -196,6 +208,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 stream=True,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -204,7 +217,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
             def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -213,7 +235,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -225,9 +247,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)
 
             return generator()
         else:
@@ -306,7 +328,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             return AsyncAzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -327,6 +349,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +365,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -357,6 +381,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +396,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -412,6 +438,11 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             self.response_format = {}
 
+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model)
@@ -428,6 +459,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -436,7 +468,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
            async def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 async for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -445,7 +486,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -457,9 +498,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)
 
             return generator()
         else:
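The usage plumbing above is the payoff of `stream_options`: with the OpenAI API, `{"include_usage": True}` makes the server append a final chunk whose `choices` list is empty but whose `usage` field is set, which is why the generator records usage before the `len(chunk.choices) == 0` early-continue and attaches it to the deltas it yields. A hedged consumption sketch; construction details are elided because they depend on your endpoint settings:

```python
from vectorvein.chat_clients.openai_compatible_client import OpenAICompatibleChatClient

# Constructor arguments elided; they depend on your endpoint settings.
client = OpenAICompatibleChatClient()

stream = client.create_completion(
    messages=[{"role": "user", "content": "Summarize the release."}],
    stream=True,
    stream_options={"include_usage": True},  # ask the API for the trailing usage chunk
)

usage = None
for delta in stream:
    if delta.usage is not None:  # populated on deltas yielded once usage is known
        usage = delta.usage
    print(delta.content or "", end="")

if usage is not None:
    print(f"\nprompt={usage.prompt_tokens} completion={usage.completion_tokens}")
```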
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/defaults.py
@@ -526,7 +526,7 @@ OPENAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Anthropic models
-ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20240620"
+ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20241022"
 ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "claude-3-opus-20240229": {
         "id": "claude-3-opus-20240229",
{vectorvein-0.1.45 → vectorvein-0.1.46}/src/vectorvein/types/llm_parameters.py
@@ -14,6 +14,7 @@ from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
 from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from openai.types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
 
 from . import defaults as defs
@@ -122,6 +123,7 @@ __all__ = [
     "Usage",
     "ChatCompletionMessage",
     "ChatCompletionDeltaMessage",
+    "ChatCompletionStreamOptionsParam",
     "NotGiven",
     "NOT_GIVEN",
     "OpenAIToolParam",