vectorvein 0.1.56__tar.gz → 0.1.57__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {vectorvein-0.1.56 → vectorvein-0.1.57}/PKG-INFO +1 -1
  2. {vectorvein-0.1.56 → vectorvein-0.1.57}/pyproject.toml +1 -1
  3. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/anthropic_client.py +28 -8
  4. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/base_client.py +10 -0
  5. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/gemini_client.py +24 -4
  6. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/minimax_client.py +20 -0
  7. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/openai_compatible_client.py +17 -0
  8. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/utils.py +6 -1
  9. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/defaults.py +25 -1
  10. {vectorvein-0.1.56 → vectorvein-0.1.57}/README.md +0 -0
  11. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/xai_client.py +0 -0
  24. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/yi_client.py +0 -0
  25. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  26. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/py.typed +0 -0
  27. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/server/token_server.py +0 -0
  28. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/settings/py.typed +0 -0
  30. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/llm_parameters.py +0 -0
  33. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/py.typed +0 -0
  34. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/utilities/media_processing.py +0 -0
  35. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/utilities/retry.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.56
+Version: 0.1.57
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
pyproject.toml
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.56"
+version = "0.1.57"
 
 [project.license]
 text = "MIT"
src/vectorvein/chat_clients/anthropic_client.py
@@ -249,6 +249,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -265,6 +266,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -281,6 +283,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -296,6 +299,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -304,10 +308,6 @@ class AnthropicChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -348,6 +348,7 @@ class AnthropicChatClient(BaseChatClient):
                     tool_choice=_tool_choice,
                     response_format=response_format,
                     stream_options=stream_options,
+                    top_p=top_p,
                     **kwargs,
                 )
                 for chunk in response:
@@ -374,11 +375,19 @@ class AnthropicChatClient(BaseChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
                 **kwargs,
             )
 
         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -424,6 +433,7 @@ class AnthropicChatClient(BaseChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -486,6 +496,7 @@ class AnthropicChatClient(BaseChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -614,6 +625,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -630,6 +642,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -646,6 +659,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -661,6 +675,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -669,10 +684,6 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -745,6 +756,13 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
 
         assert isinstance(self.raw_client, AsyncAnthropic | AsyncAnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -790,6 +808,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -852,6 +871,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
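The relocated isinstance checks are a small cross-SDK shim: OpenAI and Anthropic each ship their own NotGiven sentinel, and the Anthropic SDK only omits parameters set to its own NOT_GIVEN. Running the coercion after the OpenAI-compatible delegation branches also keeps the original OpenAI sentinels intact for delegated calls. A minimal sketch of the pattern, assuming only the two official SDKs (the helper name normalize_top_p is illustrative, not part of the package):

from anthropic import NOT_GIVEN, NotGiven
from openai import NotGiven as OpenAINotGiven

def normalize_top_p(top_p: float | NotGiven | OpenAINotGiven | None) -> float | NotGiven:
    # Swap OpenAI's sentinel (or a bare None) for Anthropic's NOT_GIVEN so
    # the Anthropic SDK drops the field instead of receiving a foreign type.
    if isinstance(top_p, OpenAINotGiven) or top_p is None:
        return NOT_GIVEN
    return top_p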
src/vectorvein/chat_clients/base_client.py
@@ -73,6 +73,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -90,6 +91,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +109,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -123,6 +126,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -137,6 +141,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -206,6 +211,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -223,6 +229,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -240,6 +247,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -256,6 +264,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -270,6 +279,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
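All of these signatures default top_p to NOT_GIVEN rather than None, which lets an implementation distinguish "caller passed nothing" from "caller explicitly passed None". A self-contained sketch of why that matters, using the OpenAI SDK's sentinel (build_params is a hypothetical helper, not part of this package):

from openai import NOT_GIVEN, NotGiven

def build_params(top_p: float | NotGiven | None = NOT_GIVEN) -> dict:
    # The sentinel means "omit the field entirely"; anything else, including
    # an explicit None, produces a key in the outgoing parameters.
    if isinstance(top_p, NotGiven):
        return {}
    return {"top_p": top_p}

assert build_params() == {}
assert build_params(0.9) == {"top_p": 0.9}
assert build_params(None) == {"top_p": None}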
src/vectorvein/chat_clients/gemini_client.py
@@ -65,6 +65,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -81,6 +82,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -97,6 +99,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -112,6 +115,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -137,16 +141,19 @@ class GeminiChatClient(BaseChatClient):
             model=self.model_setting.id,
         )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -168,6 +175,7 @@ class GeminiChatClient(BaseChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -230,6 +238,8 @@ class GeminiChatClient(BaseChatClient):
         else:
             client = httpx.Client()
             response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
+            if "error" in response:
+                raise Exception(response["error"])
         result = {
             "content": "",
             "usage": {
@@ -302,6 +312,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +329,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -334,6 +346,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -349,6 +362,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -374,16 +388,19 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             model=self.model_setting.id,
         )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -405,6 +422,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -469,6 +487,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         async with client:
             response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
             response = response.json()
+            if "error" in response:
+                raise Exception(response["error"])
         result = {
             "content": "",
             "usage": {
src/vectorvein/chat_clients/minimax_client.py
@@ -91,6 +91,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -107,6 +108,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -123,6 +125,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -138,6 +141,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -190,6 +194,11 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -218,6 +227,7 @@ class MiniMaxChatClient(BaseChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
@@ -326,6 +336,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +353,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -358,6 +370,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -373,6 +386,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -423,6 +437,11 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -451,6 +470,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
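The `if top_p:` guard used here (and in the Gemini clients) leans on sentinel truthiness: both SDKs define NotGiven.__bool__ as False, so NOT_GIVEN, None, and an explicit 0.0 all leave the request without a top_p key. A quick check of that behavior (top_p_params here is a standalone mirror of the guard, not a package function):

from openai import NOT_GIVEN

def top_p_params(top_p) -> dict:
    # Mirrors the client's guard; note that a deliberate 0.0 is also dropped.
    return {"top_p": top_p} if top_p else {}

assert top_p_params(NOT_GIVEN) == {}
assert top_p_params(None) == {}
assert top_p_params(0.0) == {}
assert top_p_params(0.95) == {"top_p": 0.95}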
src/vectorvein/chat_clients/openai_compatible_client.py
@@ -24,6 +24,7 @@ from ..types.llm_parameters import (
     ToolParam,
     ToolChoice,
     OpenAINotGiven,
+    AnthropicNotGiven,
     Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
@@ -97,6 +98,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -113,6 +115,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -129,6 +132,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -144,6 +148,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -152,6 +157,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
@@ -206,6 +213,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 stream=True,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **_stream_options_params,
                 **self.response_format,
                 **tools_params,
@@ -260,6 +268,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 stream=False,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -351,6 +360,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -367,6 +377,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -383,6 +394,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -398,6 +410,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -406,6 +419,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
@@ -460,6 +475,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **_stream_options_params,
                 **self.response_format,
                 **tools_params,
@@ -515,6 +531,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
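Unlike the dict-based Gemini and MiniMax clients, the OpenAI-compatible client can hand top_p straight to the SDK, which strips NOT_GIVEN parameters when serializing the request; only the foreign AnthropicNotGiven sentinel needs coercion first. A hedged usage sketch against the raw OpenAI SDK (the key and model name are placeholders):

from openai import OpenAI, NOT_GIVEN

client = OpenAI(api_key="sk-placeholder")
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hi"}],
    top_p=NOT_GIVEN,  # omitted from the wire request entirely
)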
src/vectorvein/chat_clients/utils.py
@@ -201,6 +201,10 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
+        # TODO: gemini-exp-1114 is not supported yet; fall back to gemini-1.5-flash
+        if model == "gemini-exp-1114":
+            model = "gemini-1.5-flash"
+
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -208,13 +212,14 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         if isinstance(endpoint_id, dict):
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
+
         base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
                 "role": "USER",
                 "parts": [
-                    {"text": "TEXT"},
+                    {"text": text},
                 ],
             },
         }
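The second hunk fixes a real bug: the countTokens request previously sent the literal string "TEXT" instead of the input, so Gemini token counts were computed against a constant. A hedged sketch of the corrected call (the endpoint base and API key are placeholders):

import httpx

api_base = "https://generativelanguage.googleapis.com/v1beta"  # placeholder
model_id = "gemini-1.5-flash"
text = "How many tokens is this sentence?"

response = httpx.post(
    f"{api_base}/models/{model_id}:countTokens",
    params={"key": "YOUR_API_KEY"},  # placeholder
    json={
        "contents": {
            "role": "USER",
            "parts": [{"text": text}],  # the actual input, not the literal "TEXT"
        },
    },
)
print(response.json().get("totalTokens"))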
src/vectorvein/types/defaults.py
@@ -609,6 +609,13 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": True,
         "response_format_available": True,
     },
+    "abab7-preview": {
+        "id": "abab7-preview",
+        "context_length": 245760,
+        "max_output_tokens": 245760,
+        "function_call_available": True,
+        "response_format_available": True,
+    },
 }
 
 # Gemini models
@@ -616,7 +623,8 @@ GEMINI_DEFAULT_MODEL: Final[str] = "gemini-1.5-pro"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
-        "context_length": 1048576,
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,
@@ -624,6 +632,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-exp-1114": {
+        "id": "gemini-exp-1114",
+        "context_length": 32767,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-1.5-flash-8b": {
+        "id": "gemini-1.5-flash-8b",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,