vectorvein 0.1.58__tar.gz → 0.1.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {vectorvein-0.1.58 → vectorvein-0.1.60}/PKG-INFO +2 -2
  2. {vectorvein-0.1.58 → vectorvein-0.1.60}/pyproject.toml +2 -2
  3. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/anthropic_client.py +42 -4
  4. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/base_client.py +28 -3
  5. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/gemini_client.py +10 -2
  6. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/minimax_client.py +10 -2
  7. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_compatible_client.py +10 -2
  8. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/defaults.py +22 -0
  9. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/llm_parameters.py +1 -0
  10. {vectorvein-0.1.58 → vectorvein-0.1.60}/README.md +0 -0
  11. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/utils.py +0 -0
  24. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/xai_client.py +0 -0
  25. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/yi_client.py +0 -0
  26. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  27. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/py.typed +0 -0
  28. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/server/token_server.py +0 -0
  29. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/__init__.py +0 -0
  30. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/py.typed +0 -0
  31. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/enums.py +0 -0
  32. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/exception.py +0 -0
  33. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/py.typed +0 -0
  34. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/media_processing.py +0 -0
  35. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/retry.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.58
+Version: 0.1.60
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -8,7 +8,7 @@ Requires-Python: >=3.10
 Requires-Dist: openai>=1.37.1
 Requires-Dist: tiktoken>=0.7.0
 Requires-Dist: httpx>=0.27.0
-Requires-Dist: anthropic[vertex]>=0.31.2
+Requires-Dist: anthropic[bedrock,vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
 Requires-Dist: deepseek-tokenizer>=0.1.0
@@ -6,7 +6,7 @@ dependencies = [
     "openai>=1.37.1",
     "tiktoken>=0.7.0",
     "httpx>=0.27.0",
-    "anthropic[vertex]>=0.31.2",
+    "anthropic[vertex,bedrock]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
     "deepseek-tokenizer>=0.1.0",
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.58"
+version = "0.1.60"
 
 [project.license]
 text = "MIT"
@@ -8,7 +8,14 @@ from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 import httpx
 from openai._types import NotGiven as OpenAINotGiven
 from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 from anthropic._types import NOT_GIVEN
 from anthropic.types import (
     TextBlock,
@@ -219,6 +226,15 @@ class AnthropicChatClient(BaseChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("access_key"),
+                aws_secret_key=self.endpoint.credentials.get("secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
            return Anthropic(
                api_key=self.endpoint.api_key,
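As a hedged illustration of what the new is_bedrock branch constructs, the sketch below calls the anthropic SDK's AnthropicBedrock client directly with the same keyword arguments the diff passes; the credential values and the Bedrock model id are placeholders, not values from this package.

# Hedged sketch of the object the new branch returns; values are placeholders.
from anthropic import AnthropicBedrock

client = AnthropicBedrock(
    aws_access_key="AKIA...",   # endpoint.credentials.get("access_key")
    aws_secret_key="...",       # endpoint.credentials.get("secret_key")
    aws_region="us-west-2",     # endpoint.region
)
message = client.messages.create(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",  # placeholder Bedrock model id
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello"}],
)
print(message.content)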
@@ -250,6 +266,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -267,6 +284,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -284,6 +302,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -300,6 +319,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -349,6 +369,7 @@ class AnthropicChatClient(BaseChatClient):
                 response_format=response_format,
                 stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             for chunk in response:
@@ -374,8 +395,8 @@ class AnthropicChatClient(BaseChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
-                stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -399,7 +420,7 @@ class AnthropicChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
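The new skip_cutoff flag threads through every overload above and gates this trimming step. A minimal, self-contained stand-in for the pattern (vectorvein's cutoff_messages trims by token budget; the list slice here is only a simplification):

from typing import Any

def maybe_cutoff(
    messages: list[dict[str, Any]],
    max_count: int,
    skip_cutoff: bool = False,
) -> list[dict[str, Any]]:
    """Trim history to the model's window unless the caller opts out."""
    if skip_cutoff:
        return messages  # caller takes responsibility for context length
    return messages[-max_count:]  # simplified stand-in for cutoff_messages

history = [{"role": "user", "content": f"turn {i}"} for i in range(10)]
assert len(maybe_cutoff(history, max_count=4)) == 4
assert len(maybe_cutoff(history, max_count=4, skip_cutoff=True)) == 10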
@@ -595,6 +616,15 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AsyncAnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("aws_access_key"),
+                aws_secret_key=self.endpoint.credentials.get("aws_secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
            return AsyncAnthropic(
                api_key=self.endpoint.api_key,
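The async counterpart mirrors the sync sketch above, though note this branch reads different credential keys (aws_access_key/aws_secret_key) than the sync branch's access_key/secret_key. A hedged sketch with placeholder values:

# Hedged async sketch; credential values and model id are placeholders.
import asyncio
from anthropic import AsyncAnthropicBedrock

async def main() -> None:
    client = AsyncAnthropicBedrock(
        aws_access_key="AKIA...",   # endpoint.credentials.get("aws_access_key")
        aws_secret_key="...",       # endpoint.credentials.get("aws_secret_key")
        aws_region="us-west-2",     # endpoint.region
    )
    message = await client.messages.create(
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",  # placeholder model id
        max_tokens=256,
        messages=[{"role": "user", "content": "Ping"}],
    )
    print(message.content)

asyncio.run(main())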
@@ -626,6 +656,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -643,6 +674,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -660,6 +692,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -676,6 +709,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -725,6 +759,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             async for chunk in response:
@@ -751,6 +787,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -774,7 +812,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -6,7 +6,14 @@ from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable
 
 import httpx
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 
 from ..settings import settings
 from ..types import defaults as defs
@@ -57,7 +64,9 @@ class BaseChatClient(ABC):
 
     @cached_property
     @abstractmethod
-    def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | httpx.Client | None:
+    def raw_client(
+        self,
+    ) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | AnthropicBedrock | httpx.Client | None:
         pass
 
     @overload
@@ -74,6 +83,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -92,6 +102,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -110,6 +121,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -127,6 +139,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -195,7 +208,15 @@ class BaseAsyncChatClient(ABC):
     @abstractmethod
     def raw_client(
         self,
-    ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | httpx.AsyncClient | None:
+    ) -> (
+        AsyncOpenAI
+        | AsyncAzureOpenAI
+        | AsyncAnthropic
+        | AsyncAnthropicVertex
+        | AsyncAnthropicBedrock
+        | httpx.AsyncClient
+        | None
+    ):
         pass
 
     @overload
@@ -212,6 +233,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -230,6 +252,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -248,6 +271,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -265,6 +289,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -66,6 +66,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -83,6 +84,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -100,6 +102,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -116,6 +119,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -133,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -313,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -330,6 +335,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +353,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -363,6 +370,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -380,7 +388,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -92,6 +92,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -109,6 +110,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -126,6 +128,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -142,6 +145,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -166,7 +170,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -337,6 +341,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -354,6 +359,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +377,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -387,6 +394,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -411,7 +419,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -99,6 +99,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -116,6 +117,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -133,6 +135,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -149,6 +152,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -165,7 +169,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -361,6 +365,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -378,6 +383,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -395,6 +401,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -411,6 +418,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -427,7 +435,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -231,6 +231,20 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2.5-coder-32b-instruct": {
+        "id": "qwen2.5-coder-32b-instruct",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
+    "qwq-32b-preview": {
+        "id": "qwq-32b-preview",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
     "qwen2.5-72b-instruct": {
         "id": "qwen2.5-72b-instruct",
         "context_length": 131072,
@@ -238,6 +252,14 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2-vl-72b-instruct": {
+        "id": "qwen2-vl-72b-instruct",
+        "context_length": 131072,
+        "max_output_tokens": 8192,
+        "function_call_available": False,
+        "response_format_available": False,
+        "native_multimodal": True,
+    },
     "qwen-max": {
         "id": "qwen-max",
         "context_length": 8096,
@@ -37,6 +37,7 @@ class EndpointSetting(BaseModel):
     credentials: Optional[dict] = Field(None, description="Additional credentials if needed.")
     is_azure: bool = Field(False, description="Indicates if the endpoint is for Azure.")
     is_vertex: bool = Field(False, description="Indicates if the endpoint is for Vertex.")
+    is_bedrock: bool = Field(False, description="Indicates if the endpoint is for Bedrock.")
     rpm: int = Field(description="Requests per minute.", default=defs.ENDPOINT_RPM)
     tpm: int = Field(description="Tokens per minute.", default=defs.ENDPOINT_TPM)
     concurrent_requests: int = Field(
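A hedged sketch of an endpoint that would take the new Bedrock path in anthropic_client.py; only credentials, is_bedrock, rpm, and tpm are confirmed by this hunk, while the id and region fields are inferred from their use elsewhere in the diff (settings.get_endpoint(self.endpoint_id), self.endpoint.region).

from vectorvein.types.llm_parameters import EndpointSetting

bedrock_endpoint = EndpointSetting(
    id="aws-bedrock",    # inferred field; endpoints are looked up by id
    region="us-west-2",  # read by the Bedrock branch as endpoint.region
    is_bedrock=True,
    credentials={"access_key": "AKIA...", "secret_key": "..."},  # sync-branch keys
)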