vectorvein 0.1.58__tar.gz → 0.1.60__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.58 → vectorvein-0.1.60}/PKG-INFO +2 -2
- {vectorvein-0.1.58 → vectorvein-0.1.60}/pyproject.toml +2 -2
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/anthropic_client.py +42 -4
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/base_client.py +28 -3
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/gemini_client.py +10 -2
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/minimax_client.py +10 -2
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_compatible_client.py +10 -2
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/defaults.py +22 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/llm_parameters.py +1 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/README.md +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/__init__.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/py.typed +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/utils.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/xai_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/py.typed +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/server/token_server.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/py.typed +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/exception.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/py.typed +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/retry.py +0 -0
{vectorvein-0.1.58 → vectorvein-0.1.60}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.58
+Version: 0.1.60
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -8,7 +8,7 @@ Requires-Python: >=3.10
 Requires-Dist: openai>=1.37.1
 Requires-Dist: tiktoken>=0.7.0
 Requires-Dist: httpx>=0.27.0
-Requires-Dist: anthropic[vertex]>=0.31.2
+Requires-Dist: anthropic[bedrock,vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
 Requires-Dist: deepseek-tokenizer>=0.1.0
{vectorvein-0.1.58 → vectorvein-0.1.60}/pyproject.toml
RENAMED
@@ -6,7 +6,7 @@ dependencies = [
     "openai>=1.37.1",
     "tiktoken>=0.7.0",
     "httpx>=0.27.0",
-    "anthropic[vertex]>=0.31.2",
+    "anthropic[vertex,bedrock]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
     "deepseek-tokenizer>=0.1.0",
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.58"
+version = "0.1.60"
 
 [project.license]
 text = "MIT"
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/anthropic_client.py
RENAMED
@@ -8,7 +8,14 @@ from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 import httpx
 from openai._types import NotGiven as OpenAINotGiven
 from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 from anthropic._types import NOT_GIVEN
 from anthropic.types import (
     TextBlock,
@@ -219,6 +226,15 @@ class AnthropicChatClient(BaseChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("access_key"),
+                aws_secret_key=self.endpoint.credentials.get("secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
             return Anthropic(
                 api_key=self.endpoint.api_key,
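For orientation, the branch above is equivalent to constructing the anthropic SDK's Bedrock client directly. A minimal sketch, assuming placeholder AWS credentials and a Claude model ID that is actually enabled in the chosen region (both are assumptions, not values from this diff):

```python
from anthropic import AnthropicBedrock

# What the new is_bedrock branch builds from endpoint.credentials and
# endpoint.region; the credentials and region below are placeholders.
client = AnthropicBedrock(
    aws_access_key="AKIA...",
    aws_secret_key="...",
    aws_region="us-east-1",
)

message = client.messages.create(
    model="anthropic.claude-3-5-sonnet-20240620-v1:0",  # assumed Bedrock model ID
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello"}],
)
print(message.content[0].text)
```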
@@ -250,6 +266,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -267,6 +284,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -284,6 +302,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -300,6 +319,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -349,6 +369,7 @@ class AnthropicChatClient(BaseChatClient):
                 response_format=response_format,
                 stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             for chunk in response:
@@ -374,8 +395,8 @@ class AnthropicChatClient(BaseChatClient):
             tools=_tools,
             tool_choice=_tool_choice,
             response_format=response_format,
-            stream_options=stream_options,
             top_p=top_p,
+            skip_cutoff=skip_cutoff,
             **kwargs,
         )
 
@@ -399,7 +420,7 @@ class AnthropicChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -595,6 +616,15 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AsyncAnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("aws_access_key"),
+                aws_secret_key=self.endpoint.credentials.get("aws_secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
             return AsyncAnthropic(
                 api_key=self.endpoint.api_key,
@@ -626,6 +656,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -643,6 +674,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -660,6 +692,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -676,6 +709,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -725,6 +759,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             async for chunk in response:
@@ -751,6 +787,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             tools=_tools,
             tool_choice=_tool_choice,
             response_format=response_format,
+            top_p=top_p,
+            skip_cutoff=skip_cutoff,
             **kwargs,
         )
 
@@ -774,7 +812,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
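Taken together, the skip_cutoff changes thread a single boolean from the public overloads down to the cutoff_messages() call, so callers can opt out of history trimming per request. A sketch of the intended usage, assuming the overloaded method shown above is create_completion and that a factory such as create_chat_client exists with this signature (neither name is visible in this diff):

```python
from vectorvein.chat_clients import create_chat_client  # assumed factory name

client = create_chat_client(backend="anthropic", stream=False)  # assumed signature

long_history = [{"role": "user", "content": f"message {i}"} for i in range(500)]

# Default: with context_length_control set to "latest", older messages are
# dropped by cutoff_messages() to fit the model's context window.
trimmed = client.create_completion(messages=long_history)

# New in 0.1.60: send the history untouched, accepting the risk of
# exceeding the model's context length.
untrimmed = client.create_completion(messages=long_history, skip_cutoff=True)
```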
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/base_client.py
RENAMED
@@ -6,7 +6,14 @@ from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable
 
 import httpx
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 
 from ..settings import settings
 from ..types import defaults as defs
@@ -57,7 +64,9 @@ class BaseChatClient(ABC):
 
     @cached_property
     @abstractmethod
-    def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | httpx.Client | None:
+    def raw_client(
+        self,
+    ) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | AnthropicBedrock | httpx.Client | None:
         pass
 
     @overload
@@ -74,6 +83,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -92,6 +102,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -110,6 +121,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -127,6 +139,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -195,7 +208,15 @@ class BaseAsyncChatClient(ABC):
     @abstractmethod
     def raw_client(
         self,
-    ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | httpx.AsyncClient | None:
+    ) -> (
+        AsyncOpenAI
+        | AsyncAzureOpenAI
+        | AsyncAnthropic
+        | AsyncAnthropicVertex
+        | AsyncAnthropicBedrock
+        | httpx.AsyncClient
+        | None
+    ):
         pass
 
     @overload
@@ -212,6 +233,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -230,6 +252,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -248,6 +271,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -265,6 +289,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
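Since the abstract raw_client property now also advertises the Bedrock client types, code that dispatches on the concrete client needs one more case. A sketch of such narrowing, independent of vectorvein itself:

```python
import httpx
from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
from openai import AzureOpenAI, OpenAI


def describe_raw_client(raw: object) -> str:
    """Name the transport behind a BaseChatClient.raw_client value."""
    if isinstance(raw, AnthropicBedrock):
        return "Anthropic via AWS Bedrock"  # the case added in 0.1.60
    if isinstance(raw, AnthropicVertex):
        return "Anthropic via Google Vertex"
    if isinstance(raw, Anthropic):
        return "Anthropic direct API"
    if isinstance(raw, (OpenAI, AzureOpenAI)):
        return "OpenAI-compatible API"
    if isinstance(raw, httpx.Client):
        return "plain httpx transport"
    return "no raw client"
```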
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/gemini_client.py
RENAMED
@@ -66,6 +66,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -83,6 +84,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -100,6 +102,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -116,6 +119,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -133,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -313,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -330,6 +335,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +353,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -363,6 +370,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -380,7 +388,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/minimax_client.py
RENAMED
@@ -92,6 +92,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -109,6 +110,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -126,6 +128,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -142,6 +145,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -166,7 +170,7 @@ class MiniMaxChatClient(BaseChatClient):
         self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -337,6 +341,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -354,6 +359,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +377,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -387,6 +394,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -411,7 +419,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         self.endpoint_id = endpoint_choice
         self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED
@@ -99,6 +99,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -116,6 +117,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -133,6 +135,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -149,6 +152,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -165,7 +169,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -361,6 +365,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -378,6 +383,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -395,6 +401,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -411,6 +418,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -427,7 +435,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/defaults.py
RENAMED
@@ -231,6 +231,20 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2.5-coder-32b-instruct": {
+        "id": "qwen2.5-coder-32b-instruct",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
+    "qwq-32b-preview": {
+        "id": "qwq-32b-preview",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
     "qwen2.5-72b-instruct": {
         "id": "qwen2.5-72b-instruct",
         "context_length": 131072,
@@ -238,6 +252,14 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2-vl-72b-instruct": {
+        "id": "qwen2-vl-72b-instruct",
+        "context_length": 131072,
+        "max_output_tokens": 8192,
+        "function_call_available": False,
+        "response_format_available": False,
+        "native_multimodal": True,
+    },
     "qwen-max": {
         "id": "qwen-max",
         "context_length": 8096,
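The new entries follow the same flat-dict shape as the existing QWEN_MODELS records, so they can be introspected the usual way (values below come straight from the diff):

```python
from vectorvein.types.defaults import QWEN_MODELS

qwq = QWEN_MODELS["qwq-32b-preview"]
print(qwq["context_length"])           # 30000
print(qwq["max_output_tokens"])        # 4096
print(qwq["function_call_available"])  # False

# qwen2-vl-72b-instruct is the only new entry marked natively multimodal.
vl = QWEN_MODELS["qwen2-vl-72b-instruct"]
print(vl.get("native_multimodal", False))  # True
```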
{vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/llm_parameters.py
RENAMED
@@ -37,6 +37,7 @@ class EndpointSetting(BaseModel):
     credentials: Optional[dict] = Field(None, description="Additional credentials if needed.")
     is_azure: bool = Field(False, description="Indicates if the endpoint is for Azure.")
     is_vertex: bool = Field(False, description="Indicates if the endpoint is for Vertex.")
+    is_bedrock: bool = Field(False, description="Indicates if the endpoint is for Bedrock.")
     rpm: int = Field(description="Requests per minute.", default=defs.ENDPOINT_RPM)
     tpm: int = Field(description="Tokens per minute.", default=defs.ENDPOINT_TPM)
     concurrent_requests: int = Field(
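One subtlety worth flagging: in this release the sync Anthropic client reads credentials["access_key"] / credentials["secret_key"], while the async client reads the "aws_"-prefixed keys, so a credentials dict carrying both spellings satisfies both code paths. A hypothetical endpoint definition exercising the new flag (field values are placeholders, and the id field is an assumption not shown in this hunk):

```python
from vectorvein.types.llm_parameters import EndpointSetting

bedrock_endpoint = EndpointSetting(
    id="bedrock-claude",  # assumed identifier field
    is_bedrock=True,
    region="us-east-1",
    credentials={
        # Read by the sync AnthropicBedrock branch:
        "access_key": "AKIA...",
        "secret_key": "...",
        # Read by the async AsyncAnthropicBedrock branch:
        "aws_access_key": "AKIA...",
        "aws_secret_key": "...",
    },
)
```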