vectorvein 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/api/client.py +81 -103
- vectorvein/api/exceptions.py +1 -3
- vectorvein/api/models.py +11 -11
- vectorvein/chat_clients/anthropic_client.py +157 -169
- vectorvein/chat_clients/base_client.py +257 -198
- vectorvein/chat_clients/openai_compatible_client.py +150 -161
- vectorvein/chat_clients/utils.py +44 -24
- vectorvein/server/token_server.py +1 -1
- vectorvein/settings/__init__.py +27 -27
- vectorvein/types/defaults.py +32 -16
- vectorvein/types/llm_parameters.py +40 -34
- vectorvein/types/settings.py +10 -10
- vectorvein/utilities/media_processing.py +1 -1
- vectorvein/utilities/rate_limiter.py +5 -6
- vectorvein/utilities/retry.py +6 -5
- vectorvein/workflow/graph/edge.py +3 -3
- vectorvein/workflow/graph/node.py +14 -26
- vectorvein/workflow/graph/port.py +40 -39
- vectorvein/workflow/graph/workflow.py +13 -25
- vectorvein/workflow/nodes/audio_generation.py +5 -7
- vectorvein/workflow/nodes/control_flows.py +7 -9
- vectorvein/workflow/nodes/file_processing.py +4 -6
- vectorvein/workflow/nodes/image_generation.py +20 -22
- vectorvein/workflow/nodes/llms.py +13 -15
- vectorvein/workflow/nodes/media_editing.py +26 -40
- vectorvein/workflow/nodes/media_processing.py +19 -21
- vectorvein/workflow/nodes/output.py +10 -12
- vectorvein/workflow/nodes/relational_db.py +3 -5
- vectorvein/workflow/nodes/text_processing.py +8 -10
- vectorvein/workflow/nodes/tools.py +8 -10
- vectorvein/workflow/nodes/triggers.py +1 -3
- vectorvein/workflow/nodes/vector_db.py +3 -5
- vectorvein/workflow/nodes/video_generation.py +4 -6
- vectorvein/workflow/nodes/web_crawlers.py +4 -6
- vectorvein/workflow/utils/analyse.py +5 -13
- vectorvein/workflow/utils/check.py +6 -16
- vectorvein/workflow/utils/json_to_code.py +6 -14
- vectorvein/workflow/utils/layout.py +3 -5
- {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/METADATA +1 -1
- vectorvein-0.3.3.dist-info/RECORD +68 -0
- {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/WHEEL +1 -1
- vectorvein-0.3.1.dist-info/RECORD +0 -68
- {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/openai_compatible_client.py
CHANGED
@@ -3,19 +3,8 @@
 import re
 import json
 from functools import cached_property
-from typing import (
-    Any,
-    Dict,
-    List,
-    TYPE_CHECKING,
-    overload,
-    Generator,
-    AsyncGenerator,
-    Union,
-    Literal,
-    Iterable,
-    Optional,
-)
+from collections.abc import Generator, AsyncGenerator, Iterable
+from typing import Any, TYPE_CHECKING, overload, Literal
 
 import httpx
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
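The import cleanup above follows the post-PEP 585 convention: ABCs such as `Generator`, `AsyncGenerator`, and `Iterable` now come from `collections.abc` (their `typing` aliases are deprecated since Python 3.9), while `typing` keeps only the names with no runtime counterpart (`Any`, `TYPE_CHECKING`, `overload`, `Literal`). A minimal, self-contained sketch of the style the new code adopts:

```python
# Preferred on Python 3.9+: ABCs come from collections.abc,
# and builtin generics are subscripted directly (PEP 585).
from collections.abc import Generator, Iterable

def chunks(items: Iterable[int], size: int) -> Generator[list[int], None, None]:
    """Yield successive lists of at most `size` items."""
    batch: list[int] = []
    for item in items:
        batch.append(item)
        if len(batch) == size:
            yield batch
            batch = []
    if batch:
        yield batch

print(list(chunks(range(5), 2)))  # [[0, 1], [2, 3], [4]]
```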
@@ -117,31 +106,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: Literal[False] = False,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
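Each of these overloads now spells optional parameters as `<type> | OpenAINotGiven | None = NOT_GIVEN`. `NOT_GIVEN` is the openai SDK's sentinel for "argument omitted", which is distinct from an explicit `None`. A plain-Python sketch of that sentinel pattern; the `NotGiven` class and `create()` here are illustrative stand-ins, not code from the package:

```python
class NotGiven:
    """Sentinel type: 'the caller did not pass this argument at all'."""
    def __repr__(self) -> str:
        return "NOT_GIVEN"

NOT_GIVEN = NotGiven()

def create(temperature: float | None | NotGiven = NOT_GIVEN) -> dict:
    payload: dict[str, object] = {"model": "demo"}
    # Only forward the field when the caller actually supplied it;
    # an explicit None is still forwarded (it means "reset to default").
    if not isinstance(temperature, NotGiven):
        payload["temperature"] = temperature
    return payload

print(create())                  # {'model': 'demo'}
print(create(temperature=None))  # {'model': 'demo', 'temperature': None}
```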
@@ -158,37 +147,37 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: Literal[True],
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
-    ) -> Generator[ChatCompletionDeltaMessage,
+    ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
 
     @overload
@@ -199,31 +188,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -239,31 +228,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: Literal[False] | Literal[True] = False,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -312,7 +301,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                         },
                     }
                 )
-            tools_params =
+            tools_params = {"tools": _tools, "tool_choice": tool_choice}
         else:
             tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
             additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
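The restored assignment bundles the converted tool definitions and the tool choice into one dict, presumably so both can be splatted into the underlying completion call in a single place. A runnable sketch of that pattern, with `fake_create` standing in for the real SDK call (an assumption, not code from this diff):

```python
def fake_create(model: str, messages: list, **kwargs) -> dict:
    """Stand-in for a chat-completions call; echoes what it received."""
    return {"model": model, "extra": sorted(kwargs)}

_tools = [{"type": "function", "function": {"name": "lookup"}}]
tool_choice = "auto"

# Bundle the optional tool arguments so they can be forwarded in one splat.
tools_params = {"tools": _tools, "tool_choice": tool_choice} if _tools else {}

print(fake_create("demo-model", [], **tools_params))
# {'model': 'demo-model', 'extra': ['tool_choice', 'tools']}
```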
@@ -670,31 +659,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: Literal[False] = False,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -711,31 +700,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: Literal[True],
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -752,31 +741,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
@@ -792,37 +781,37 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: Literal[False] | Literal[True] = False,
         temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None | NotGiven =
+        max_tokens: int | None | NotGiven = None,
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: ResponseFormat | NotGiven = NOT_GIVEN,
-        stream_options: ChatCompletionStreamOptionsParam | None
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
         skip_cutoff: bool = False,
-        audio:
-        frequency_penalty:
-        logit_bias:
-        logprobs:
-        max_completion_tokens:
-        metadata:
-        modalities:
-        n:
+        audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+        frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+        logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+        max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+        metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+        modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+        n: int | OpenAINotGiven | None = NOT_GIVEN,
         parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-        prediction:
-        presence_penalty:
-        reasoning_effort:
+        prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+        presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+        reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
         thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-        seed:
-        service_tier:
-        stop:
-        store:
-        top_logprobs:
+        seed: int | OpenAINotGiven | None = NOT_GIVEN,
+        service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+        stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+        store: bool | OpenAINotGiven | None = NOT_GIVEN,
+        top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
         user: str | OpenAINotGiven = NOT_GIVEN,
         extra_headers: Headers | None = None,
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
-    ):
+    ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         if model is not None:
             self.model = model
         if stream is not None:
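With the new annotation, the non-overload definition advertises both return shapes: a full `ChatCompletionMessage` when not streaming and an `AsyncGenerator` of `ChatCompletionDeltaMessage` when streaming, while the `@overload` stubs above narrow the type statically per `stream` flag. A toy stand-in showing how a caller can narrow that union at runtime (names here are illustrative):

```python
import asyncio
from collections.abc import AsyncGenerator

async def create_completion(stream: bool) -> str | AsyncGenerator[str, None]:
    """Toy stand-in for the dual-mode client method."""
    if not stream:
        return "full message"

    async def _gen() -> AsyncGenerator[str, None]:
        for chunk in ("par", "tial"):
            yield chunk

    return _gen()

async def main() -> None:
    result = await create_completion(stream=True)
    if isinstance(result, AsyncGenerator):  # runtime narrowing of the union
        print("".join([chunk async for chunk in result]))
    else:
        print(result)

asyncio.run(main())  # prints "partial"
```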
@@ -865,7 +854,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                         },
                     }
                 )
-            tools_params =
+            tools_params = {"tools": _tools, "tool_choice": tool_choice}
         else:
             tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
             additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
vectorvein/chat_clients/utils.py
CHANGED
@@ -5,7 +5,8 @@ import json
 import uuid
 import warnings
 from math import ceil
-from
+from collections.abc import Iterable
+from typing import cast
 
 import httpx
 import tiktoken
@@ -105,6 +106,24 @@ def convert_type(value, value_type):
     return value  # return the original value if the type is unknown
 
 
+def _get_first_enabled_endpoint(backend_setting, settings):
+    """Get the first enabled endpoint from backend settings"""
+    for endpoint_choice in backend_setting.endpoints:
+        if isinstance(endpoint_choice, dict):
+            endpoint_id = endpoint_choice["endpoint_id"]
+        else:
+            endpoint_id = endpoint_choice
+
+        try:
+            endpoint = settings.get_endpoint(endpoint_id)
+            if endpoint.enabled:
+                return endpoint
+        except ValueError:
+            # Endpoint not found, skip it
+            continue
+    return None
+
+
 def get_token_counts(text: str | dict, model: str = "", use_token_server_first: bool = True) -> int:
     if use_token_server_first and settings.token_server is not None:
         base_url = settings.token_server.url if settings.token_server.url is not None else f"http://{settings.token_server.host}:{settings.token_server.port}"
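This new helper centralizes endpoint selection that was previously inlined per backend: it accepts both bare endpoint IDs and `{"endpoint_id": ...}` dicts, skips disabled endpoints, and treats an unknown ID (`get_endpoint` raising `ValueError`) as "keep looking". A self-contained sketch of the same selection logic; the `Endpoint` and `Settings` stubs are assumptions for illustration:

```python
from dataclasses import dataclass

@dataclass
class Endpoint:
    id: str
    enabled: bool

class Settings:
    """Stub registry mirroring the get_endpoint lookup used in the diff."""
    def __init__(self, endpoints: dict[str, Endpoint]):
        self._endpoints = endpoints

    def get_endpoint(self, endpoint_id: str) -> Endpoint:
        try:
            return self._endpoints[endpoint_id]
        except KeyError:
            raise ValueError(f"unknown endpoint: {endpoint_id}") from None

def first_enabled(endpoint_choices: list[str | dict], settings: Settings) -> Endpoint | None:
    for choice in endpoint_choices:
        endpoint_id = choice["endpoint_id"] if isinstance(choice, dict) else choice
        try:
            endpoint = settings.get_endpoint(endpoint_id)
        except ValueError:
            continue  # unknown endpoint: skip it
        if endpoint.enabled:
            return endpoint
    return None

settings = Settings({"a": Endpoint("a", False), "b": Endpoint("b", True)})
print(first_enabled(["missing", "a", {"endpoint_id": "b"}], settings))
# Endpoint(id='b', enabled=True)
```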
@@ -126,10 +145,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         backend_setting = settings.get_backend(BackendType.MiniMax).models[model]
         if len(backend_setting.endpoints) == 0:
             return int(len(text) / 1.33)
-
-        if
-
-        endpoint = settings.get_endpoint(endpoint_id)
+        endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+        if endpoint is None:
+            return int(len(text) / 1.33)
         tokenize_url = "https://api.minimax.chat/v1/tokenize"
         headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
         request_body = {
@@ -150,10 +168,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         backend_setting = settings.get_backend(BackendType.Moonshot).models[model]
         if len(backend_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
-
-        if
-
-        endpoint = settings.get_endpoint(endpoint_id)
+        endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+        if endpoint is None:
+            return len(get_gpt_35_encoding().encode(text))
         tokenize_url = f"{endpoint.api_base}/tokenizers/estimate-token-count"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
@@ -171,10 +188,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         backend_setting = settings.get_backend(BackendType.Gemini).models[model]
         if len(backend_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
-
-        if
-
-        endpoint = settings.get_endpoint(endpoint_id)
+        endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+        if endpoint is None:
+            return len(get_gpt_35_encoding().encode(text))
 
         api_base = endpoint.api_base.removesuffix("/openai/") if endpoint.api_base else "https://generativelanguage.googleapis.com/v1beta"
         base_url = f"{api_base}/models/{backend_setting.id}:countTokens"
@@ -200,7 +216,13 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
                 endpoint_id = endpoint_choice["endpoint_id"]
             else:
                 endpoint_id = endpoint_choice
-            endpoint = settings.get_endpoint(endpoint_id)
+
+            try:
+                endpoint = settings.get_endpoint(endpoint_id)
+                if not endpoint.enabled:
+                    continue
+            except ValueError:
+                continue
 
             if endpoint.is_vertex or endpoint.is_bedrock or endpoint.endpoint_type == "anthropic_vertex" or endpoint.endpoint_type == "anthropic_bedrock":
                 continue
@@ -214,10 +236,10 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
                     .input_tokens
                 )
             except Exception as e:
-                warnings.warn(f"Anthropic token counting failed: {e}")
+                warnings.warn(f"Anthropic token counting failed: {e}", stacklevel=2)
 
         # TODO: Use anthropic token counting
-        warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet")
+        warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet", stacklevel=2)
         return len(get_gpt_4o_encoding().encode(text))
     elif model.startswith("deepseek"):
         from deepseek_tokenizer import deepseek_tokenizer
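`stacklevel=2` re-attributes the warning to the caller of `get_token_counts` instead of the `warnings.warn` line itself, the usual convention for library-emitted warnings (and what lint rules like Ruff's B028 ask for). A minimal demonstration:

```python
import warnings

def count_tokens_fallback() -> int:
    # stacklevel=2 attributes the warning to whoever called this helper,
    # so users see their own line number instead of the library's.
    warnings.warn("falling back to approximate token counting", stacklevel=2)
    return 0

count_tokens_fallback()  # the warning is reported against this line
```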
@@ -232,10 +254,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         backend_setting = settings.get_backend(BackendType.StepFun).models[model]
         if len(backend_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
-
-        if
-
-        endpoint = settings.get_endpoint(endpoint_id)
+        endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+        if endpoint is None:
+            return len(get_gpt_35_encoding().encode(text))
         tokenize_url = f"{endpoint.api_base}/token/count"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
@@ -253,10 +274,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         backend_setting = settings.get_backend(BackendType.ZhiPuAI).models[model]
         if len(backend_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
-
-        if
-
-        endpoint = settings.get_endpoint(endpoint_id)
+        endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+        if endpoint is None:
+            return len(get_gpt_35_encoding().encode(text))
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
         tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
vectorvein/server/token_server.py
CHANGED
@@ -19,7 +19,7 @@ async def count_tokens(request: TokenCountRequest):
         token_count = get_token_counts(request.text, request.model, use_token_server_first=False)
         return {"total_tokens": token_count}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        raise HTTPException(status_code=500, detail=str(e)) from None
 
 
 def run_token_server(host: str | None = None, port: int | None = None):
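`from None` suppresses Python's implicit exception chaining, so the logged traceback shows only the `HTTPException` rather than a "During handling of the above exception, another exception occurred" block; the original error text is already carried in `detail`. A standalone illustration, with `RuntimeError` standing in for `HTTPException`:

```python
import traceback

def to_http_error(text: str) -> None:
    try:
        int(text)
    except ValueError as exc:
        # `from None` severs the implicit chain, so the report shows only
        # the RuntimeError; the original message survives inside its text.
        raise RuntimeError(f"bad input: {exc}") from None

try:
    to_http_error("not-a-number")
except RuntimeError:
    traceback.print_exc()  # no "During handling of..." block is printed
```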
|