vectorvein 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. vectorvein/api/client.py +81 -103
  2. vectorvein/api/exceptions.py +1 -3
  3. vectorvein/api/models.py +11 -11
  4. vectorvein/chat_clients/anthropic_client.py +157 -169
  5. vectorvein/chat_clients/base_client.py +257 -198
  6. vectorvein/chat_clients/openai_compatible_client.py +150 -161
  7. vectorvein/chat_clients/utils.py +44 -24
  8. vectorvein/server/token_server.py +1 -1
  9. vectorvein/settings/__init__.py +27 -27
  10. vectorvein/types/defaults.py +32 -16
  11. vectorvein/types/llm_parameters.py +40 -34
  12. vectorvein/types/settings.py +10 -10
  13. vectorvein/utilities/media_processing.py +1 -1
  14. vectorvein/utilities/rate_limiter.py +5 -6
  15. vectorvein/utilities/retry.py +6 -5
  16. vectorvein/workflow/graph/edge.py +3 -3
  17. vectorvein/workflow/graph/node.py +14 -26
  18. vectorvein/workflow/graph/port.py +40 -39
  19. vectorvein/workflow/graph/workflow.py +13 -25
  20. vectorvein/workflow/nodes/audio_generation.py +5 -7
  21. vectorvein/workflow/nodes/control_flows.py +7 -9
  22. vectorvein/workflow/nodes/file_processing.py +4 -6
  23. vectorvein/workflow/nodes/image_generation.py +20 -22
  24. vectorvein/workflow/nodes/llms.py +13 -15
  25. vectorvein/workflow/nodes/media_editing.py +26 -40
  26. vectorvein/workflow/nodes/media_processing.py +19 -21
  27. vectorvein/workflow/nodes/output.py +10 -12
  28. vectorvein/workflow/nodes/relational_db.py +3 -5
  29. vectorvein/workflow/nodes/text_processing.py +8 -10
  30. vectorvein/workflow/nodes/tools.py +8 -10
  31. vectorvein/workflow/nodes/triggers.py +1 -3
  32. vectorvein/workflow/nodes/vector_db.py +3 -5
  33. vectorvein/workflow/nodes/video_generation.py +4 -6
  34. vectorvein/workflow/nodes/web_crawlers.py +4 -6
  35. vectorvein/workflow/utils/analyse.py +5 -13
  36. vectorvein/workflow/utils/check.py +6 -16
  37. vectorvein/workflow/utils/json_to_code.py +6 -14
  38. vectorvein/workflow/utils/layout.py +3 -5
  39. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/METADATA +1 -1
  40. vectorvein-0.3.3.dist-info/RECORD +68 -0
  41. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/WHEEL +1 -1
  42. vectorvein-0.3.1.dist-info/RECORD +0 -68
  43. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -3,19 +3,8 @@
  import re
  import json
  from functools import cached_property
- from typing import (
- Any,
- Dict,
- List,
- TYPE_CHECKING,
- overload,
- Generator,
- AsyncGenerator,
- Union,
- Literal,
- Iterable,
- Optional,
- )
+ from collections.abc import Generator, AsyncGenerator, Iterable
+ from typing import Any, TYPE_CHECKING, overload, Literal

  import httpx
  from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
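Note: the import rewrite above shows the typing style applied throughout this release. typing aliases (Optional, Union, Dict, List) give way to X | None unions and built-in generics, and the generator/iterable ABCs now come from collections.abc instead of typing. A minimal before/after sketch (illustrative only, not taken from the package source):

    # Before: typing-module generics and Optional wrappers
    from typing import Dict, Generator, Optional

    def old_stream(bias: Optional[Dict[str, int]] = None) -> Generator[str, None, None]:
        yield "chunk"

    # After: built-in generics, "X | None" unions, and collections.abc ABCs (Python 3.9/3.10+)
    from collections.abc import Generator

    def new_stream(bias: dict[str, int] | None = None) -> Generator[str, None, None]:
        yield "chunk"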
@@ -117,31 +106,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[False] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -158,37 +147,37 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[True],
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
  extra_body: Body | None = None,
  timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
- ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+ ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
  pass

  @overload
@@ -199,31 +188,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -239,31 +228,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[False] | Literal[True] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -312,7 +301,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
  },
  }
  )
- tools_params = dict(tools=_tools, tool_choice=tool_choice)
+ tools_params = {"tools": _tools, "tool_choice": tool_choice}
  else:
  tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
  additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -670,31 +659,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[False] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -711,31 +700,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[True],
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -752,31 +741,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -792,37 +781,37 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[False] | Literal[True] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
  extra_body: Body | None = None,
  timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
- ):
+ ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
  if model is not None:
  self.model = model
  if stream is not None:
@@ -865,7 +854,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  },
  }
  )
- tools_params = dict(tools=_tools, tool_choice=tool_choice)
+ tools_params = {"tools": _tools, "tool_choice": tool_choice}
  else:
  tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
  additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -5,7 +5,8 @@ import json
  import uuid
  import warnings
  from math import ceil
- from typing import Iterable, cast
+ from collections.abc import Iterable
+ from typing import cast

  import httpx
  import tiktoken
@@ -105,6 +106,24 @@ def convert_type(value, value_type):
  return value # 如果类型未知,返回原始值


+ def _get_first_enabled_endpoint(backend_setting, settings):
+ """Get the first enabled endpoint from backend settings"""
+ for endpoint_choice in backend_setting.endpoints:
+ if isinstance(endpoint_choice, dict):
+ endpoint_id = endpoint_choice["endpoint_id"]
+ else:
+ endpoint_id = endpoint_choice
+
+ try:
+ endpoint = settings.get_endpoint(endpoint_id)
+ if endpoint.enabled:
+ return endpoint
+ except ValueError:
+ # Endpoint not found, skip it
+ continue
+ return None
+
+
  def get_token_counts(text: str | dict, model: str = "", use_token_server_first: bool = True) -> int:
  if use_token_server_first and settings.token_server is not None:
  base_url = settings.token_server.url if settings.token_server.url is not None else f"http://{settings.token_server.host}:{settings.token_server.port}"
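The new _get_first_enabled_endpoint helper returns the first endpoint that both exists in settings and is enabled, accepting either plain endpoint IDs or {"endpoint_id": ...} dicts, and returns None when nothing qualifies; the hunks below switch the MiniMax, Moonshot, Gemini, StepFun, and ZhiPuAI branches of get_token_counts to it. A standalone sketch of that behaviour, assuming the helper above is in scope (Endpoint, ToySettings, and ToyBackend are toy stand-ins, not the package's real classes):

    from dataclasses import dataclass

    @dataclass
    class Endpoint:
        endpoint_id: str
        enabled: bool = True

    class ToySettings:
        def __init__(self, endpoints):
            self._by_id = {e.endpoint_id: e for e in endpoints}

        def get_endpoint(self, endpoint_id):
            try:
                return self._by_id[endpoint_id]
            except KeyError:
                # Mirrors the ValueError the helper expects for unknown endpoint IDs
                raise ValueError(f"Endpoint {endpoint_id} not found") from None

    @dataclass
    class ToyBackend:
        endpoints: list

    settings = ToySettings([Endpoint("a", enabled=False), Endpoint("b", enabled=True)])
    backend = ToyBackend(endpoints=["missing", "a", {"endpoint_id": "b"}])

    # "missing" is skipped (ValueError), "a" is skipped (disabled), "b" is returned
    assert _get_first_enabled_endpoint(backend, settings).endpoint_id == "b"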
@@ -126,10 +145,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.MiniMax).models[model]
  if len(backend_setting.endpoints) == 0:
  return int(len(text) / 1.33)
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return int(len(text) / 1.33)
  tokenize_url = "https://api.minimax.chat/v1/tokenize"
  headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
  request_body = {
@@ -150,10 +168,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.Moonshot).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  tokenize_url = f"{endpoint.api_base}/tokenizers/estimate-token-count"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
  request_body = {
@@ -171,10 +188,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.Gemini).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))

  api_base = endpoint.api_base.removesuffix("/openai/") if endpoint.api_base else "https://generativelanguage.googleapis.com/v1beta"
  base_url = f"{api_base}/models/{backend_setting.id}:countTokens"
@@ -200,7 +216,13 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  endpoint_id = endpoint_choice["endpoint_id"]
  else:
  endpoint_id = endpoint_choice
- endpoint = settings.get_endpoint(endpoint_id)
+
+ try:
+ endpoint = settings.get_endpoint(endpoint_id)
+ if not endpoint.enabled:
+ continue
+ except ValueError:
+ continue

  if endpoint.is_vertex or endpoint.is_bedrock or endpoint.endpoint_type == "anthropic_vertex" or endpoint.endpoint_type == "anthropic_bedrock":
  continue
@@ -214,10 +236,10 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  .input_tokens
  )
  except Exception as e:
- warnings.warn(f"Anthropic token counting failed: {e}")
+ warnings.warn(f"Anthropic token counting failed: {e}", stacklevel=2)

  # TODO: Use anthropic token counting
- warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet")
+ warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet", stacklevel=2)
  return len(get_gpt_4o_encoding().encode(text))
  elif model.startswith("deepseek"):
  from deepseek_tokenizer import deepseek_tokenizer
@@ -232,10 +254,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.StepFun).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  tokenize_url = f"{endpoint.api_base}/token/count"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
  request_body = {
@@ -253,10 +274,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.ZhiPuAI).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
  model = "glm-4-plus"
  tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
@@ -19,7 +19,7 @@ async def count_tokens(request: TokenCountRequest):
  token_count = get_token_counts(request.text, request.model, use_token_server_first=False)
  return {"total_tokens": token_count}
  except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
+ raise HTTPException(status_code=500, detail=str(e)) from None

 
  def run_token_server(host: str | None = None, port: int | None = None):
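The token_server change above appends "from None" to the re-raised HTTPException. In Python this suppresses implicit exception chaining, so the 500 response's traceback no longer drags the original exception along as chained context, while its message still survives through detail=str(e). A standalone illustration of the mechanism (toy function, not package code):

    def parse_port(value: str) -> int:
        try:
            return int(value)
        except ValueError as e:
            # "from None" suppresses the "During handling of the above exception..." chain
            # in the traceback; the original message survives because we fold it in here.
            raise RuntimeError(f"invalid port: {e}") from None

    try:
        parse_port("eighty")
    except RuntimeError as err:
        assert err.__cause__ is None and err.__suppress_context__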