vectorvein 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. vectorvein/api/client.py +81 -103
  2. vectorvein/api/exceptions.py +1 -3
  3. vectorvein/api/models.py +11 -11
  4. vectorvein/chat_clients/anthropic_client.py +157 -169
  5. vectorvein/chat_clients/base_client.py +257 -198
  6. vectorvein/chat_clients/openai_compatible_client.py +150 -161
  7. vectorvein/chat_clients/utils.py +44 -24
  8. vectorvein/server/token_server.py +1 -1
  9. vectorvein/settings/__init__.py +27 -27
  10. vectorvein/types/defaults.py +32 -16
  11. vectorvein/types/llm_parameters.py +40 -34
  12. vectorvein/types/settings.py +10 -10
  13. vectorvein/utilities/media_processing.py +1 -1
  14. vectorvein/utilities/rate_limiter.py +5 -6
  15. vectorvein/utilities/retry.py +6 -5
  16. vectorvein/workflow/graph/edge.py +3 -3
  17. vectorvein/workflow/graph/node.py +14 -26
  18. vectorvein/workflow/graph/port.py +40 -39
  19. vectorvein/workflow/graph/workflow.py +13 -25
  20. vectorvein/workflow/nodes/audio_generation.py +5 -7
  21. vectorvein/workflow/nodes/control_flows.py +7 -9
  22. vectorvein/workflow/nodes/file_processing.py +4 -6
  23. vectorvein/workflow/nodes/image_generation.py +20 -22
  24. vectorvein/workflow/nodes/llms.py +13 -15
  25. vectorvein/workflow/nodes/media_editing.py +26 -40
  26. vectorvein/workflow/nodes/media_processing.py +19 -21
  27. vectorvein/workflow/nodes/output.py +10 -12
  28. vectorvein/workflow/nodes/relational_db.py +3 -5
  29. vectorvein/workflow/nodes/text_processing.py +8 -10
  30. vectorvein/workflow/nodes/tools.py +8 -10
  31. vectorvein/workflow/nodes/triggers.py +1 -3
  32. vectorvein/workflow/nodes/vector_db.py +3 -5
  33. vectorvein/workflow/nodes/video_generation.py +4 -6
  34. vectorvein/workflow/nodes/web_crawlers.py +4 -6
  35. vectorvein/workflow/utils/analyse.py +5 -13
  36. vectorvein/workflow/utils/check.py +6 -16
  37. vectorvein/workflow/utils/json_to_code.py +6 -14
  38. vectorvein/workflow/utils/layout.py +3 -5
  39. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/METADATA +1 -1
  40. vectorvein-0.3.3.dist-info/RECORD +68 -0
  41. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/WHEEL +1 -1
  42. vectorvein-0.3.1.dist-info/RECORD +0 -68
  43. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/base_client.py

@@ -3,8 +3,9 @@ import random
  import asyncio
  from abc import ABC, abstractmethod
  from collections import defaultdict
+ from collections.abc import Iterable, Generator, AsyncGenerator
  from functools import cached_property
- from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable, Optional, Dict, List, Union
+ from typing import Any, overload, Literal

  import httpx
  from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
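Note on the imports above: Generator, AsyncGenerator, and Iterable move from typing to collections.abc, and Optional/Dict/List/Union are dropped in favor of builtin generics and PEP 604 unions, which assumes Python 3.10+. A minimal, self-contained sketch of the new style (the function below is illustrative, not from vectorvein):

    from collections.abc import Iterable, Generator

    def chunked(items: Iterable[int], size: int | None = None) -> Generator[list[int], None, None]:
        """Yield lists of up to `size` items; size=None yields one chunk."""
        buf: list[int] = []
        for item in items:
            buf.append(item)
            if size is not None and len(buf) == size:
                yield buf
                buf = []
        if buf:
            yield buf

    print(list(chunked(range(5), size=2)))  # [[0, 1], [2, 3], [4]]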
@@ -126,9 +127,7 @@ class BaseChatClient(ABC):
      # Get rate limit parameters
      # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
      rpm = self.rpm or endpoint.rpm or (self.settings.rate_limit.default_rpm if self.settings.rate_limit else 60)
-     tpm = (
-         self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)
-     )
+     tpm = self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)

      while self.rate_limiter:
          allowed, wait_time = self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
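The rpm/tpm lines resolve limits through a three-level fallback: per-client value, then per-endpoint value, then the settings default (60 rpm / 1,000,000 tpm). A hedged sketch of that chain with illustrative stand-in arguments; note that `or` treats a falsy value such as 0 as unset:

    def resolve_rpm(client_rpm: int | None, endpoint_rpm: int | None, default_rpm: int = 60) -> int:
        # Same `or` chain as in the diff: the first truthy value wins.
        return client_rpm or endpoint_rpm or default_rpm

    print(resolve_rpm(None, 120))  # 120 -> endpoint value beats the default
    print(resolve_rpm(30, 120))    # 30  -> client value beats everything
    print(resolve_rpm(0, None))    # 60  -> 0 is falsy, so it falls through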
@@ -143,6 +142,31 @@ class BaseChatClient(ABC):
          tokens += int(len(message.get("content", "")) * 0.6)
      return tokens

+ def _get_available_endpoints(self, model_endpoints: list) -> list:
+     """Get list of available (enabled) endpoints for the model"""
+     available_endpoints = []
+     for endpoint_option in model_endpoints:
+         if isinstance(endpoint_option, dict):
+             # For endpoint with specific config, check if the endpoint is enabled
+             endpoint_id = endpoint_option["endpoint_id"]
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_id)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+         else:
+             # For simple endpoint ID string, check if the endpoint is enabled
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_option)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+     return available_endpoints
+
  def set_model_id_by_endpoint_id(self, endpoint_id: str):
      for endpoint_option in self.backend_settings.models[self.model].endpoints:
          if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
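A self-contained sketch of what the new `_get_available_endpoints` helper does, using stand-in stub classes rather than vectorvein's real Settings/EndpointSetting (the ids and model name below are made up):

    from dataclasses import dataclass

    @dataclass
    class StubEndpoint:
        id: str
        enabled: bool

    class StubSettings:
        def __init__(self, endpoints: dict[str, StubEndpoint]):
            self._endpoints = endpoints

        def get_endpoint(self, endpoint_id: str) -> StubEndpoint:
            # Like the real settings object: unknown ids raise ValueError.
            if endpoint_id not in self._endpoints:
                raise ValueError(f"Endpoint {endpoint_id} not found")
            return self._endpoints[endpoint_id]

    settings = StubSettings({
        "a": StubEndpoint("a", enabled=True),
        "b": StubEndpoint("b", enabled=False),
    })
    model_endpoints = ["a", "b", {"endpoint_id": "a", "model_id": "model-x"}, "missing"]

    available = []
    for option in model_endpoints:
        endpoint_id = option["endpoint_id"] if isinstance(option, dict) else option
        try:
            if settings.get_endpoint(endpoint_id).enabled:
                available.append(option)
        except ValueError:
            continue  # unknown endpoint is skipped, as in the diff

    print(available)  # ['a', {'endpoint_id': 'a', 'model_id': 'model-x'}]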
@@ -154,7 +178,12 @@ class BaseChatClient(ABC):
      if self.endpoint is None:
          if self.random_endpoint:
              self.random_endpoint = True
-             endpoint = random.choice(self.backend_settings.models[self.model].endpoints)
+             # Get available (enabled) endpoints
+             available_endpoints = self._get_available_endpoints(self.backend_settings.models[self.model].endpoints)
+             if not available_endpoints:
+                 raise ValueError(f"No enabled endpoints available for model {self.model}")
+
+             endpoint = random.choice(available_endpoints)
              if isinstance(endpoint, dict):
                  self.endpoint_id = endpoint["endpoint_id"]
                  self.model_id = endpoint["model_id"]
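With this change, random selection only ever sees enabled endpoints, and a model whose endpoints are all disabled now fails fast instead of picking an unusable one. A small sketch of the failure mode a caller might handle (the model name is illustrative):

    import random

    available_endpoints: list[str] = []  # pretend every endpoint was filtered out

    try:
        if not available_endpoints:
            raise ValueError("No enabled endpoints available for model example-model")
        endpoint = random.choice(available_endpoints)
    except ValueError as exc:
        print(f"endpoint selection failed: {exc}")  # caller can fall back or re-raise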
@@ -166,8 +195,14 @@ class BaseChatClient(ABC):
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
          else:
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
+             # Check if the specified endpoint is enabled
+             if not self.endpoint.enabled:
+                 raise ValueError(f"Endpoint {self.endpoint_id} is disabled")
              self.set_model_id_by_endpoint_id(self.endpoint_id)
      elif isinstance(self.endpoint, EndpointSetting):
+         # Check if the endpoint is enabled
+         if not self.endpoint.enabled:
+             raise ValueError(f"Endpoint {self.endpoint.id} is disabled")
          self.endpoint_id = self.endpoint.id
          self.set_model_id_by_endpoint_id(self.endpoint_id)
      else:
@@ -191,31 +226,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
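The signature rewrites in this and the following overload hunks are purely stylistic: `Optional[X] | OpenAINotGiven` and `X | OpenAINotGiven | None` denote the same union. A quick check of that equivalence, with a local sentinel class standing in for the SDK's NotGiven:

    from typing import Optional, Union

    class NotGivenSentinel:  # stand-in for openai's NotGiven
        pass

    old_style = Union[Optional[int], NotGivenSentinel]  # Optional[X] | NotGiven
    new_style = int | NotGivenSentinel | None           # X | NotGiven | None

    # Unions flatten and ignore ordering, so both spell the same type:
    print(old_style == Union[int, None, NotGivenSentinel])  # True
    print(new_style == Union[int, None, NotGivenSentinel])  # True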
@@ -233,31 +268,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[True],
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -275,31 +310,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: bool,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -316,31 +351,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] | Literal[True] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -355,31 +390,31 @@ class BaseChatClient(ABC):
      messages: list,
      model: str | None = None,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -425,7 +460,7 @@ class BaseChatClient(ABC):

  def model_list(self):
      _raw_client = self.raw_client
-     if isinstance(_raw_client, (OpenAI, AzureOpenAI)):
+     if isinstance(_raw_client, OpenAI | AzureOpenAI):
          return _raw_client.models.list().model_dump()
      elif isinstance(_raw_client, Anthropic):
          return _raw_client.models.list(limit=1000).model_dump()
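The isinstance rewrite works because, since Python 3.10 (PEP 604), isinstance and issubclass accept union types directly; it is behavior-identical to the tuple form:

    value = 3.14
    print(isinstance(value, (int, float)))  # True -- classic tuple form
    print(isinstance(value, int | float))   # True -- union form, Python 3.10+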
@@ -513,14 +548,10 @@ class BaseAsyncChatClient(ABC):
      # Get rate limit parameters
      # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
      rpm = self.rpm or endpoint.rpm or (self.settings.rate_limit.default_rpm if self.settings.rate_limit else 60)
-     tpm = (
-         self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)
-     )
+     tpm = self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)

      while self.rate_limiter:
-         allowed, wait_time = await self.rate_limiter.check_limit(
-             key, rpm, tpm, self._estimate_request_tokens(messages)
-         )
+         allowed, wait_time = await self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
          if allowed:
              break
          await asyncio.sleep(wait_time)
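The reflowed loop above is the usual async rate-limit pattern: ask the limiter for a slot, sleep for the suggested interval, retry. A runnable sketch with a stub limiter in place of vectorvein's real rate limiter (the key, rpm, tpm, and token values are illustrative):

    import asyncio

    class StubRateLimiter:
        def __init__(self) -> None:
            self.calls = 0

        async def check_limit(self, key: str, rpm: int, tpm: int, tokens: int) -> tuple[bool, float]:
            # Deny the first request, then allow -- just enough to exercise the loop.
            self.calls += 1
            return self.calls > 1, 0.01

    async def wait_for_slot(limiter: StubRateLimiter) -> None:
        while limiter:  # mirrors `while self.rate_limiter:` (a falsy limiter skips the loop)
            allowed, wait_time = await limiter.check_limit("openai:model-x", rpm=60, tpm=1_000_000, tokens=42)
            if allowed:
                break
            await asyncio.sleep(wait_time)

    limiter = StubRateLimiter()
    asyncio.run(wait_for_slot(limiter))
    print("slot acquired after", limiter.calls, "checks")  # 2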
@@ -532,6 +563,31 @@ class BaseAsyncChatClient(ABC):
          tokens += int(len(message.get("content", "")) * 0.6)
      return tokens

+ def _get_available_endpoints(self, model_endpoints: list) -> list:
+     """Get list of available (enabled) endpoints for the model"""
+     available_endpoints = []
+     for endpoint_option in model_endpoints:
+         if isinstance(endpoint_option, dict):
+             # For endpoint with specific config, check if the endpoint is enabled
+             endpoint_id = endpoint_option["endpoint_id"]
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_id)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+         else:
+             # For simple endpoint ID string, check if the endpoint is enabled
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_option)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+     return available_endpoints
+
  def set_model_id_by_endpoint_id(self, endpoint_id: str):
      for endpoint_option in self.backend_settings.models[self.model].endpoints:
          if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -543,7 +599,12 @@ class BaseAsyncChatClient(ABC):
      if self.endpoint is None:
          if self.random_endpoint:
              self.random_endpoint = True
-             endpoint = random.choice(self.backend_settings.models[self.model].endpoints)
+             # Get available (enabled) endpoints
+             available_endpoints = self._get_available_endpoints(self.backend_settings.models[self.model].endpoints)
+             if not available_endpoints:
+                 raise ValueError(f"No enabled endpoints available for model {self.model}")
+
+             endpoint = random.choice(available_endpoints)
              if isinstance(endpoint, dict):
                  self.endpoint_id = endpoint["endpoint_id"]
                  self.model_id = endpoint["model_id"]
@@ -555,8 +616,14 @@ class BaseAsyncChatClient(ABC):
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
          else:
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
+             # Check if the specified endpoint is enabled
+             if not self.endpoint.enabled:
+                 raise ValueError(f"Endpoint {self.endpoint_id} is disabled")
              self.set_model_id_by_endpoint_id(self.endpoint_id)
      elif isinstance(self.endpoint, EndpointSetting):
+         # Check if the endpoint is enabled
+         if not self.endpoint.enabled:
+             raise ValueError(f"Endpoint {self.endpoint.id} is disabled")
          self.endpoint_id = self.endpoint.id
          self.set_model_id_by_endpoint_id(self.endpoint_id)
      else:
@@ -568,15 +635,7 @@ class BaseAsyncChatClient(ABC):
  @abstractmethod
  def raw_client(
      self,
- ) -> (
-     AsyncOpenAI
-     | AsyncAzureOpenAI
-     | AsyncAnthropic
-     | AsyncAnthropicVertex
-     | AsyncAnthropicBedrock
-     | httpx.AsyncClient
-     | None
- ):
+ ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | AsyncAnthropicBedrock | httpx.AsyncClient | None:
      pass

  @overload
@@ -588,31 +647,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -630,31 +689,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[True],
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -672,31 +731,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: bool,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -713,31 +772,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] | Literal[True] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -752,31 +811,31 @@ class BaseAsyncChatClient(ABC):
      messages: list,
      model: str | None = None,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -822,7 +881,7 @@ class BaseAsyncChatClient(ABC):

  async def model_list(self):
      _raw_client = self.raw_client
-     if isinstance(_raw_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+     if isinstance(_raw_client, AsyncOpenAI | AsyncAzureOpenAI):
          return (await _raw_client.models.list()).model_dump()
      elif isinstance(_raw_client, AsyncAnthropic):
          return (await _raw_client.models.list(limit=1000)).model_dump()