vectorvein 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. vectorvein/api/client.py +81 -103
  2. vectorvein/api/exceptions.py +1 -3
  3. vectorvein/api/models.py +11 -11
  4. vectorvein/chat_clients/anthropic_client.py +157 -169
  5. vectorvein/chat_clients/base_client.py +257 -198
  6. vectorvein/chat_clients/openai_compatible_client.py +150 -161
  7. vectorvein/chat_clients/utils.py +44 -24
  8. vectorvein/server/token_server.py +1 -1
  9. vectorvein/settings/__init__.py +27 -27
  10. vectorvein/types/defaults.py +32 -16
  11. vectorvein/types/llm_parameters.py +40 -34
  12. vectorvein/types/settings.py +10 -10
  13. vectorvein/utilities/media_processing.py +1 -1
  14. vectorvein/utilities/rate_limiter.py +5 -6
  15. vectorvein/utilities/retry.py +6 -5
  16. vectorvein/workflow/graph/edge.py +3 -3
  17. vectorvein/workflow/graph/node.py +14 -26
  18. vectorvein/workflow/graph/port.py +40 -39
  19. vectorvein/workflow/graph/workflow.py +13 -25
  20. vectorvein/workflow/nodes/audio_generation.py +5 -7
  21. vectorvein/workflow/nodes/control_flows.py +7 -9
  22. vectorvein/workflow/nodes/file_processing.py +4 -6
  23. vectorvein/workflow/nodes/image_generation.py +20 -22
  24. vectorvein/workflow/nodes/llms.py +13 -15
  25. vectorvein/workflow/nodes/media_editing.py +26 -40
  26. vectorvein/workflow/nodes/media_processing.py +19 -21
  27. vectorvein/workflow/nodes/output.py +10 -12
  28. vectorvein/workflow/nodes/relational_db.py +3 -5
  29. vectorvein/workflow/nodes/text_processing.py +8 -10
  30. vectorvein/workflow/nodes/tools.py +8 -10
  31. vectorvein/workflow/nodes/triggers.py +1 -3
  32. vectorvein/workflow/nodes/vector_db.py +3 -5
  33. vectorvein/workflow/nodes/video_generation.py +4 -6
  34. vectorvein/workflow/nodes/web_crawlers.py +4 -6
  35. vectorvein/workflow/utils/analyse.py +5 -13
  36. vectorvein/workflow/utils/check.py +6 -16
  37. vectorvein/workflow/utils/json_to_code.py +6 -14
  38. vectorvein/workflow/utils/layout.py +3 -5
  39. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/METADATA +1 -1
  40. vectorvein-0.3.3.dist-info/RECORD +68 -0
  41. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/WHEEL +1 -1
  42. vectorvein-0.3.1.dist-info/RECORD +0 -68
  43. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/base_client.py

@@ -3,8 +3,9 @@ import random
  import asyncio
  from abc import ABC, abstractmethod
  from collections import defaultdict
+ from collections.abc import Iterable, Generator, AsyncGenerator
  from functools import cached_property
- from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable, Optional, Dict, List, Union
+ from typing import Any, overload, Literal

  import httpx
  from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
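Note on the imports above: Generator, AsyncGenerator, and Iterable move from typing to collections.abc, and Optional/Dict/List/Union are dropped in favor of builtin generics and PEP 604 unions, which assumes Python 3.10+. A minimal, self-contained sketch of the new style (the function below is illustrative, not from vectorvein):

    from collections.abc import Iterable, Generator

    def chunked(items: Iterable[int], size: int | None = None) -> Generator[list[int], None, None]:
        """Yield lists of up to `size` items; size=None yields one chunk."""
        buf: list[int] = []
        for item in items:
            buf.append(item)
            if size is not None and len(buf) == size:
                yield buf
                buf = []
        if buf:
            yield buf

    print(list(chunked(range(5), size=2)))  # [[0, 1], [2, 3], [4]]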
@@ -126,9 +127,7 @@ class BaseChatClient(ABC):
      # Get rate limit parameters
      # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
      rpm = self.rpm or endpoint.rpm or (self.settings.rate_limit.default_rpm if self.settings.rate_limit else 60)
-     tpm = (
-         self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)
-     )
+     tpm = self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)

      while self.rate_limiter:
          allowed, wait_time = self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
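The rpm/tpm lines resolve limits through a three-level fallback: per-client value, then per-endpoint value, then the settings default (60 rpm / 1,000,000 tpm). A hedged sketch of that chain with illustrative stand-in arguments; note that `or` treats a falsy value such as 0 as unset:

    def resolve_rpm(client_rpm: int | None, endpoint_rpm: int | None, default_rpm: int = 60) -> int:
        # Same `or` chain as in the diff: the first truthy value wins.
        return client_rpm or endpoint_rpm or default_rpm

    print(resolve_rpm(None, 120))  # 120 -> endpoint value beats the default
    print(resolve_rpm(30, 120))    # 30  -> client value beats everything
    print(resolve_rpm(0, None))    # 60  -> 0 is falsy, so it falls through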
@@ -143,6 +142,31 @@ class BaseChatClient(ABC):
          tokens += int(len(message.get("content", "")) * 0.6)
      return tokens

+ def _get_available_endpoints(self, model_endpoints: list) -> list:
+     """Get list of available (enabled) endpoints for the model"""
+     available_endpoints = []
+     for endpoint_option in model_endpoints:
+         if isinstance(endpoint_option, dict):
+             # For endpoint with specific config, check if the endpoint is enabled
+             endpoint_id = endpoint_option["endpoint_id"]
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_id)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+         else:
+             # For simple endpoint ID string, check if the endpoint is enabled
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_option)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+     return available_endpoints
+
  def set_model_id_by_endpoint_id(self, endpoint_id: str):
      for endpoint_option in self.backend_settings.models[self.model].endpoints:
          if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
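A self-contained sketch of what the new `_get_available_endpoints` helper does, using stand-in stub classes rather than vectorvein's real Settings/EndpointSetting (the ids and model name below are made up):

    from dataclasses import dataclass

    @dataclass
    class StubEndpoint:
        id: str
        enabled: bool

    class StubSettings:
        def __init__(self, endpoints: dict[str, StubEndpoint]):
            self._endpoints = endpoints

        def get_endpoint(self, endpoint_id: str) -> StubEndpoint:
            # Like the real settings object: unknown ids raise ValueError.
            if endpoint_id not in self._endpoints:
                raise ValueError(f"Endpoint {endpoint_id} not found")
            return self._endpoints[endpoint_id]

    settings = StubSettings({
        "a": StubEndpoint("a", enabled=True),
        "b": StubEndpoint("b", enabled=False),
    })
    model_endpoints = ["a", "b", {"endpoint_id": "a", "model_id": "model-x"}, "missing"]

    available = []
    for option in model_endpoints:
        endpoint_id = option["endpoint_id"] if isinstance(option, dict) else option
        try:
            if settings.get_endpoint(endpoint_id).enabled:
                available.append(option)
        except ValueError:
            continue  # unknown endpoint is skipped, as in the diff

    print(available)  # ['a', {'endpoint_id': 'a', 'model_id': 'model-x'}]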
@@ -154,7 +178,12 @@ class BaseChatClient(ABC):
      if self.endpoint is None:
          if self.random_endpoint:
              self.random_endpoint = True
-             endpoint = random.choice(self.backend_settings.models[self.model].endpoints)
+             # Get available (enabled) endpoints
+             available_endpoints = self._get_available_endpoints(self.backend_settings.models[self.model].endpoints)
+             if not available_endpoints:
+                 raise ValueError(f"No enabled endpoints available for model {self.model}")
+
+             endpoint = random.choice(available_endpoints)
              if isinstance(endpoint, dict):
                  self.endpoint_id = endpoint["endpoint_id"]
                  self.model_id = endpoint["model_id"]
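With this change, random selection only ever sees enabled endpoints, and a model whose endpoints are all disabled now fails fast instead of picking an unusable one. A small sketch of the failure mode a caller might handle (the model name is illustrative):

    import random

    available_endpoints: list[str] = []  # pretend every endpoint was filtered out

    try:
        if not available_endpoints:
            raise ValueError("No enabled endpoints available for model example-model")
        endpoint = random.choice(available_endpoints)
    except ValueError as exc:
        print(f"endpoint selection failed: {exc}")  # caller can fall back or re-raise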
@@ -166,8 +195,14 @@ class BaseChatClient(ABC):
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
          else:
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
+             # Check if the specified endpoint is enabled
+             if not self.endpoint.enabled:
+                 raise ValueError(f"Endpoint {self.endpoint_id} is disabled")
              self.set_model_id_by_endpoint_id(self.endpoint_id)
      elif isinstance(self.endpoint, EndpointSetting):
+         # Check if the endpoint is enabled
+         if not self.endpoint.enabled:
+             raise ValueError(f"Endpoint {self.endpoint.id} is disabled")
          self.endpoint_id = self.endpoint.id
          self.set_model_id_by_endpoint_id(self.endpoint_id)
      else:
@@ -191,31 +226,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
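The signature rewrites in this and the following overload hunks are purely stylistic: `Optional[X] | OpenAINotGiven` and `X | OpenAINotGiven | None` denote the same union. A quick check of that equivalence, with a local sentinel class standing in for the SDK's NotGiven:

    from typing import Optional, Union

    class NotGivenSentinel:  # stand-in for openai's NotGiven
        pass

    old_style = Union[Optional[int], NotGivenSentinel]  # Optional[X] | NotGiven
    new_style = int | NotGivenSentinel | None           # X | NotGiven | None

    # Unions flatten and ignore ordering, so both spell the same type:
    print(old_style == Union[int, None, NotGivenSentinel])  # True
    print(new_style == Union[int, None, NotGivenSentinel])  # True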
@@ -233,31 +268,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[True],
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -275,31 +310,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: bool,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -316,31 +351,31 @@ class BaseChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] | Literal[True] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -355,31 +390,31 @@ class BaseChatClient(ABC):
      messages: list,
      model: str | None = None,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
+     max_tokens: int | None | NotGiven = None,
      tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -425,7 +460,7 @@ class BaseChatClient(ABC):

  def model_list(self):
      _raw_client = self.raw_client
-     if isinstance(_raw_client, (OpenAI, AzureOpenAI)):
+     if isinstance(_raw_client, OpenAI | AzureOpenAI):
          return _raw_client.models.list().model_dump()
      elif isinstance(_raw_client, Anthropic):
          return _raw_client.models.list(limit=1000).model_dump()
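The isinstance rewrite works because, since Python 3.10 (PEP 604), isinstance and issubclass accept union types directly; it is behavior-identical to the tuple form:

    value = 3.14
    print(isinstance(value, (int, float)))  # True -- classic tuple form
    print(isinstance(value, int | float))   # True -- union form, Python 3.10+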
@@ -513,14 +548,10 @@ class BaseAsyncChatClient(ABC):
      # Get rate limit parameters
      # Priority: parameters in model.endpoints > parameters in endpoint > default parameters
      rpm = self.rpm or endpoint.rpm or (self.settings.rate_limit.default_rpm if self.settings.rate_limit else 60)
-     tpm = (
-         self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)
-     )
+     tpm = self.tpm or endpoint.tpm or (self.settings.rate_limit.default_tpm if self.settings.rate_limit else 1000000)

      while self.rate_limiter:
-         allowed, wait_time = await self.rate_limiter.check_limit(
-             key, rpm, tpm, self._estimate_request_tokens(messages)
-         )
+         allowed, wait_time = await self.rate_limiter.check_limit(key, rpm, tpm, self._estimate_request_tokens(messages))
          if allowed:
              break
          await asyncio.sleep(wait_time)
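The reflowed loop above is the usual async rate-limit pattern: ask the limiter for a slot, sleep for the suggested interval, retry. A runnable sketch with a stub limiter in place of vectorvein's real rate limiter (the key, rpm, tpm, and token values are illustrative):

    import asyncio

    class StubRateLimiter:
        def __init__(self) -> None:
            self.calls = 0

        async def check_limit(self, key: str, rpm: int, tpm: int, tokens: int) -> tuple[bool, float]:
            # Deny the first request, then allow -- just enough to exercise the loop.
            self.calls += 1
            return self.calls > 1, 0.01

    async def wait_for_slot(limiter: StubRateLimiter) -> None:
        while limiter:  # mirrors `while self.rate_limiter:` (a falsy limiter skips the loop)
            allowed, wait_time = await limiter.check_limit("openai:model-x", rpm=60, tpm=1_000_000, tokens=42)
            if allowed:
                break
            await asyncio.sleep(wait_time)

    limiter = StubRateLimiter()
    asyncio.run(wait_for_slot(limiter))
    print("slot acquired after", limiter.calls, "checks")  # 2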
@@ -532,6 +563,31 @@ class BaseAsyncChatClient(ABC):
          tokens += int(len(message.get("content", "")) * 0.6)
      return tokens

+ def _get_available_endpoints(self, model_endpoints: list) -> list:
+     """Get list of available (enabled) endpoints for the model"""
+     available_endpoints = []
+     for endpoint_option in model_endpoints:
+         if isinstance(endpoint_option, dict):
+             # For endpoint with specific config, check if the endpoint is enabled
+             endpoint_id = endpoint_option["endpoint_id"]
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_id)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+         else:
+             # For simple endpoint ID string, check if the endpoint is enabled
+             try:
+                 endpoint = self.settings.get_endpoint(endpoint_option)
+                 if endpoint.enabled:
+                     available_endpoints.append(endpoint_option)
+             except ValueError:
+                 # Endpoint not found, skip it
+                 continue
+     return available_endpoints
+
  def set_model_id_by_endpoint_id(self, endpoint_id: str):
      for endpoint_option in self.backend_settings.models[self.model].endpoints:
          if isinstance(endpoint_option, dict) and endpoint_id == endpoint_option["endpoint_id"]:
@@ -543,7 +599,12 @@ class BaseAsyncChatClient(ABC):
      if self.endpoint is None:
          if self.random_endpoint:
              self.random_endpoint = True
-             endpoint = random.choice(self.backend_settings.models[self.model].endpoints)
+             # Get available (enabled) endpoints
+             available_endpoints = self._get_available_endpoints(self.backend_settings.models[self.model].endpoints)
+             if not available_endpoints:
+                 raise ValueError(f"No enabled endpoints available for model {self.model}")
+
+             endpoint = random.choice(available_endpoints)
              if isinstance(endpoint, dict):
                  self.endpoint_id = endpoint["endpoint_id"]
                  self.model_id = endpoint["model_id"]
@@ -555,8 +616,14 @@ class BaseAsyncChatClient(ABC):
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
          else:
              self.endpoint = self.settings.get_endpoint(self.endpoint_id)
+             # Check if the specified endpoint is enabled
+             if not self.endpoint.enabled:
+                 raise ValueError(f"Endpoint {self.endpoint_id} is disabled")
              self.set_model_id_by_endpoint_id(self.endpoint_id)
      elif isinstance(self.endpoint, EndpointSetting):
+         # Check if the endpoint is enabled
+         if not self.endpoint.enabled:
+             raise ValueError(f"Endpoint {self.endpoint.id} is disabled")
          self.endpoint_id = self.endpoint.id
          self.set_model_id_by_endpoint_id(self.endpoint_id)
      else:
@@ -568,15 +635,7 @@ class BaseAsyncChatClient(ABC):
  @abstractmethod
  def raw_client(
      self,
- ) -> (
-     AsyncOpenAI
-     | AsyncAzureOpenAI
-     | AsyncAnthropic
-     | AsyncAnthropicVertex
-     | AsyncAnthropicBedrock
-     | httpx.AsyncClient
-     | None
- ):
+ ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | AsyncAnthropicBedrock | httpx.AsyncClient | None:
      pass

  @overload
@@ -588,31 +647,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -630,31 +689,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[True],
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -672,31 +731,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: bool,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -713,31 +772,31 @@ class BaseAsyncChatClient(ABC):
      model: str | None = None,
      stream: Literal[False] | Literal[True] = False,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -752,31 +811,31 @@ class BaseAsyncChatClient(ABC):
      messages: list,
      model: str | None = None,
      temperature: float | None | NotGiven = NOT_GIVEN,
-     max_tokens: int | None = None,
-     tools: list | NotGiven = NOT_GIVEN,
+     max_tokens: int | None | NotGiven = None,
+     tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
      tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
      response_format: ResponseFormat | NotGiven = NOT_GIVEN,
      stream_options: ChatCompletionStreamOptionsParam | None = None,
      top_p: float | NotGiven | None = NOT_GIVEN,
      skip_cutoff: bool = False,
-     audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
-     frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
-     logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
-     modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
-     n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+     frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+     logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+     max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+     metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+     modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+     n: int | OpenAINotGiven | None = NOT_GIVEN,
      parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
-     prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
-     presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
-     reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+     prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+     presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+     reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
      thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
-     seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
-     service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
-     stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
-     store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
-     top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+     seed: int | OpenAINotGiven | None = NOT_GIVEN,
+     service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+     stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+     store: bool | OpenAINotGiven | None = NOT_GIVEN,
+     top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
      user: str | OpenAINotGiven = NOT_GIVEN,
      extra_headers: Headers | None = None,
      extra_query: Query | None = None,
@@ -822,7 +881,7 @@ class BaseAsyncChatClient(ABC):

  async def model_list(self):
      _raw_client = self.raw_client
-     if isinstance(_raw_client, (AsyncOpenAI, AsyncAzureOpenAI)):
+     if isinstance(_raw_client, AsyncOpenAI | AsyncAzureOpenAI):
          return (await _raw_client.models.list()).model_dump()
      elif isinstance(_raw_client, AsyncAnthropic):
          return (await _raw_client.models.list(limit=1000)).model_dump()