vectorvein 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. vectorvein/api/client.py +81 -103
  2. vectorvein/api/exceptions.py +1 -3
  3. vectorvein/api/models.py +11 -11
  4. vectorvein/chat_clients/anthropic_client.py +157 -169
  5. vectorvein/chat_clients/base_client.py +257 -198
  6. vectorvein/chat_clients/openai_compatible_client.py +150 -161
  7. vectorvein/chat_clients/utils.py +44 -24
  8. vectorvein/server/token_server.py +1 -1
  9. vectorvein/settings/__init__.py +27 -27
  10. vectorvein/types/defaults.py +32 -16
  11. vectorvein/types/llm_parameters.py +40 -34
  12. vectorvein/types/settings.py +10 -10
  13. vectorvein/utilities/media_processing.py +1 -1
  14. vectorvein/utilities/rate_limiter.py +5 -6
  15. vectorvein/utilities/retry.py +6 -5
  16. vectorvein/workflow/graph/edge.py +3 -3
  17. vectorvein/workflow/graph/node.py +14 -26
  18. vectorvein/workflow/graph/port.py +40 -39
  19. vectorvein/workflow/graph/workflow.py +13 -25
  20. vectorvein/workflow/nodes/audio_generation.py +5 -7
  21. vectorvein/workflow/nodes/control_flows.py +7 -9
  22. vectorvein/workflow/nodes/file_processing.py +4 -6
  23. vectorvein/workflow/nodes/image_generation.py +20 -22
  24. vectorvein/workflow/nodes/llms.py +13 -15
  25. vectorvein/workflow/nodes/media_editing.py +26 -40
  26. vectorvein/workflow/nodes/media_processing.py +19 -21
  27. vectorvein/workflow/nodes/output.py +10 -12
  28. vectorvein/workflow/nodes/relational_db.py +3 -5
  29. vectorvein/workflow/nodes/text_processing.py +8 -10
  30. vectorvein/workflow/nodes/tools.py +8 -10
  31. vectorvein/workflow/nodes/triggers.py +1 -3
  32. vectorvein/workflow/nodes/vector_db.py +3 -5
  33. vectorvein/workflow/nodes/video_generation.py +4 -6
  34. vectorvein/workflow/nodes/web_crawlers.py +4 -6
  35. vectorvein/workflow/utils/analyse.py +5 -13
  36. vectorvein/workflow/utils/check.py +6 -16
  37. vectorvein/workflow/utils/json_to_code.py +6 -14
  38. vectorvein/workflow/utils/layout.py +3 -5
  39. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/METADATA +1 -1
  40. vectorvein-0.3.3.dist-info/RECORD +68 -0
  41. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/WHEEL +1 -1
  42. vectorvein-0.3.1.dist-info/RECORD +0 -68
  43. {vectorvein-0.3.1.dist-info → vectorvein-0.3.3.dist-info}/entry_points.txt +0 -0
@@ -3,19 +3,8 @@
  import re
  import json
  from functools import cached_property
- from typing import (
- Any,
- Dict,
- List,
- TYPE_CHECKING,
- overload,
- Generator,
- AsyncGenerator,
- Union,
- Literal,
- Iterable,
- Optional,
- )
+ from collections.abc import Generator, AsyncGenerator, Iterable
+ from typing import Any, TYPE_CHECKING, overload, Literal

  import httpx
  from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
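Note: the import rewrite above shows the typing style applied throughout this release. typing aliases (Optional, Union, Dict, List) give way to X | None unions and built-in generics, and the generator/iterable ABCs now come from collections.abc instead of typing. A minimal before/after sketch (illustrative only, not taken from the package source):

    # Before: typing-module generics and Optional wrappers
    from typing import Dict, Generator, Optional

    def old_stream(bias: Optional[Dict[str, int]] = None) -> Generator[str, None, None]:
        yield "chunk"

    # After: built-in generics, "X | None" unions, and collections.abc ABCs (Python 3.9/3.10+)
    from collections.abc import Generator

    def new_stream(bias: dict[str, int] | None = None) -> Generator[str, None, None]:
        yield "chunk"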
@@ -117,31 +106,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[False] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -158,37 +147,37 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[True],
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
  extra_body: Body | None = None,
  timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
- ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+ ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
  pass

  @overload
@@ -199,31 +188,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -239,31 +228,31 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: Literal[False] | Literal[True] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -312,7 +301,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
  },
  }
  )
- tools_params = dict(tools=_tools, tool_choice=tool_choice)
+ tools_params = {"tools": _tools, "tool_choice": tool_choice}
  else:
  tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
  additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -670,31 +659,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[False] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -711,31 +700,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[True],
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -752,31 +741,31 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
@@ -792,37 +781,37 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: Literal[False] | Literal[True] = False,
  temperature: float | None | NotGiven = NOT_GIVEN,
- max_tokens: int | None | NotGiven = NOT_GIVEN,
+ max_tokens: int | None | NotGiven = None,
  tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
  tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
  response_format: ResponseFormat | NotGiven = NOT_GIVEN,
- stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+ stream_options: ChatCompletionStreamOptionsParam | None = None,
  top_p: float | NotGiven | None = NOT_GIVEN,
  skip_cutoff: bool = False,
- audio: Optional[ChatCompletionAudioParam] | OpenAINotGiven = NOT_GIVEN,
- frequency_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- logit_bias: Optional[Dict[str, int]] | OpenAINotGiven = NOT_GIVEN,
- logprobs: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- max_completion_tokens: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- metadata: Optional[Metadata] | OpenAINotGiven = NOT_GIVEN,
- modalities: Optional[List[ChatCompletionModality]] | OpenAINotGiven = NOT_GIVEN,
- n: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ audio: ChatCompletionAudioParam | OpenAINotGiven | None = NOT_GIVEN,
+ frequency_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ logit_bias: dict[str, int] | OpenAINotGiven | None = NOT_GIVEN,
+ logprobs: bool | OpenAINotGiven | None = NOT_GIVEN,
+ max_completion_tokens: int | OpenAINotGiven | None = NOT_GIVEN,
+ metadata: Metadata | OpenAINotGiven | None = NOT_GIVEN,
+ modalities: list[ChatCompletionModality] | OpenAINotGiven | None = NOT_GIVEN,
+ n: int | OpenAINotGiven | None = NOT_GIVEN,
  parallel_tool_calls: bool | OpenAINotGiven = NOT_GIVEN,
- prediction: Optional[ChatCompletionPredictionContentParam] | OpenAINotGiven = NOT_GIVEN,
- presence_penalty: Optional[float] | OpenAINotGiven = NOT_GIVEN,
- reasoning_effort: Optional[ChatCompletionReasoningEffort] | OpenAINotGiven = NOT_GIVEN,
+ prediction: ChatCompletionPredictionContentParam | OpenAINotGiven | None = NOT_GIVEN,
+ presence_penalty: float | OpenAINotGiven | None = NOT_GIVEN,
+ reasoning_effort: ChatCompletionReasoningEffort | OpenAINotGiven | None = NOT_GIVEN,
  thinking: ThinkingConfigParam | None | NotGiven = NOT_GIVEN,
- seed: Optional[int] | OpenAINotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | OpenAINotGiven = NOT_GIVEN,
- stop: Union[Optional[str], List[str]] | OpenAINotGiven = NOT_GIVEN,
- store: Optional[bool] | OpenAINotGiven = NOT_GIVEN,
- top_logprobs: Optional[int] | OpenAINotGiven = NOT_GIVEN,
+ seed: int | OpenAINotGiven | None = NOT_GIVEN,
+ service_tier: Literal["auto", "default"] | OpenAINotGiven | None = NOT_GIVEN,
+ stop: str | list[str] | OpenAINotGiven | None = NOT_GIVEN,
+ store: bool | OpenAINotGiven | None = NOT_GIVEN,
+ top_logprobs: int | OpenAINotGiven | None = NOT_GIVEN,
  user: str | OpenAINotGiven = NOT_GIVEN,
  extra_headers: Headers | None = None,
  extra_query: Query | None = None,
  extra_body: Body | None = None,
  timeout: float | httpx.Timeout | None | OpenAINotGiven = NOT_GIVEN,
- ):
+ ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
  if model is not None:
  self.model = model
  if stream is not None:
@@ -865,7 +854,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  },
  }
  )
- tools_params = dict(tools=_tools, tool_choice=tool_choice)
+ tools_params = {"tools": _tools, "tool_choice": tool_choice}
  else:
  tools_str = json.dumps(tools, ensure_ascii=False, indent=None)
  additional_system_prompt = generate_tool_use_system_prompt(tools=tools_str)
@@ -5,7 +5,8 @@ import json
  import uuid
  import warnings
  from math import ceil
- from typing import Iterable, cast
+ from collections.abc import Iterable
+ from typing import cast

  import httpx
  import tiktoken
@@ -105,6 +106,24 @@ def convert_type(value, value_type):
  return value # 如果类型未知,返回原始值


+ def _get_first_enabled_endpoint(backend_setting, settings):
+ """Get the first enabled endpoint from backend settings"""
+ for endpoint_choice in backend_setting.endpoints:
+ if isinstance(endpoint_choice, dict):
+ endpoint_id = endpoint_choice["endpoint_id"]
+ else:
+ endpoint_id = endpoint_choice
+
+ try:
+ endpoint = settings.get_endpoint(endpoint_id)
+ if endpoint.enabled:
+ return endpoint
+ except ValueError:
+ # Endpoint not found, skip it
+ continue
+ return None
+
+
  def get_token_counts(text: str | dict, model: str = "", use_token_server_first: bool = True) -> int:
  if use_token_server_first and settings.token_server is not None:
  base_url = settings.token_server.url if settings.token_server.url is not None else f"http://{settings.token_server.host}:{settings.token_server.port}"
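The new _get_first_enabled_endpoint helper returns the first endpoint that both exists in settings and is enabled, accepting either plain endpoint IDs or {"endpoint_id": ...} dicts, and returns None when nothing qualifies; the hunks below switch the MiniMax, Moonshot, Gemini, StepFun, and ZhiPuAI branches of get_token_counts to it. A standalone sketch of that behaviour, assuming the helper above is in scope (Endpoint, ToySettings, and ToyBackend are toy stand-ins, not the package's real classes):

    from dataclasses import dataclass

    @dataclass
    class Endpoint:
        endpoint_id: str
        enabled: bool = True

    class ToySettings:
        def __init__(self, endpoints):
            self._by_id = {e.endpoint_id: e for e in endpoints}

        def get_endpoint(self, endpoint_id):
            try:
                return self._by_id[endpoint_id]
            except KeyError:
                # Mirrors the ValueError the helper expects for unknown endpoint IDs
                raise ValueError(f"Endpoint {endpoint_id} not found") from None

    @dataclass
    class ToyBackend:
        endpoints: list

    settings = ToySettings([Endpoint("a", enabled=False), Endpoint("b", enabled=True)])
    backend = ToyBackend(endpoints=["missing", "a", {"endpoint_id": "b"}])

    # "missing" is skipped (ValueError), "a" is skipped (disabled), "b" is returned
    assert _get_first_enabled_endpoint(backend, settings).endpoint_id == "b"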
@@ -126,10 +145,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.MiniMax).models[model]
  if len(backend_setting.endpoints) == 0:
  return int(len(text) / 1.33)
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return int(len(text) / 1.33)
  tokenize_url = "https://api.minimax.chat/v1/tokenize"
  headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
  request_body = {
@@ -150,10 +168,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.Moonshot).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  tokenize_url = f"{endpoint.api_base}/tokenizers/estimate-token-count"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
  request_body = {
@@ -171,10 +188,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.Gemini).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))

  api_base = endpoint.api_base.removesuffix("/openai/") if endpoint.api_base else "https://generativelanguage.googleapis.com/v1beta"
  base_url = f"{api_base}/models/{backend_setting.id}:countTokens"
@@ -200,7 +216,13 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  endpoint_id = endpoint_choice["endpoint_id"]
  else:
  endpoint_id = endpoint_choice
- endpoint = settings.get_endpoint(endpoint_id)
+
+ try:
+ endpoint = settings.get_endpoint(endpoint_id)
+ if not endpoint.enabled:
+ continue
+ except ValueError:
+ continue

  if endpoint.is_vertex or endpoint.is_bedrock or endpoint.endpoint_type == "anthropic_vertex" or endpoint.endpoint_type == "anthropic_bedrock":
  continue
@@ -214,10 +236,10 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  .input_tokens
  )
  except Exception as e:
- warnings.warn(f"Anthropic token counting failed: {e}")
+ warnings.warn(f"Anthropic token counting failed: {e}", stacklevel=2)

  # TODO: Use anthropic token counting
- warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet")
+ warnings.warn("Anthropic token counting is not implemented in Vertex or Bedrock yet", stacklevel=2)
  return len(get_gpt_4o_encoding().encode(text))
  elif model.startswith("deepseek"):
  from deepseek_tokenizer import deepseek_tokenizer
@@ -232,10 +254,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.StepFun).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  tokenize_url = f"{endpoint.api_base}/token/count"
  headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
  request_body = {
@@ -253,10 +274,9 @@ get_token_counts(text: str | dict, model: str = "", use_token_server_first:
  backend_setting = settings.get_backend(BackendType.ZhiPuAI).models[model]
  if len(backend_setting.endpoints) == 0:
  return len(get_gpt_35_encoding().encode(text))
- endpoint_id = backend_setting.endpoints[0]
- if isinstance(endpoint_id, dict):
- endpoint_id = endpoint_id["endpoint_id"]
- endpoint = settings.get_endpoint(endpoint_id)
+ endpoint = _get_first_enabled_endpoint(backend_setting, settings)
+ if endpoint is None:
+ return len(get_gpt_35_encoding().encode(text))
  if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
  model = "glm-4-plus"
  tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
@@ -19,7 +19,7 @@ async def count_tokens(request: TokenCountRequest):
  token_count = get_token_counts(request.text, request.model, use_token_server_first=False)
  return {"total_tokens": token_count}
  except Exception as e:
- raise HTTPException(status_code=500, detail=str(e))
+ raise HTTPException(status_code=500, detail=str(e)) from None

 
  def run_token_server(host: str | None = None, port: int | None = None):
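The token_server change above appends "from None" to the re-raised HTTPException. In Python this suppresses implicit exception chaining, so the 500 response's traceback no longer drags the original exception along as chained context, while its message still survives through detail=str(e). A standalone illustration of the mechanism (toy function, not package code):

    def parse_port(value: str) -> int:
        try:
            return int(value)
        except ValueError as e:
            # "from None" suppresses the "During handling of the above exception..." chain
            # in the traceback; the original message survives because we fold it in here.
            raise RuntimeError(f"invalid port: {e}") from None

    try:
        parse_port("eighty")
    except RuntimeError as err:
        assert err.__cause__ is None and err.__suppress_context__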