vectorvein 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/anthropic_client.py +30 -5
- vectorvein/chat_clients/base_client.py +4 -4
- vectorvein/chat_clients/gemini_client.py +22 -4
- vectorvein/chat_clients/minimax_client.py +29 -5
- vectorvein/chat_clients/openai_compatible_client.py +33 -4
- vectorvein/chat_clients/utils.py +86 -2
- vectorvein/settings/__init__.py +26 -28
- vectorvein/types/defaults.py +48 -3
- vectorvein/utilities/retry.py +36 -0
- {vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/METADATA +3 -1
- {vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/RECORD +12 -11
- {vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/WHEEL +0 -0
vectorvein/chat_clients/anthropic_client.py CHANGED
@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
 from google.auth import _helpers
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
                 base_url=self.endpoint.api_base,
             )
 
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 api_key=self.endpoint.api_key,
                 base_url=self.endpoint.api_base,
             )
+
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = await self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
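Taken together, the changes above make `max_tokens` optional everywhere: each client now accepts `max_tokens: int | None = None` and, when it is left as `None`, computes a value at request time from the model's `context_length`, the token count of the prompt (messages plus tool definitions), and the model's `max_output_tokens` cap where one exists. A minimal standalone sketch of that resolution logic, assuming a simplified model-settings object (`ModelSetting` and `count_tokens` below are illustrative stand-ins, not the library's API):

from dataclasses import dataclass


@dataclass
class ModelSetting:
    # Illustrative stand-in for the library's per-model settings.
    context_length: int
    max_output_tokens: int | None = None


def count_tokens(payload: dict) -> int:
    # Stub for get_token_counts(); the real function picks a tokenizer per model.
    return len(str(payload)) // 4


def resolve_max_tokens(max_tokens, messages, tools_params, setting: ModelSetting) -> int:
    # Mirrors the new per-client logic: only fill max_tokens when the caller passed None.
    if max_tokens is not None:
        return max_tokens
    token_counts = count_tokens({"messages": messages, "tools_params": tools_params})
    budget = setting.context_length - token_counts
    if setting.max_output_tokens is not None:
        return min(max(budget, 1), setting.max_output_tokens)
    return budget


print(resolve_max_tokens(None, [{"role": "user", "content": "hi"}], {}, ModelSetting(200000, 4096)))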
vectorvein/chat_clients/base_client.py CHANGED
@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
vectorvein/chat_clients/gemini_client.py CHANGED
@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
                 if "text" in part:
                     result["content"] += part["text"]
                 elif "functionCall" in part:
-
+                    tool_call = {
+                        "index": 0,
+                        "id": "call_0",
+                        "function": {
+                            "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                            "name": part["functionCall"]["name"],
+                        },
+                        "type": "function",
+                    }
+                    tool_calls.append(tool_call)
 
             if tool_calls:
                 result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
                 if "text" in part:
                     result["content"] += part["text"]
                 elif "functionCall" in part:
-
+                    tool_call = {
+                        "index": 0,
+                        "id": "call_0",
+                        "function": {
+                            "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                            "name": part["functionCall"]["name"],
+                        },
+                        "type": "function",
+                    }
+                    tool_calls.append(tool_call)
 
             if tool_calls:
                 result["tool_calls"] = tool_calls
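Both Gemini clients now translate a `functionCall` part from the Gemini response into an OpenAI-style `tool_calls` entry instead of dropping it. The mapping, lifted out of the diff into a self-contained snippet (the sample `part` mimics Gemini's response shape):

import json

# A Gemini response part of the kind the client iterates over.
part = {"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}

tool_call = {
    "index": 0,
    "id": "call_0",  # Gemini returns no call id, so a fixed placeholder is used
    "function": {
        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
        "name": part["functionCall"]["name"],
    },
    "type": "function",
}
print(tool_call["function"]["arguments"])  # {"city": "Paris"}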
vectorvein/chat_clients/minimax_client.py CHANGED
@@ -7,8 +7,8 @@ import httpx
 from openai._types import NotGiven
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -145,9 +155,11 @@ class MiniMaxChatClient(BaseChatClient):
                 if "usage" not in chunk_data:
                     continue
                 else:
+                    if chunk_data["object"] != "chat.completion.chunk":
+                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
-                            "content": chunk_data["choices"][0]["
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
                             "role": "assistant",
                             **tool_calls_params,
                         }
@@ -211,7 +223,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -256,6 +268,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -289,9 +311,11 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
                 if "usage" not in chunk_data:
                     continue
                 else:
+                    if chunk_data["object"] != "chat.completion.chunk":
+                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
-                            "content": chunk_data["choices"][0]["
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
                             "role": "assistant",
                             **tool_calls_params,
                         }
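Two fixes land in the MiniMax streaming path: chunks whose `object` field is not `chat.completion.chunk` are now skipped (presumably the stream's final non-delta payload), and the delta text is read with `.get("content")` so a delta without a content key no longer raises. A sketch of the filter over made-up chunk payloads:

chunks = [
    {"object": "chat.completion.chunk", "usage": {}, "choices": [{"delta": {"content": "Hel"}}]},
    {"object": "chat.completion.chunk", "usage": {}, "choices": [{"delta": {}}]},
    {"object": "chat.completion", "usage": {}, "choices": [{"message": {"content": "Hello"}}]},
]

for chunk_data in chunks:
    if chunk_data["object"] != "chat.completion.chunk":
        continue  # drop non-delta payloads
    # .get() yields None instead of a KeyError when the delta carries no text
    print(chunk_data["choices"][0]["delta"].get("content"))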
vectorvein/chat_clients/openai_compatible_client.py CHANGED
@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from .utils import (
     cutoff_messages,
+    get_token_counts,
     ToolCallContentProcessor,
     generate_tool_use_system_prompt,
 )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                     result["tool_calls"] = [
-                        tool_call.model_dump()
+                        {**tool_call.model_dump(), "type": "function"}
+                        for tool_call in response.choices[0].message.tool_calls
                     ]
                 else:
                     tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                     result["tool_calls"] = [
-                        tool_call.model_dump()
+                        {**tool_call.model_dump(), "type": "function"}
+                        for tool_call in response.choices[0].message.tool_calls
                    ]
                 else:
                     tool_call_content_processor = ToolCallContentProcessor(result["content"])
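The OpenAI-compatible clients normalize tool calls on both paths: streaming deltas get an explicit `index` assigned by position (some compatible providers omit it), and non-stream responses get `"type": "function"` stamped onto each dumped tool call. The non-stream normalization over plain dicts (stand-ins for the SDK objects that `model_dump()` would produce):

# Stand-ins for tool_call.model_dump() output from the OpenAI SDK.
dumped_tool_calls = [
    {"id": "call_abc", "function": {"name": "f", "arguments": "{}"}, "type": None},
]

result_tool_calls = [
    {**tool_call, "type": "function"}  # force the type some providers leave unset
    for tool_call in dumped_tool_calls
]
print(result_tool_calls[0]["type"])  # function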
vectorvein/chat_clients/utils.py CHANGED
@@ -3,8 +3,14 @@
 import re
 import json
 
+import httpx
 import tiktoken
+from anthropic import Anthropic
+from qwen_tokenizer import qwen_tokenizer
+from deepseek_tokenizer import deepseek_tokenizer
 
+from ..settings import settings
+from ..utilities.retry import Retry
 from ..types.enums import BackendType
 from ..utilities.media_processing import ImageProcessor
 
@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
         text = str(text)
     if model == "gpt-3.5-turbo":
         return len(chatgpt_encoding.encode(text))
-    elif model
+    elif model in ("gpt-4o", "gpt-4o-mini"):
         return len(gpt_4o_encoding.encode(text))
     elif model.startswith("abab"):
-
+        model_setting = settings.minimax.models[model]
+        if len(model_setting.endpoints) == 0:
+            return int(len(text) / 1.33)
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.minimax.chat/v1/tokenize"
+        headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+        request_body = {
+            "model": model,
+            "tokens_to_generate": 128,
+            "temperature": 0.2,
+            "messages": [
+                {"sender_type": "USER", "text": text},
+            ],
+        }
+
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["segments_num"]
+    elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+        model_setting = settings.moonshot.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+        request_body = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": text},
+            ],
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["data"]["total_tokens"]
+    elif model.startswith("gemini"):
+        model_setting = settings.gemini.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        params = {"key": endpoint.api_key}
+        request_body = {
+            "contents": {
+                "role": "USER",
+                "parts": [
+                    {"text": "TEXT"},
+                ],
+            },
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url, json=request_body, params=params, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        result = response.json()
+        return result["totalTokens"]
+    elif model.startswith("claude"):
+        return Anthropic().count_tokens(text)
+    elif model.startswith("deepseek"):
+        return len(deepseek_tokenizer.encode(text))
+    elif model.startswith("qwen"):
+        return len(qwen_tokenizer.encode(text))
     else:
         return len(chatgpt_encoding.encode(text))
 
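`get_token_counts` now routes by model family: the new local `deepseek-tokenizer` and `qwen-tokenizer` packages for deepseek/qwen, the Anthropic SDK's counter for claude, remote tokenize endpoints wrapped in the new `Retry` helper for abab/moonshot/gemini, and tiktoken encodings otherwise. (Note that the gemini branch sends the literal string `"TEXT"` in its request body rather than the `text` variable, so its count does not depend on the input; that looks unintentional.) A sketch of the offline branches only, assuming the two tokenizer packages declared in METADATA below are installed:

import tiktoken
from qwen_tokenizer import qwen_tokenizer
from deepseek_tokenizer import deepseek_tokenizer


def count_tokens_offline(text: str, model: str = "") -> int:
    # Local-tokenizer routing; the network-backed branches are omitted here.
    if model.startswith("deepseek"):
        return len(deepseek_tokenizer.encode(text))
    if model.startswith("qwen"):
        return len(qwen_tokenizer.encode(text))
    return len(tiktoken.get_encoding("cl100k_base").encode(text))


print(count_tokens_offline("hello world", "deepseek-chat"))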
vectorvein/settings/__init__.py CHANGED
@@ -14,41 +14,39 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
 
-
-
-    )
-
-
-
-
-
-
-
-
-
-    yi_models: BackendSettings = Field(default_factory=BackendSettings, description="Yi models settings.")
-    zhipuai_models: BackendSettings = Field(default_factory=BackendSettings, description="Zhipuai models settings.")
+    anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
+    deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
+    gemini: BackendSettings = Field(default_factory=BackendSettings, description="Gemini models settings.")
+    groq: BackendSettings = Field(default_factory=BackendSettings, description="Groq models settings.")
+    local: BackendSettings = Field(default_factory=BackendSettings, description="Local models settings.")
+    minimax: BackendSettings = Field(default_factory=BackendSettings, description="Minimax models settings.")
+    mistral: BackendSettings = Field(default_factory=BackendSettings, description="Mistral models settings.")
+    moonshot: BackendSettings = Field(default_factory=BackendSettings, description="Moonshot models settings.")
+    openai: BackendSettings = Field(default_factory=BackendSettings, description="OpenAI models settings.")
+    qwen: BackendSettings = Field(default_factory=BackendSettings, description="Qwen models settings.")
+    yi: BackendSettings = Field(default_factory=BackendSettings, description="Yi models settings.")
+    zhipuai: BackendSettings = Field(default_factory=BackendSettings, description="Zhipuai models settings.")
 
     def __init__(self, **data):
         model_types = {
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
-            "
+            "anthropic": defs.ANTHROPIC_MODELS,
+            "deepseek": defs.DEEPSEEK_MODELS,
+            "gemini": defs.GEMINI_MODELS,
+            "groq": defs.GROQ_MODELS,
+            "local": {},
+            "minimax": defs.MINIMAX_MODELS,
+            "mistral": defs.MISTRAL_MODELS,
+            "moonshot": defs.MOONSHOT_MODELS,
+            "openai": defs.OPENAI_MODELS,
+            "qwen": defs.QWEN_MODELS,
+            "yi": defs.YI_MODELS,
+            "zhipuai": defs.ZHIPUAI_MODELS,
         }
 
         for model_type, default_models in model_types.items():
             if model_type in data:
                 model_settings = BackendSettings()
-                model_settings.update_models(default_models, data[model_type])
+                model_settings.update_models(default_models, data[model_type].get("models", {}))
                 data[model_type] = model_settings
             else:
                 data[model_type] = BackendSettings(models=default_models)
@@ -65,7 +63,7 @@ class Settings(BaseModel):
         return EndpointSetting()
 
     def get_backend(self, backend: BackendType) -> BackendSettings:
-        return getattr(self,
+        return getattr(self, backend.value.lower())
 
 
 settings = Settings()
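The settings refactor drops the `_models` suffix from every backend field and reads user overrides from a nested `models` key (`data[model_type].get("models", {})`), which in turn lets `get_backend` resolve a backend with a plain `getattr(self, backend.value.lower())`. A sketch of the configuration shape this implies (field names per the diff above; the override values are illustrative):

from vectorvein.settings import Settings

user_config = {
    "openai": {
        "models": {
            # Overrides merge into the defaults from types/defaults.py.
            "gpt-4o": {"id": "gpt-4o", "context_length": 128000},
        }
    }
}
settings = Settings(**user_config)
print(settings.openai.models["gpt-4o"].context_length)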
vectorvein/types/defaults.py CHANGED
@@ -14,19 +14,19 @@ MODEL_CONTEXT_LENGTH = 32768
 MOONSHOT_MODELS = {
     "moonshot-v1-8k": {
         "id": "moonshot-v1-8k",
-        "context_length":
+        "context_length": 8192,
         "function_call_available": True,
         "response_format_available": True,
     },
     "moonshot-v1-32k": {
         "id": "moonshot-v1-32k",
-        "context_length":
+        "context_length": 32768,
         "function_call_available": True,
         "response_format_available": True,
     },
     "moonshot-v1-128k": {
         "id": "moonshot-v1-128k",
-        "context_length":
+        "context_length": 131072,
         "function_call_available": True,
         "response_format_available": True,
     },
@@ -38,12 +38,14 @@ DEEPSEEK_MODELS = {
     "deepseek-chat": {
         "id": "deepseek-chat",
         "context_length": 128000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "deepseek-coder": {
         "id": "deepseek-chat",
         "context_length": 128000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
@@ -85,48 +87,56 @@ QWEN_MODELS = {
     "qwen1.5-1.8b-chat": {
         "id": "qwen1.5-1.8b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-4b-chat": {
         "id": "qwen1.5-4b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-7b-chat": {
         "id": "qwen1.5-7b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-14b-chat": {
         "id": "qwen1.5-14b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-32b-chat": {
         "id": "qwen1.5-32b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-72b-chat": {
         "id": "qwen1.5-72b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen1.5-110b-chat": {
         "id": "qwen1.5-110b-chat",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
     "qwen2-72b-instruct": {
         "id": "qwen2-72b-instruct",
         "context_length": 30000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": True,
     },
@@ -138,42 +148,49 @@ YI_MODELS = {
     "yi-large": {
         "id": "yi-large",
         "context_length": 32000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": False,
     },
     "yi-large-turbo": {
         "id": "yi-large-turbo",
         "context_length": 16000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": False,
     },
     "yi-large-fc": {
         "id": "yi-large-fc",
         "context_length": 32000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": False,
     },
     "yi-medium": {
         "id": "yi-medium",
         "context_length": 16000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": False,
     },
     "yi-medium-200k": {
         "id": "yi-medium-200k",
         "context_length": 200000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": False,
     },
     "yi-spark": {
         "id": "yi-spark",
         "context_length": 16000,
+        "max_output_tokens": 4096,
         "function_call_available": False,
         "response_format_available": False,
     },
     "yi-vision": {
         "id": "yi-vision",
         "context_length": 4000,
+        "max_output_tokens": 2000,
         "function_call_available": False,
         "response_format_available": False,
     },
@@ -187,42 +204,56 @@ ZHIPUAI_MODELS = {
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4": {
         "id": "glm-4",
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4-0520": {
         "id": "glm-4-0520",
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4-air": {
         "id": "glm-4-air",
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4-airx": {
         "id": "glm-4-airx",
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4-flash": {
         "id": "glm-4-flash",
         "context_length": 128000,
         "function_call_available": True,
         "response_format_available": False,
+        "max_output_tokens": 4095,
+    },
+    "glm-4-long": {
+        "id": "glm-4-long",
+        "context_length": 1000000,
+        "function_call_available": True,
+        "response_format_available": False,
+        "max_output_tokens": 4095,
     },
     "glm-4v": {
         "id": "glm-4v",
         "context_length": 2000,
         "function_call_available": False,
         "response_format_available": False,
+        "max_output_tokens": 1024,
     },
 }
 
@@ -287,34 +318,40 @@ OPENAI_MODELS = {
         "context_length": 16385,
         "function_call_available": True,
         "response_format_available": True,
+        "max_output_tokens": 4096,
     },
     "gpt-4-turbo": {
         "id": "gpt-4-turbo",
         "context_length": 128000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "gpt-4": {
         "id": "gpt-4",
         "context_length": 8192,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "gpt-4o": {
         "id": "gpt-4o",
         "context_length": 128000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "gpt-4o-mini": {
         "id": "gpt-4o-mini",
         "context_length": 128000,
+        "max_output_tokens": 16384,
         "function_call_available": True,
         "response_format_available": True,
     },
     "gpt-4v": {
         "id": "gpt-4v",
         "context_length": 128000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
@@ -326,24 +363,28 @@ ANTHROPIC_MODELS = {
     "claude-3-opus-20240229": {
         "id": "claude-3-opus-20240229",
         "context_length": 200000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "claude-3-sonnet-20240229": {
         "id": "claude-3-sonnet-20240229",
         "context_length": 200000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "claude-3-haiku-20240307": {
         "id": "claude-3-haiku-20240307",
         "context_length": 200000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
     "claude-3-5-sonnet-20240620": {
         "id": "claude-3-5-sonnet-20240620",
         "context_length": 200000,
+        "max_output_tokens": 4096,
         "function_call_available": True,
         "response_format_available": True,
     },
@@ -355,24 +396,28 @@ MINIMAX_MODELS = {
     "abab5-chat": {
         "id": "abab5-chat",
         "context_length": 6144,
+        "max_output_tokens": 6144,
         "function_call_available": True,
         "response_format_available": True,
     },
     "abab5.5-chat": {
         "id": "abab5.5-chat",
         "context_length": 16384,
+        "max_output_tokens": 16384,
         "function_call_available": True,
         "response_format_available": True,
     },
     "abab6-chat": {
         "id": "abab6-chat",
         "context_length": 32768,
+        "max_output_tokens": 32768,
         "function_call_available": True,
         "response_format_available": True,
     },
     "abab6.5s-chat": {
         "id": "abab6.5s-chat",
         "context_length": 245760,
+        "max_output_tokens": 245760,
         "function_call_available": True,
         "response_format_available": True,
     },
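The `max_output_tokens` values added throughout this file are what the new client-side clamp (see the chat-client diffs above) uses as a ceiling when it auto-fills `max_tokens`. A tiny worked example with the gpt-4o-mini numbers from these tables:

# gpt-4o-mini: context_length 128000, max_output_tokens 16384 (from the table above).
context_length, max_output_tokens = 128000, 16384
token_counts = 2500  # hypothetical prompt size

max_tokens = min(max(context_length - token_counts, 1), max_output_tokens)
print(max_tokens)  # 16384 -- capped by max_output_tokens, not the leftover context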
vectorvein/utilities/retry.py ADDED
@@ -0,0 +1,36 @@
+# @Author: Bi Ying
+# @Date:   2024-08-14 13:03:10
+import time
+
+
+class Retry:
+    def __init__(self, function):
+        self.function = function
+        self.__retry_times = 3
+        self.__sleep_time = 1
+        self.pargs = []
+        self.kwargs = {}
+
+    def args(self, *args, **kwargs):
+        self.pargs = args
+        self.kwargs = kwargs
+        return self
+
+    def retry_times(self, retry_times: int):
+        self.__retry_times = retry_times
+        return self
+
+    def sleep_time(self, sleep_time):
+        self.__sleep_time = sleep_time
+        return self
+
+    def run(self):
+        try_times = 0
+        while try_times < self.__retry_times:
+            try:
+                return True, self.function(*self.pargs, **self.kwargs)
+            except Exception as e:
+                print(f"{self.function.__name__} error: {e}")
+                try_times += 1
+                time.sleep(self.__sleep_time)
+        return False, None
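`Retry` is a small fluent wrapper used by the new tokenize calls in `chat_clients/utils.py`: configure the callable and its arguments, then `run()` returns a `(success, result)` tuple instead of raising. Usage in the same style as utils.py (the URL here is illustrative):

import httpx

from vectorvein.utilities.retry import Retry

success, response = (
    Retry(httpx.get)
    .args("https://example.com/health", timeout=None)
    .retry_times(5)
    .sleep_time(10)
    .run()
)
if success:
    print(response.status_code)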
{vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.6
+Version: 0.1.8
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: anthropic[vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
+Requires-Dist: deepseek-tokenizer>=0.1.0
+Requires-Dist: qwen-tokenizer>=0.1.0
 Description-Content-Type: text/markdown
 
 # vectorvein
{vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/RECORD CHANGED
@@ -1,25 +1,26 @@
-vectorvein-0.1.
-vectorvein-0.1.
+vectorvein-0.1.8.dist-info/METADATA,sha256=3gBxjxgbrdsbA4Xwc3MkuCct-9VFuuM0X9hfSj6sGt4,501
+vectorvein-0.1.8.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
 vectorvein/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectorvein/chat_clients/__init__.py,sha256=5j7W--jr-l2cqDJp38uXYvkydDK0rnzm7MYGSACHKmU,3976
-vectorvein/chat_clients/anthropic_client.py,sha256=
-vectorvein/chat_clients/base_client.py,sha256=
+vectorvein/chat_clients/anthropic_client.py,sha256=JjigSUsIn06ixIEjnOJhVbcMqy2_MAL3iVUlDFAFMW4,20008
+vectorvein/chat_clients/base_client.py,sha256=wMXpQ1L1KDb2Hg6va3H3GmcVeQB6r6sh7F4IS0DBQWI,4275
 vectorvein/chat_clients/deepseek_client.py,sha256=3qWu01NlJAP2N-Ff62d5-CZXZitlizE1fzb20LNetig,526
-vectorvein/chat_clients/gemini_client.py,sha256=
+vectorvein/chat_clients/gemini_client.py,sha256=IHcBHTSHkj3f962S5L7Ga-XA-96sq8quIDRZpoqvGss,13653
 vectorvein/chat_clients/groq_client.py,sha256=Uow4pgdmFi93ZQSoOol2-0PhhqkW-S0XuSldvppz5U4,498
 vectorvein/chat_clients/local_client.py,sha256=55nOsxzqUf79q3Y14MKROA71zxhsT7p7FsDZ89rts2M,422
-vectorvein/chat_clients/minimax_client.py,sha256=
+vectorvein/chat_clients/minimax_client.py,sha256=iC60QnLQRb5Y0B6_vorL9wkZx1Gs9w9znm8zil4uJ-g,13588
 vectorvein/chat_clients/mistral_client.py,sha256=1aKSylzBDaLYcFnaBIL4-sXSzWmXfBeON9Q0rq-ziWw,534
 vectorvein/chat_clients/moonshot_client.py,sha256=gbu-6nGxx8uM_U2WlI4Wus881rFRotzHtMSoYOcruGU,526
 vectorvein/chat_clients/openai_client.py,sha256=Nz6tV45pWcsOupxjnsRsGTicbQNJWIZyxuJoJ5DGMpg,527
-vectorvein/chat_clients/openai_compatible_client.py,sha256=
+vectorvein/chat_clients/openai_compatible_client.py,sha256=fvg--wFwnFEEhLGS9_u1XzNhtkkDUf4_rq6zYKwnOuI,13738
 vectorvein/chat_clients/qwen_client.py,sha256=-ryh-m9PgsO0fc4ulcCmPTy1155J8YUy15uPoJQOHA0,513
-vectorvein/chat_clients/utils.py,sha256=
+vectorvein/chat_clients/utils.py,sha256=mnAew2Ie3nQHdEyDLKuJvXkQ5QdcSAJ6SpYk5JPbR1Q,20888
 vectorvein/chat_clients/yi_client.py,sha256=RNf4CRuPJfixrwLZ3-DEc3t25QDe1mvZeb9sku2f8Bc,484
 vectorvein/chat_clients/zhipuai_client.py,sha256=Ys5DSeLCuedaDXr3PfG1EW2zKXopt-awO2IylWSwY0s,519
-vectorvein/settings/__init__.py,sha256=
-vectorvein/types/defaults.py,sha256=
+vectorvein/settings/__init__.py,sha256=4mpccT7eZC3yI1vVnVViW4wHBnDEH9D2R5EsIP34VgU,3218
+vectorvein/types/defaults.py,sha256=ANIYL0W0bxl2IBxvtkS_WlS_qMQQwpi5TKRdLxdk47M,13027
 vectorvein/types/enums.py,sha256=vzOenCnRlFXBwPh-lfFhjGfM-6yfDj7wZColHODqocI,1550
 vectorvein/types/llm_parameters.py,sha256=nBjStC2zndTY__yhD2WFXB09taxEhDLE3OHA6MICfgE,3494
 vectorvein/utilities/media_processing.py,sha256=BujciRmw1GMmc3ELRvafL8STcy6r5b2rVnh27-uA7so,2256
-vectorvein
+vectorvein/utilities/retry.py,sha256=9ePuJdeUUGx-qMWfaFxmlOvG_lQPwCQ4UB1z3Edlo34,993
+vectorvein-0.1.8.dist-info/RECORD,,
{vectorvein-0.1.6.dist-info → vectorvein-0.1.8.dist-info}/WHEEL
File without changes