vectorvein 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
  from google.auth import _helpers

  from ..settings import settings
- from .utils import cutoff_messages
  from ..types import defaults as defs
+ from .utils import cutoff_messages, get_token_counts
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from ..types.enums import ContextLengthControlType, BackendType
  from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
  base_url=self.endpoint.api_base,
  )

+ tools_params = refactor_tool_use_params(tools) if tools else tools
+
+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response = self._client.messages.create(
  model=self.model_setting.id,
  messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
  stream=self.stream,
  temperature=self.temperature,
  max_tokens=max_tokens,
- tools=refactor_tool_use_params(tools) if tools else tools,
+ tools=tools_params,
  tool_choice=tool_choice,
  )

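When no max_tokens is passed, the 0.1.9 clients derive it from the model settings instead of using a fixed default: the prompt (messages plus tool parameters) is counted with get_token_counts, that count is subtracted from the model's context_length, and the result is clamped to max_output_tokens when the model defines one. A rough sketch of the same arithmetic, with purely illustrative numbers rather than values from any real model setting:

    context_length = 200000        # model's total context window
    max_output_tokens = 4096       # per-model output cap; may be None
    token_counts = 1500            # tokens already consumed by messages + tools

    max_tokens = context_length - token_counts   # 198500 tokens of headroom
    if max_output_tokens is not None:
        max_tokens = min(max(max_tokens, 1), max_output_tokens)   # clamps to 4096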
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  api_key=self.endpoint.api_key,
  base_url=self.endpoint.api_base,
  )
+
+ tools_params = refactor_tool_use_params(tools) if tools else tools
+
+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response = await self._client.messages.create(
  model=self.model_setting.id,
  messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
  stream=self.stream,
  temperature=self.temperature,
  max_tokens=max_tokens,
- tools=refactor_tool_use_params(tools) if tools else tools,
+ tools=tools_params,
  tool_choice=tool_choice,
  )

@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
  model: str | None = None,
  stream: bool = False,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
  messages: list,
  model: str | None = None,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
  model: str | None = None,
  stream: bool = False,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
  messages: list,
  model: str | None = None,
  temperature: float = 0.7,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str | None = None,
  ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
  if "text" in part:
      result["content"] += part["text"]
  elif "functionCall" in part:
-     tool_calls.append(part["functionCall"])
+     tool_call = {
+         "index": 0,
+         "id": "call_0",
+         "function": {
+             "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+             "name": part["functionCall"]["name"],
+         },
+         "type": "function",
+     }
+     tool_calls.append(tool_call)

  if tool_calls:
      result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str | None = None,
  ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
  if "text" in part:
      result["content"] += part["text"]
  elif "functionCall" in part:
-     tool_calls.append(part["functionCall"])
+     tool_call = {
+         "index": 0,
+         "id": "call_0",
+         "function": {
+             "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+             "name": part["functionCall"]["name"],
+         },
+         "type": "function",
+     }
+     tool_calls.append(tool_call)

  if tool_calls:
      result["tool_calls"] = tool_calls
@@ -7,8 +7,8 @@ import httpx
  from openai._types import NotGiven

  from ..settings import settings
- from .utils import cutoff_messages
  from ..types import defaults as defs
+ from .utils import cutoff_messages, get_token_counts
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from ..types.enums import ContextLengthControlType, BackendType
  from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2048,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str = "auto",
  ):
@@ -93,7 +93,7 @@ class MiniMaxChatClient(BaseChatClient):
  model=self.model_setting.id,
  )

- if tools is not None:
+ if tools:
      tools_params = {
          "tools": [
              {
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  self.url = self.endpoint.api_base
  self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

@@ -139,22 +149,19 @@ class MiniMaxChatClient(BaseChatClient):
  for chunk in response.iter_lines():
      if chunk:
          chunk_data = json.loads(chunk[6:])
+         if chunk_data["object"] != "chat.completion.chunk":
+             continue
          tool_calls_params = extract_tool_calls(chunk_data)
          has_tool_calls = True if tool_calls_params else False
          if has_tool_calls:
-             if "usage" not in chunk_data:
-                 continue
-             else:
-                 yield ChatCompletionDeltaMessage(
-                     **{
-                         "content": chunk_data["choices"][0]["message"].get("content"),
-                         "role": "assistant",
-                         **tool_calls_params,
-                     }
-                 )
+             yield ChatCompletionDeltaMessage(
+                 **{
+                     "content": chunk_data["choices"][0]["delta"].get("content"),
+                     "role": "assistant",
+                     **tool_calls_params,
+                 }
+             )
          else:
-             if "usage" in chunk_data:
-                 continue
              yield ChatCompletionDeltaMessage(
                  **{
                      "content": chunk_data["choices"][0]["delta"]["content"],
@@ -211,7 +218,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2048,
+ max_tokens: int | None = None,
  tools: list | None = None,
  tool_choice: str = "auto",
  ):
@@ -238,7 +245,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  model=self.model_setting.id,
  )

- if tools is not None:
+ if tools:
      tools_params = {
          "tools": [
              {
@@ -256,6 +263,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  self.url = self.endpoint.api_base
  self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

@@ -283,22 +300,19 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
  async for chunk in response.aiter_lines():
      if chunk:
          chunk_data = json.loads(chunk[6:])
+         if chunk_data["object"] != "chat.completion.chunk":
+             continue
          tool_calls_params = extract_tool_calls(chunk_data)
          has_tool_calls = True if tool_calls_params else False
          if has_tool_calls:
-             if "usage" not in chunk_data:
-                 continue
-             else:
-                 yield ChatCompletionDeltaMessage(
-                     **{
-                         "content": chunk_data["choices"][0]["message"].get("content"),
-                         "role": "assistant",
-                         **tool_calls_params,
-                     }
-                 )
+             yield ChatCompletionDeltaMessage(
+                 **{
+                     "content": chunk_data["choices"][0]["delta"].get("content"),
+                     "role": "assistant",
+                     **tool_calls_params,
+                 }
+             )
          else:
-             if "usage" in chunk_data:
-                 continue
              yield ChatCompletionDeltaMessage(
                  **{
                      "content": chunk_data["choices"][0]["delta"]["content"],
@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from .utils import (
      cutoff_messages,
+     get_token_counts,
      ToolCallContentProcessor,
      generate_tool_use_system_prompt,
  )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
  model=self.model_setting.id,
  messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
  if not chunk.choices[0].delta:
      continue
  if self.model_setting.function_call_available:
+     if chunk.choices[0].delta.tool_calls:
+         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+             tool_call.index = index
      yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
  else:
      message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
  if tools:
      if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
          result["tool_calls"] = [
-             tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+             {**tool_call.model_dump(), "type": "function"}
+             for tool_call in response.choices[0].message.tool_calls
          ]
      else:
          tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  model: str | None = None,
  stream: bool | None = None,
  temperature: float | None = None,
- max_tokens: int = 2000,
+ max_tokens: int | None = None,
  tools: list | NotGiven = NOT_GIVEN,
  tool_choice: str | NotGiven = NOT_GIVEN,
  ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  else:
      tools_params = {}

+ if max_tokens is None:
+     max_output_tokens = self.model_setting.max_output_tokens
+     if max_output_tokens is not None:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+         max_tokens = min(max(max_tokens, 1), max_output_tokens)
+     else:
+         token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+         max_tokens = self.model_setting.context_length - token_counts
+
  response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
  model=self.model_setting.id,
  messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  if not chunk.choices[0].delta:
      continue
  if self.model_setting.function_call_available:
+     if chunk.choices[0].delta.tool_calls:
+         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+             tool_call.index = index
      yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
  else:
      message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
  if tools:
      if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
          result["tool_calls"] = [
-             tool_call.model_dump() for tool_call in response.choices[0].message.tool_calls
+             {**tool_call.model_dump(), "type": "function"}
+             for tool_call in response.choices[0].message.tool_calls
          ]
      else:
          tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -3,8 +3,14 @@
  import re
  import json

+ import httpx
  import tiktoken
+ from anthropic import Anthropic
+ from qwen_tokenizer import qwen_tokenizer
+ from deepseek_tokenizer import deepseek_tokenizer

+ from ..settings import settings
+ from ..utilities.retry import Retry
  from ..types.enums import BackendType
  from ..utilities.media_processing import ImageProcessor

@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
  text = str(text)
  if model == "gpt-3.5-turbo":
      return len(chatgpt_encoding.encode(text))
- elif model == "gpt-4o":
+ elif model in ("gpt-4o", "gpt-4o-mini"):
      return len(gpt_4o_encoding.encode(text))
  elif model.startswith("abab"):
-     return int(len(text) / 1.33)
+     model_setting = settings.minimax.models[model]
+     if len(model_setting.endpoints) == 0:
+         return int(len(text) / 1.33)
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     tokenize_url = "https://api.minimax.chat/v1/tokenize"
+     headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+     request_body = {
+         "model": model,
+         "tokens_to_generate": 128,
+         "temperature": 0.2,
+         "messages": [
+             {"sender_type": "USER", "text": text},
+         ],
+     }
+
+     _, response = (
+         Retry(httpx.post)
+         .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     response = response.json()
+     return response["segments_num"]
+ elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+     model_setting = settings.moonshot.models[model]
+     if len(model_setting.endpoints) == 0:
+         return len(chatgpt_encoding.encode(text))
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+     request_body = {
+         "model": model,
+         "messages": [
+             {"role": "user", "content": text},
+         ],
+     }
+     _, response = (
+         Retry(httpx.post)
+         .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     response = response.json()
+     return response["data"]["total_tokens"]
+ elif model.startswith("gemini"):
+     model_setting = settings.gemini.models[model]
+     if len(model_setting.endpoints) == 0:
+         return len(chatgpt_encoding.encode(text))
+     endpoint_id = model_setting.endpoints[0]
+     endpoint = settings.get_endpoint(endpoint_id)
+     url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+     params = {"key": endpoint.api_key}
+     request_body = {
+         "contents": {
+             "role": "USER",
+             "parts": [
+                 {"text": "TEXT"},
+             ],
+         },
+     }
+     _, response = (
+         Retry(httpx.post)
+         .args(url, json=request_body, params=params, timeout=None)
+         .retry_times(5)
+         .sleep_time(10)
+         .run()
+     )
+     result = response.json()
+     return result["totalTokens"]
+ elif model.startswith("claude"):
+     return Anthropic().count_tokens(text)
+ elif model.startswith("deepseek"):
+     return len(deepseek_tokenizer.encode(text))
+ elif model.startswith("qwen"):
+     return len(qwen_tokenizer.encode(text))
  else:
      return len(chatgpt_encoding.encode(text))

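get_token_counts now dispatches by model family: MiniMax, Moonshot and Gemini models are counted through their providers' tokenize endpoints (falling back to a local estimate when no endpoint is configured), claude models use the Anthropic SDK's count_tokens, deepseek and qwen models use their tokenizer packages, and everything else falls back to tiktoken encodings. A minimal usage sketch (the prompt text and budget arithmetic are illustrative only):

    from vectorvein.chat_clients.utils import get_token_counts

    prompt_tokens = get_token_counts("How is the weather today?", model="gpt-4o")
    remaining = 128000 - prompt_tokens   # the same subtraction the chat clients perform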
@@ -14,19 +14,19 @@ MODEL_CONTEXT_LENGTH = 32768
  MOONSHOT_MODELS = {
      "moonshot-v1-8k": {
          "id": "moonshot-v1-8k",
-         "context_length": 8000,
+         "context_length": 8192,
          "function_call_available": True,
          "response_format_available": True,
      },
      "moonshot-v1-32k": {
          "id": "moonshot-v1-32k",
-         "context_length": 32000,
+         "context_length": 32768,
          "function_call_available": True,
          "response_format_available": True,
      },
      "moonshot-v1-128k": {
          "id": "moonshot-v1-128k",
-         "context_length": 128000,
+         "context_length": 131072,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -38,12 +38,14 @@ DEEPSEEK_MODELS = {
      "deepseek-chat": {
          "id": "deepseek-chat",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "deepseek-coder": {
          "id": "deepseek-chat",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -85,48 +87,56 @@ QWEN_MODELS = {
      "qwen1.5-1.8b-chat": {
          "id": "qwen1.5-1.8b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-4b-chat": {
          "id": "qwen1.5-4b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-7b-chat": {
          "id": "qwen1.5-7b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-14b-chat": {
          "id": "qwen1.5-14b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-32b-chat": {
          "id": "qwen1.5-32b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-72b-chat": {
          "id": "qwen1.5-72b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen1.5-110b-chat": {
          "id": "qwen1.5-110b-chat",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
      "qwen2-72b-instruct": {
          "id": "qwen2-72b-instruct",
          "context_length": 30000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": True,
      },
@@ -138,42 +148,49 @@ YI_MODELS = {
      "yi-large": {
          "id": "yi-large",
          "context_length": 32000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-large-turbo": {
          "id": "yi-large-turbo",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-large-fc": {
          "id": "yi-large-fc",
          "context_length": 32000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": False,
      },
      "yi-medium": {
          "id": "yi-medium",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-medium-200k": {
          "id": "yi-medium-200k",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-spark": {
          "id": "yi-spark",
          "context_length": 16000,
+         "max_output_tokens": 4096,
          "function_call_available": False,
          "response_format_available": False,
      },
      "yi-vision": {
          "id": "yi-vision",
          "context_length": 4000,
+         "max_output_tokens": 2000,
          "function_call_available": False,
          "response_format_available": False,
      },
@@ -187,42 +204,56 @@ ZHIPUAI_MODELS = {
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4": {
          "id": "glm-4",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-0520": {
          "id": "glm-4-0520",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-air": {
          "id": "glm-4-air",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-airx": {
          "id": "glm-4-airx",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4-flash": {
          "id": "glm-4-flash",
          "context_length": 128000,
          "function_call_available": True,
          "response_format_available": False,
+         "max_output_tokens": 4095,
+     },
+     "glm-4-long": {
+         "id": "glm-4-long",
+         "context_length": 1000000,
+         "function_call_available": True,
+         "response_format_available": False,
+         "max_output_tokens": 4095,
      },
      "glm-4v": {
          "id": "glm-4v",
          "context_length": 2000,
          "function_call_available": False,
          "response_format_available": False,
+         "max_output_tokens": 1024,
      },
  }

@@ -287,34 +318,40 @@ OPENAI_MODELS = {
          "context_length": 16385,
          "function_call_available": True,
          "response_format_available": True,
+         "max_output_tokens": 4096,
      },
      "gpt-4-turbo": {
          "id": "gpt-4-turbo",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4": {
          "id": "gpt-4",
          "context_length": 8192,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4o": {
          "id": "gpt-4o",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4o-mini": {
          "id": "gpt-4o-mini",
          "context_length": 128000,
+         "max_output_tokens": 16384,
          "function_call_available": True,
          "response_format_available": True,
      },
      "gpt-4v": {
          "id": "gpt-4v",
          "context_length": 128000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -326,24 +363,28 @@ ANTHROPIC_MODELS = {
      "claude-3-opus-20240229": {
          "id": "claude-3-opus-20240229",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-sonnet-20240229": {
          "id": "claude-3-sonnet-20240229",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-haiku-20240307": {
          "id": "claude-3-haiku-20240307",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
      "claude-3-5-sonnet-20240620": {
          "id": "claude-3-5-sonnet-20240620",
          "context_length": 200000,
+         "max_output_tokens": 4096,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -355,24 +396,28 @@ MINIMAX_MODELS = {
      "abab5-chat": {
          "id": "abab5-chat",
          "context_length": 6144,
+         "max_output_tokens": 6144,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab5.5-chat": {
          "id": "abab5.5-chat",
          "context_length": 16384,
+         "max_output_tokens": 16384,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab6-chat": {
          "id": "abab6-chat",
          "context_length": 32768,
+         "max_output_tokens": 32768,
          "function_call_available": True,
          "response_format_available": True,
      },
      "abab6.5s-chat": {
          "id": "abab6.5s-chat",
          "context_length": 245760,
+         "max_output_tokens": 245760,
          "function_call_available": True,
          "response_format_available": True,
      },
@@ -0,0 +1,36 @@
+ # @Author: Bi Ying
+ # @Date: 2024-08-14 13:03:10
+ import time
+
+
+ class Retry:
+     def __init__(self, function):
+         self.function = function
+         self.__retry_times = 3
+         self.__sleep_time = 1
+         self.pargs = []
+         self.kwargs = {}
+
+     def args(self, *args, **kwargs):
+         self.pargs = args
+         self.kwargs = kwargs
+         return self
+
+     def retry_times(self, retry_times: int):
+         self.__retry_times = retry_times
+         return self
+
+     def sleep_time(self, sleep_time):
+         self.__sleep_time = sleep_time
+         return self
+
+     def run(self):
+         try_times = 0
+         while try_times < self.__retry_times:
+             try:
+                 return True, self.function(*self.pargs, **self.kwargs)
+             except Exception as e:
+                 print(f"{self.function.__name__} 函数出错:{e}")
+                 try_times += 1
+                 time.sleep(self.__sleep_time)
+         return False, None
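The new Retry helper is a small fluent wrapper used by the token-counting code above: configure the callable, its arguments, the retry count and the sleep interval, then call run(), which returns a (success, result) tuple and swallows exceptions up to the retry limit. A minimal usage sketch mirroring the httpx.post calls in utils.py (the URL and payload are placeholders):

    import httpx
    from vectorvein.utilities.retry import Retry

    success, response = (
        Retry(httpx.post)
        .args("https://example.com/tokenize", json={"text": "hello"}, timeout=None)
        .retry_times(3)
        .sleep_time(5)
        .run()
    )
    if success:
        print(response.json())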
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vectorvein
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Default template for PDM package
  Author-Email: Anderson <andersonby@163.com>
  License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
  Requires-Dist: anthropic[vertex]>=0.31.2
  Requires-Dist: pydantic>=2.8.2
  Requires-Dist: Pillow>=10.4.0
+ Requires-Dist: deepseek-tokenizer>=0.1.0
+ Requires-Dist: qwen-tokenizer>=0.1.0
  Description-Content-Type: text/markdown

  # vectorvein
@@ -1,25 +1,26 @@
- vectorvein-0.1.7.dist-info/METADATA,sha256=C_3UCv_92cL58DnFKRMs-1GbEbzGIdDPlNt-X4aIKF4,423
- vectorvein-0.1.7.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
+ vectorvein-0.1.9.dist-info/METADATA,sha256=AlikMRU7DLdZ6gZMohsL1X6NiuAWP3jGV0tE-uZkhNo,501
+ vectorvein-0.1.9.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
  vectorvein/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vectorvein/chat_clients/__init__.py,sha256=5j7W--jr-l2cqDJp38uXYvkydDK0rnzm7MYGSACHKmU,3976
- vectorvein/chat_clients/anthropic_client.py,sha256=pkk0yPH05WTOnKOAXkm60ZZw1kdT8pCydNkHx7QArh4,18707
- vectorvein/chat_clients/base_client.py,sha256=GjPUAjgd_36-lHr8QXvz2X_-ApjbmAD3KZRjmntJ65U,4247
+ vectorvein/chat_clients/anthropic_client.py,sha256=JjigSUsIn06ixIEjnOJhVbcMqy2_MAL3iVUlDFAFMW4,20008
+ vectorvein/chat_clients/base_client.py,sha256=wMXpQ1L1KDb2Hg6va3H3GmcVeQB6r6sh7F4IS0DBQWI,4275
  vectorvein/chat_clients/deepseek_client.py,sha256=3qWu01NlJAP2N-Ff62d5-CZXZitlizE1fzb20LNetig,526
- vectorvein/chat_clients/gemini_client.py,sha256=IpkfcqVF38f4kAFnlHyisn4vkiMeM4cICyS4XDD0jJE,12787
+ vectorvein/chat_clients/gemini_client.py,sha256=IHcBHTSHkj3f962S5L7Ga-XA-96sq8quIDRZpoqvGss,13653
  vectorvein/chat_clients/groq_client.py,sha256=Uow4pgdmFi93ZQSoOol2-0PhhqkW-S0XuSldvppz5U4,498
  vectorvein/chat_clients/local_client.py,sha256=55nOsxzqUf79q3Y14MKROA71zxhsT7p7FsDZ89rts2M,422
- vectorvein/chat_clients/minimax_client.py,sha256=toe4mFYHphHnPQpOpegaL5n2OxTUu5TJVju61j7hgBw,12100
+ vectorvein/chat_clients/minimax_client.py,sha256=uomp3DyTBmDXQtCmRiYp1VIIOFoVZ9_oyM3-j4JO7go,13000
  vectorvein/chat_clients/mistral_client.py,sha256=1aKSylzBDaLYcFnaBIL4-sXSzWmXfBeON9Q0rq-ziWw,534
  vectorvein/chat_clients/moonshot_client.py,sha256=gbu-6nGxx8uM_U2WlI4Wus881rFRotzHtMSoYOcruGU,526
  vectorvein/chat_clients/openai_client.py,sha256=Nz6tV45pWcsOupxjnsRsGTicbQNJWIZyxuJoJ5DGMpg,527
- vectorvein/chat_clients/openai_compatible_client.py,sha256=sN9fq8yZ0XKSVg0eQZDWUE-awrkkJA2lEdMG-WENnUg,11951
+ vectorvein/chat_clients/openai_compatible_client.py,sha256=fvg--wFwnFEEhLGS9_u1XzNhtkkDUf4_rq6zYKwnOuI,13738
  vectorvein/chat_clients/qwen_client.py,sha256=-ryh-m9PgsO0fc4ulcCmPTy1155J8YUy15uPoJQOHA0,513
- vectorvein/chat_clients/utils.py,sha256=tAQwfydj46sMxSHeaeOWXrTUY2q0h7482NbvZjbNz9A,17637
+ vectorvein/chat_clients/utils.py,sha256=mnAew2Ie3nQHdEyDLKuJvXkQ5QdcSAJ6SpYk5JPbR1Q,20888
  vectorvein/chat_clients/yi_client.py,sha256=RNf4CRuPJfixrwLZ3-DEc3t25QDe1mvZeb9sku2f8Bc,484
  vectorvein/chat_clients/zhipuai_client.py,sha256=Ys5DSeLCuedaDXr3PfG1EW2zKXopt-awO2IylWSwY0s,519
  vectorvein/settings/__init__.py,sha256=4mpccT7eZC3yI1vVnVViW4wHBnDEH9D2R5EsIP34VgU,3218
- vectorvein/types/defaults.py,sha256=Mg-Mj3_eBzKZn1N8x1V2GqyaYgLD13i-NdSYdQC28X4,11437
+ vectorvein/types/defaults.py,sha256=ANIYL0W0bxl2IBxvtkS_WlS_qMQQwpi5TKRdLxdk47M,13027
  vectorvein/types/enums.py,sha256=vzOenCnRlFXBwPh-lfFhjGfM-6yfDj7wZColHODqocI,1550
  vectorvein/types/llm_parameters.py,sha256=nBjStC2zndTY__yhD2WFXB09taxEhDLE3OHA6MICfgE,3494
  vectorvein/utilities/media_processing.py,sha256=BujciRmw1GMmc3ELRvafL8STcy6r5b2rVnh27-uA7so,2256
- vectorvein-0.1.7.dist-info/RECORD,,
+ vectorvein/utilities/retry.py,sha256=9ePuJdeUUGx-qMWfaFxmlOvG_lQPwCQ4UB1z3Edlo34,993
+ vectorvein-0.1.9.dist-info/RECORD,,