vectorvein 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectorvein-0.1.7 → vectorvein-0.1.9}/PKG-INFO +3 -1
- {vectorvein-0.1.7 → vectorvein-0.1.9}/pyproject.toml +3 -1
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/anthropic_client.py +30 -5
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/base_client.py +4 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/gemini_client.py +22 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/minimax_client.py +43 -29
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_compatible_client.py +33 -4
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/utils.py +86 -2
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/defaults.py +48 -3
- vectorvein-0.1.9/src/vectorvein/utilities/retry.py +36 -0
- vectorvein-0.1.9/tests/sample_settings.py +596 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_create_chat_client.py +37 -16
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_image_input_chat_client.py +1 -1
- vectorvein-0.1.9/tests/test_tokens_count.py +46 -0
- vectorvein-0.1.7/tests/sample_settings.py +0 -947
- {vectorvein-0.1.7 → vectorvein-0.1.9}/README.md +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/groq_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/local_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/mistral_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/qwen_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/yi_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/settings/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/enums.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/types/llm_parameters.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/utilities/media_processing.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/__init__.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/cat.png +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_format_messages.py +0 -0
- {vectorvein-0.1.7 → vectorvein-0.1.9}/tests/test_tool_use_multi_turns.py +0 -0
{vectorvein-0.1.7 → vectorvein-0.1.9}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.7
+Version: 0.1.9
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -11,6 +11,8 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: anthropic[vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
+Requires-Dist: deepseek-tokenizer>=0.1.0
+Requires-Dist: qwen-tokenizer>=0.1.0
 Description-Content-Type: text/markdown
 
 # vectorvein
```
{vectorvein-0.1.7 → vectorvein-0.1.9}/pyproject.toml

```diff
@@ -9,12 +9,14 @@ dependencies = [
     "anthropic[vertex]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
+    "deepseek-tokenizer>=0.1.0",
+    "qwen-tokenizer>=0.1.0",
 ]
 description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.7"
+version = "0.1.9"
 
 [project.license]
 text = "MIT"
```
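The two new runtime dependencies support the offline token counting added to `chat_clients/utils.py` later in this diff. A minimal illustration of the encode calls that file relies on (the sample string is arbitrary):

```python
# Illustrative only: mirrors the tokenizer calls added in chat_clients/utils.py.
from deepseek_tokenizer import deepseek_tokenizer
from qwen_tokenizer import qwen_tokenizer

deepseek_tokens = len(deepseek_tokenizer.encode("Hello, world!"))
qwen_tokens = len(qwen_tokenizer.encode("Hello, world!"))
```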
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/anthropic_client.py

```diff
@@ -19,8 +19,8 @@ from google.auth.transport.requests import Request
 from google.auth import _helpers
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -115,7 +115,7 @@ class AnthropicChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -189,6 +189,18 @@ class AnthropicChatClient(BaseChatClient):
             base_url=self.endpoint.api_base,
         )
 
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -196,7 +208,7 @@ class AnthropicChatClient(BaseChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
@@ -303,7 +315,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -376,6 +388,19 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             api_key=self.endpoint.api_key,
             base_url=self.endpoint.api_base,
         )
+
+        tools_params = refactor_tool_use_params(tools) if tools else tools
+
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response = await self._client.messages.create(
             model=self.model_setting.id,
             messages=messages,
@@ -383,7 +408,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
-            tools=
+            tools=tools_params,
             tool_choice=tool_choice,
         )
 
```
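The block added to both the sync and async Anthropic clients (and repeated in the MiniMax and OpenAI-compatible clients below) derives `max_tokens` when the caller leaves it as `None`: it counts the prompt and tool tokens, subtracts them from the model's context window, and clamps the result to the model's output limit when one is configured. Restated below as a standalone helper for readability only; this function does not exist in the package, and `model_setting` stands in for the per-model settings object used in the diff:

```python
# A readability restatement of the inlined logic above, not part of vectorvein.
def resolve_max_tokens(model_setting, messages, tools_params, get_token_counts) -> int:
    """Derive max_tokens from the remaining context window when the caller passes None."""
    token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
    max_tokens = model_setting.context_length - token_counts
    if model_setting.max_output_tokens is not None:
        # Clamp to at least 1 token and at most the model's output limit.
        max_tokens = min(max(max_tokens, 1), model_setting.max_output_tokens)
    return max_tokens
```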
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/base_client.py

```diff
@@ -46,7 +46,7 @@ class BaseChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -57,7 +57,7 @@ class BaseChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -107,7 +107,7 @@ class BaseAsyncChatClient(ABC):
         model: str | None = None,
         stream: bool = False,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -118,7 +118,7 @@ class BaseAsyncChatClient(ABC):
         messages: list,
         model: str | None = None,
         temperature: float = 0.7,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
```
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/gemini_client.py

```diff
@@ -43,7 +43,7 @@ class GeminiChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -156,7 +156,16 @@ class GeminiChatClient(BaseChatClient):
             if "text" in part:
                 result["content"] += part["text"]
             elif "functionCall" in part:
-
+                tool_call = {
+                    "index": 0,
+                    "id": "call_0",
+                    "function": {
+                        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                        "name": part["functionCall"]["name"],
+                    },
+                    "type": "function",
+                }
+                tool_calls.append(tool_call)
 
         if tool_calls:
             result["tool_calls"] = tool_calls
@@ -194,7 +203,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str | None = None,
     ):
@@ -310,7 +319,16 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             if "text" in part:
                 result["content"] += part["text"]
             elif "functionCall" in part:
-
+                tool_call = {
+                    "index": 0,
+                    "id": "call_0",
+                    "function": {
+                        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                        "name": part["functionCall"]["name"],
+                    },
+                    "type": "function",
+                }
+                tool_calls.append(tool_call)
 
         if tool_calls:
             result["tool_calls"] = tool_calls
```
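The new branch converts Gemini `functionCall` parts into OpenAI-style tool calls so downstream consumers see a uniform shape. An illustrative input/output pair (the part's values are made up; the field names match what the code reads):

```python
import json

# Hypothetical Gemini response part containing a function call.
part = {"functionCall": {"name": "get_weather", "args": {"city": "Paris"}}}

tool_call = {
    "index": 0,
    "id": "call_0",
    "function": {
        "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
        "name": part["functionCall"]["name"],
    },
    "type": "function",
}
# tool_call["function"]["arguments"] == '{"city": "Paris"}'
```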
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/minimax_client.py

```diff
@@ -7,8 +7,8 @@ import httpx
 from openai._types import NotGiven
 
 from ..settings import settings
-from .utils import cutoff_messages
 from ..types import defaults as defs
+from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
 from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
@@ -66,7 +66,7 @@ class MiniMaxChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -93,7 +93,7 @@ class MiniMaxChatClient(BaseChatClient):
             model=self.model_setting.id,
         )
 
-        if tools
+        if tools:
             tools_params = {
                 "tools": [
                     {
@@ -113,6 +113,16 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -139,22 +149,19 @@ class MiniMaxChatClient(BaseChatClient):
         for chunk in response.iter_lines():
             if chunk:
                 chunk_data = json.loads(chunk[6:])
+                if chunk_data["object"] != "chat.completion.chunk":
+                    continue
                 tool_calls_params = extract_tool_calls(chunk_data)
                 has_tool_calls = True if tool_calls_params else False
                 if has_tool_calls:
-
-
-
-
-                    **
-
-
-                    **tool_calls_params,
-                    }
-                )
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
+                            "role": "assistant",
+                            **tool_calls_params,
+                        }
+                    )
                 else:
-                    if "usage" in chunk_data:
-                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
                             "content": chunk_data["choices"][0]["delta"]["content"],
@@ -211,7 +218,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | None = None,
         tool_choice: str = "auto",
     ):
@@ -238,7 +245,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             model=self.model_setting.id,
         )
 
-        if tools
+        if tools:
             tools_params = {
                 "tools": [
                     {
@@ -256,6 +263,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         self.url = self.endpoint.api_base
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}
 
@@ -283,22 +300,19 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         async for chunk in response.aiter_lines():
             if chunk:
                 chunk_data = json.loads(chunk[6:])
+                if chunk_data["object"] != "chat.completion.chunk":
+                    continue
                 tool_calls_params = extract_tool_calls(chunk_data)
                 has_tool_calls = True if tool_calls_params else False
                 if has_tool_calls:
-
-
-
-
-                    **
-
-
-                    **tool_calls_params,
-                    }
-                )
+                    yield ChatCompletionDeltaMessage(
+                        **{
+                            "content": chunk_data["choices"][0]["delta"].get("content"),
+                            "role": "assistant",
+                            **tool_calls_params,
+                        }
+                    )
                 else:
-                    if "usage" in chunk_data:
-                        continue
                     yield ChatCompletionDeltaMessage(
                         **{
                             "content": chunk_data["choices"][0]["delta"]["content"],
```
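In the MiniMax streaming loops, `chunk[6:]` strips the `data: ` prefix of each server-sent-events line, and the new `object != "chat.completion.chunk"` check takes over from the old `"usage" in chunk_data` guard that previously sat in the else branch, skipping any non-delta event (presumably the final usage-bearing completion object) before its content is read. A sketch of that parsing, with a made-up payload:

```python
import json

# Hypothetical SSE line; the field names mirror what the loop above reads.
line = 'data: {"object": "chat.completion.chunk", "choices": [{"delta": {"content": "Hi"}}]}'

chunk_data = json.loads(line[6:])  # drop the leading 'data: '
if chunk_data["object"] == "chat.completion.chunk":
    print(chunk_data["choices"][0]["delta"].get("content"))  # -> Hi
# Events with any other object type are skipped by the new guard.
```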
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/openai_compatible_client.py
RENAMED
```diff
@@ -11,6 +11,7 @@ from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from .utils import (
     cutoff_messages,
+    get_token_counts,
     ToolCallContentProcessor,
     generate_tool_use_system_prompt,
 )
@@ -50,7 +51,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -102,6 +103,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | Stream[ChatCompletionChunk] = self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -122,6 +133,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -147,7 +161,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump()
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
@@ -189,7 +204,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
-        max_tokens: int =
+        max_tokens: int | None = None,
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
     ):
@@ -241,6 +256,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if max_tokens is None:
+            max_output_tokens = self.model_setting.max_output_tokens
+            if max_output_tokens is not None:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+                max_tokens = min(max(max_tokens, 1), max_output_tokens)
+            else:
+                token_counts = get_token_counts({"messages": messages, "tools_params": tools_params})
+                max_tokens = self.model_setting.context_length - token_counts
+
         response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self._client.chat.completions.create(
             model=self.model_setting.id,
             messages=messages,
@@ -261,6 +286,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 if not chunk.choices[0].delta:
                     continue
                 if self.model_setting.function_call_available:
+                    if chunk.choices[0].delta.tool_calls:
+                        for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
+                            tool_call.index = index
                     yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
                 else:
                     message = chunk.choices[0].delta.model_dump()
@@ -286,7 +314,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if tools:
             if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
                 result["tool_calls"] = [
-                    tool_call.model_dump()
+                    {**tool_call.model_dump(), "type": "function"}
+                    for tool_call in response.choices[0].message.tool_calls
                 ]
             else:
                 tool_call_content_processor = ToolCallContentProcessor(result["content"])
```
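For streamed responses, the OpenAI-compatible client now rewrites each tool-call fragment's `index` to its position within the chunk, and for non-streamed responses it adds `"type": "function"` to every dumped tool call; both changes presumably normalize backends that omit these fields. A toy illustration (data made up) of the per-index accumulation a consumer typically performs, which relies on those indexes being present and consistent:

```python
# Hypothetical streamed fragments of a single tool call, merged by index.
fragments = [
    {"index": 0, "function": {"name": "get_weather", "arguments": '{"ci'}},
    {"index": 0, "function": {"arguments": 'ty": "Paris"}'}},
]
merged_arguments: dict[int, str] = {}
for fragment in fragments:
    merged_arguments[fragment["index"]] = (
        merged_arguments.get(fragment["index"], "") + fragment["function"].get("arguments", "")
    )
print(merged_arguments[0])  # {"city": "Paris"}
```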
{vectorvein-0.1.7 → vectorvein-0.1.9}/src/vectorvein/chat_clients/utils.py

```diff
@@ -3,8 +3,14 @@
 import re
 import json
 
+import httpx
 import tiktoken
+from anthropic import Anthropic
+from qwen_tokenizer import qwen_tokenizer
+from deepseek_tokenizer import deepseek_tokenizer
 
+from ..settings import settings
+from ..utilities.retry import Retry
 from ..types.enums import BackendType
 from ..utilities.media_processing import ImageProcessor
 
@@ -95,10 +101,88 @@ def get_token_counts(text: str | dict, model: str = "") -> int:
     text = str(text)
     if model == "gpt-3.5-turbo":
         return len(chatgpt_encoding.encode(text))
-    elif model
+    elif model in ("gpt-4o", "gpt-4o-mini"):
         return len(gpt_4o_encoding.encode(text))
     elif model.startswith("abab"):
-
+        model_setting = settings.minimax.models[model]
+        if len(model_setting.endpoints) == 0:
+            return int(len(text) / 1.33)
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.minimax.chat/v1/tokenize"
+        headers = {"Authorization": f"Bearer {endpoint.api_key}", "Content-Type": "application/json"}
+        request_body = {
+            "model": model,
+            "tokens_to_generate": 128,
+            "temperature": 0.2,
+            "messages": [
+                {"sender_type": "USER", "text": text},
+            ],
+        }
+
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["segments_num"]
+    elif model in ("moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"):
+        model_setting = settings.moonshot.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        tokenize_url = "https://api.moonshot.cn/v1/tokenizers/estimate-token-count"
+        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
+        request_body = {
+            "model": model,
+            "messages": [
+                {"role": "user", "content": text},
+            ],
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url=tokenize_url, headers=headers, json=request_body, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        response = response.json()
+        return response["data"]["total_tokens"]
+    elif model.startswith("gemini"):
+        model_setting = settings.gemini.models[model]
+        if len(model_setting.endpoints) == 0:
+            return len(chatgpt_encoding.encode(text))
+        endpoint_id = model_setting.endpoints[0]
+        endpoint = settings.get_endpoint(endpoint_id)
+        url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        params = {"key": endpoint.api_key}
+        request_body = {
+            "contents": {
+                "role": "USER",
+                "parts": [
+                    {"text": "TEXT"},
+                ],
+            },
+        }
+        _, response = (
+            Retry(httpx.post)
+            .args(url, json=request_body, params=params, timeout=None)
+            .retry_times(5)
+            .sleep_time(10)
+            .run()
+        )
+        result = response.json()
+        return result["totalTokens"]
+    elif model.startswith("claude"):
+        return Anthropic().count_tokens(text)
+    elif model.startswith("deepseek"):
+        return len(deepseek_tokenizer.encode(text))
+    elif model.startswith("qwen"):
+        return len(qwen_tokenizer.encode(text))
     else:
         return len(chatgpt_encoding.encode(text))
 
```
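`get_token_counts` now calls remote tokenize endpoints (MiniMax, Moonshot, Gemini) through the chained `Retry` helper introduced in the new `src/vectorvein/utilities/retry.py`, whose body is not included in this section. Its actual implementation may differ; the sketch below only matches how it is called here, `Retry(fn).args(...).retry_times(n).sleep_time(s).run()` returning a `(success, result)` pair:

```python
import time


class Retry:
    """Minimal sketch compatible with the call sites above; not the real vectorvein code."""

    def __init__(self, func):
        self._func = func
        self._args: tuple = ()
        self._kwargs: dict = {}
        self._retry_times = 3
        self._sleep_time = 1.0

    def args(self, *args, **kwargs):
        self._args, self._kwargs = args, kwargs
        return self

    def retry_times(self, times: int):
        self._retry_times = times
        return self

    def sleep_time(self, seconds: float):
        self._sleep_time = seconds
        return self

    def run(self):
        # Returns (success, result) so callers can unpack `_, response = ...`.
        for _ in range(self._retry_times):
            try:
                return True, self._func(*self._args, **self._kwargs)
            except Exception:
                time.sleep(self._sleep_time)
        return False, None
```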