vectorvein 0.1.88__py3-none-any.whl → 0.1.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/anthropic_client.py +4 -0
- vectorvein/chat_clients/base_client.py +121 -2
- vectorvein/chat_clients/gemini_client.py +9 -523
- vectorvein/chat_clients/openai_compatible_client.py +4 -0
- vectorvein/chat_clients/utils.py +34 -116
- vectorvein/settings/__init__.py +30 -1
- vectorvein/types/defaults.py +30 -6
- vectorvein/types/llm_parameters.py +4 -1
- vectorvein/utilities/rate_limiter.py +312 -0
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/METADATA +6 -1
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/RECORD +13 -12
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/WHEEL +0 -0
- {vectorvein-0.1.88.dist-info → vectorvein-0.1.89.dist-info}/entry_points.txt +0 -0
vectorvein/chat_clients/utils.py
CHANGED
@@ -83,20 +83,6 @@ class ToolCallContentProcessor:
         return {}
 
 
-def get_assistant_role_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "model"
-    else:
-        return "assistant"
-
-
-def get_content_key(backend: BackendType) -> str:
-    if backend == BackendType.Gemini:
-        return "parts"
-    else:
-        return "content"
-
-
 def convert_type(value, value_type):
     if value_type == "string":
         return str(value)
@@ -141,9 +127,9 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
     text = str(text)
     if model == "gpt-3.5-turbo":
         return len(get_gpt_35_encoding().encode(text))
-    elif model.startswith("gpt-4o"):
+    elif model.startswith(("gpt-4o", "o1-")):
         return len(get_gpt_4o_encoding().encode(text))
-    elif model.startswith("abab"):
+    elif model.startswith(("abab", "MiniMax")):
         model_setting = settings.minimax.models[model]
         if len(model_setting.endpoints) == 0:
             return int(len(text) / 1.33)
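The widened `startswith` tuples route `o1-*` models to the same tokenizer as `gpt-4o`, and `MiniMax`-prefixed models to the MiniMax token-count path. A minimal sketch of the gpt-4o/o1 branch with tiktoken (the helper name here is invented; `get_gpt_4o_encoding` in the diff presumably wraps the same `o200k_base` encoding):

```python
import tiktoken

def count_openai_tokens(text: str, model: str) -> int:
    # gpt-4o and the o1 family share OpenAI's o200k_base encoding;
    # older chat models fall back to cl100k_base
    if model.startswith(("gpt-4o", "o1-")):
        return len(tiktoken.get_encoding("o200k_base").encode(text))
    return len(tiktoken.get_encoding("cl100k_base").encode(text))

print(count_openai_tokens("hello world", "o1-preview"))  # e.g. 2
```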
@@ -201,10 +187,6 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
-        # TODO: gemini-exp-1206 is not yet supported; fall back to gemini-1.5-flash
-        if model in ("gemini-exp-1206", "gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp-1219"):
-            model = "gemini-1.5-flash"
-
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -213,7 +195,12 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
 
-        base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
+        api_base = (
+            endpoint.api_base.removesuffix("/openai/")
+            if endpoint.api_base
+            else "https://generativelanguage.googleapis.com/v1beta"
+        )
+        base_url = f"{api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
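The new `api_base` handling falls back to Google's public v1beta base when an endpoint defines none, and strips a trailing `/openai/` (the OpenAI-compatibility path the settings loader appends, see below) so the native `:countTokens` route can be reached. A hedged sketch of the resulting request, assuming httpx and a placeholder key:

```python
import httpx

# Default base when the endpoint defines no api_base
api_base = "https://generativelanguage.googleapis.com/v1beta"
url = f"{api_base}/models/gemini-2.0-flash:countTokens"

resp = httpx.post(
    url,
    params={"key": "YOUR_API_KEY"},  # placeholder, not a real key
    json={"contents": {"role": "user", "parts": [{"text": "Hello, Gemini"}]}},
)
print(resp.json().get("totalTokens"))  # countTokens responds with a totalTokens field
```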
@@ -304,7 +291,7 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         endpoint = settings.get_endpoint(endpoint_id)
         if model not in ("glm-4-plus", "glm-4-long", "glm-4-0520", "glm-4-air", "glm-4-flash"):
             model = "glm-4-plus"
-        tokenize_url = f"{endpoint.api_base}/tokenizer"
+        tokenize_url = f"{endpoint.api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"
         headers = {"Content-Type": "application/json", "Authorization": f"Bearer {endpoint.api_key}"}
         request_body = {
             "model": model,
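The ZhiPu branch gets the same kind of fallback: endpoints without an explicit `api_base` now default to the official bigmodel.cn base. A sketch of the resulting tokenizer call (httpx and the message payload are assumptions; only the URL handling appears in the hunk):

```python
import httpx

api_base = None  # an endpoint that does not set api_base
tokenize_url = f"{api_base or 'https://open.bigmodel.cn/api/paas/v4'}/tokenizer"

resp = httpx.post(
    tokenize_url,
    headers={"Content-Type": "application/json", "Authorization": "Bearer YOUR_API_KEY"},
    json={"model": "glm-4-plus", "messages": [{"role": "user", "content": "Hello"}]},
)
```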
@@ -395,7 +382,7 @@ def cutoff_messages(
         return messages
 
     messages_length = 0
-    content_key = get_content_key(backend)
+    content_key = "content"
 
     # First check for and keep the leading system message (if any)
     system_message = None
@@ -440,21 +427,14 @@ def cutoff_messages(
                 continue
             if index == 0:
                 # If a single message already exceeds the limit, truncate it and keep only its trailing part
-
-
-
-
-
-
-
-
-                    content = message[content_key][max_count - messages_length :]
-                    return system_message + [
-                        {
-                            "role": message["role"],
-                            content_key: content,
-                        }
-                    ]
+                content = message[content_key][max_count - messages_length :]
+                return system_message + [
+                    {
+                        "role": message["role"],
+                        content_key: content,
+                    }
+                ]
+
         return system_message + messages[-index:]
     return system_message + messages
 
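With the backend-specific branch gone, the truncation path is a single slice. `max_count - messages_length` goes negative once the budget is exhausted, so the slice keeps only the trailing characters of the first over-long message; a toy illustration with made-up numbers:

```python
content = "0123456789"  # the over-long message body
max_count = 6           # total budget
messages_length = 8     # length already consumed by newer messages

# 6 - 8 == -2, so the negative index keeps the last two characters
truncated = content[max_count - messages_length:]
assert truncated == "89"
```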
@@ -477,13 +457,6 @@ def format_image_message(image: str, backend: BackendType = BackendType.OpenAI)
                 "data": image_processor.base64_image,
             },
         }
-    elif backend == BackendType.Gemini:
-        return {
-            "inline_data": {
-                "mime_type": image_processor.mime_type,
-                "data": image_processor.base64_image,
-            }
-        }
     else:
         return {
             "type": "image_url",
@@ -495,7 +468,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages = []
 
     # Tool-call message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_message = {
             "content": None,
             "role": "assistant",
@@ -524,20 +497,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
         if content:
             tool_call_message["content"].insert(0, {"type": "text", "text": content})
-    elif backend == BackendType.Gemini:
-        tool_call_message = {
-            "role": "model",
-            "parts": [
-                {
-                    "functionCall": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "args": message["metadata"]["selected_workflow"]["params"],
-                    }
-                },
-            ],
-        }
-        if content:
-            tool_call_message["parts"].insert(0, {"text": content})
     else:
         tool_call_message = {
             "content": json.dumps(
@@ -552,7 +511,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
     formatted_messages.append(tool_call_message)
 
     # Tool-call result message
-    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi):
+    if backend in (BackendType.OpenAI, BackendType.ZhiPuAI, BackendType.Mistral, BackendType.Yi, BackendType.Gemini):
         tool_call_result_message = {
             "role": "tool",
             "tool_call_id": message["metadata"]["selected_workflow"]["tool_call_id"],
@@ -570,21 +529,6 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
                 }
             ],
         }
-    elif backend == BackendType.Gemini:
-        tool_call_result_message = {
-            "role": "function",
-            "parts": [
-                {
-                    "functionResponse": {
-                        "name": message["metadata"]["selected_workflow"]["function_name"],
-                        "response": {
-                            "name": message["metadata"]["selected_workflow"]["function_name"],
-                            "content": message["metadata"].get("workflow_result", ""),
-                        },
-                    }
-                }
-            ],
-        }
     else:
         tool_call_result_message = {
             "role": "user",
@@ -598,7 +542,7 @@ def format_workflow_messages(message: dict, content: str, backend: BackendType):
         }
     formatted_messages.append(tool_call_result_message)
 
-    if content and backend not in (BackendType.Mistral, BackendType.Anthropic, BackendType.Gemini):
+    if content and backend not in (BackendType.Mistral, BackendType.Anthropic):
         formatted_messages.append({"role": "assistant", "content": content})
 
     return formatted_messages
@@ -608,21 +552,7 @@ def transform_from_openai_message(message: ChatCompletionMessageParam, backend:
     role = message.get("role", "user")
     content = message.get("content", "")
 
-    if backend == BackendType.Gemini:
-        if isinstance(content, list):
-            parts = []
-            for item in content:
-                if isinstance(item, str):
-                    parts.append({"text": item})
-                elif isinstance(item, dict) and "type" in item:
-                    if item["type"] == "image":
-                        parts.append({"image": item["image"]})
-                    elif item["type"] == "text":
-                        parts.append({"text": item["text"]})
-            return {"role": "user" if role == "user" else "model", "parts": parts}
-        else:
-            return {"role": "user" if role == "user" else "model", "parts": [{"text": content}]}
-    elif backend == BackendType.Anthropic:
+    if backend == BackendType.Anthropic:
         if isinstance(content, list):
             formatted_content = []
             for item in content:
@@ -663,7 +593,7 @@ def format_messages(
     # Handle VectorVein-format messages
     content = message["content"]["text"]
     if message["content_type"] == "TXT":
-        role = "user" if message["author_type"] == "U" else get_assistant_role_key(backend)
+        role = "user" if message["author_type"] == "U" else "assistant"
         formatted_message = format_text_message(
             content, role, message.get("attachments", []), backend, native_multimodal
         )
@@ -693,31 +623,19 @@ def format_text_message(
     content += "\n".join([f"- {attachment}" for attachment in attachments])
 
     if native_multimodal and has_images:
-
-
-
-
-
-
-
-
-                "role": role,
-                "content": [
-                    {"type": "text", "text": content},
-                    *[
-                        format_image_message(image=attachment, backend=backend)
-                        for attachment in attachments
-                        if attachment.lower().endswith(images_extensions)
-                    ],
+        return {
+            "role": role,
+            "content": [
+                {"type": "text", "text": content},
+                *[
+                    format_image_message(image=attachment, backend=backend)
+                    for attachment in attachments
+                    if attachment.lower().endswith(images_extensions)
                 ],
-            }
+            ],
+        }
     else:
-        if backend == BackendType.Gemini:
-            return {"role": role, "parts": [{"text": content}]}
-        elif backend == BackendType.Anthropic:
-            return {"role": role, "content": content}
-        else:
-            return {"role": role, "content": content}
+        return {"role": role, "content": content}
 
 
 def generate_tool_use_system_prompt(tools: list | str, format_type: str = "json") -> str:
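The net effect of this file's changes: every Gemini-specific `model`/`parts`/`function` branch is gone, and `BackendType.Gemini` now shares the OpenAI-style message shapes (made possible by routing Gemini endpoints through the `/openai/` compatibility path, see the settings change below). Illustratively, a workflow tool round-trip for Gemini would now be emitted roughly like this; the `tool_calls` entry follows the OpenAI convention, and the ids and names here are invented:

```python
tool_call_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "id": "call_0",  # invented tool_call_id
            "type": "function",
            "function": {"name": "my_workflow", "arguments": "{}"},
        }
    ],
}
tool_call_result_message = {
    "role": "tool",
    "tool_call_id": "call_0",
    "content": "workflow output",  # invented result payload
}
```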
vectorvein/settings/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-27 00:30:56
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Literal
 
 from pydantic import BaseModel, Field
 
@@ -9,6 +9,26 @@ from ..types.enums import BackendType
 from ..types.llm_parameters import BackendSettings, EndpointSetting
 
 
+class RedisConfig(BaseModel):
+    host: str = "localhost"
+    port: int = 6379
+    db: int = 0
+
+
+class DiskCacheConfig(BaseModel):
+    cache_dir: str = ".rate_limit_cache"
+
+
+class RateLimitConfig(BaseModel):
+    enabled: bool = False
+
+    backend: Literal["memory", "redis", "diskcache"] = "memory"
+    redis: Optional[RedisConfig] = Field(default=None)
+    diskcache: Optional[DiskCacheConfig] = Field(default=None)
+    default_rpm: int = 60
+    default_tpm: int = 1000000
+
+
 class Server(BaseModel):
     host: str
     port: int
@@ -20,6 +40,7 @@ class Settings(BaseModel):
         default_factory=list, description="Available endpoints for the LLM service."
     )
     token_server: Optional[Server] = Field(default=None, description="Token server address. Format: host:port")
+    rate_limit: Optional[RateLimitConfig] = Field(default=None, description="Rate limit settings.")
 
     anthropic: BackendSettings = Field(default_factory=BackendSettings, description="Anthropic models settings.")
     deepseek: BackendSettings = Field(default_factory=BackendSettings, description="Deepseek models settings.")
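A hedged sketch of how the new `rate_limit` block might be supplied (keys mirror the models added above; the values are examples, and the import path assumes the classes stay in `vectorvein.settings`):

```python
from vectorvein.settings import RateLimitConfig  # assumed import path

config = RateLimitConfig(
    enabled=True,
    backend="redis",  # one of "memory", "redis", "diskcache"
    redis={"host": "localhost", "port": 6379, "db": 0},
    default_rpm=60,         # fallback requests-per-minute
    default_tpm=1_000_000,  # fallback tokens-per-minute
)
print(config.redis.host if config.redis else "no redis")
```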
@@ -63,6 +84,14 @@ class Settings(BaseModel):
         else:
             data[model_type] = BackendSettings(models=default_models)
 
+        for endpoint in data.get("endpoints", []):
+            if not endpoint.get("api_base"):
+                continue
+            api_base = endpoint["api_base"]
+            if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
+                if not api_base.endswith("openai/"):
+                    endpoint["api_base"] = api_base.strip("/") + "/openai/"
+
         super().__init__(**data)
 
     def load(self, settings_dict: Dict):
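The new loop canonicalizes Gemini endpoints onto the OpenAI-compatibility path, which the token counter in utils.py later strips back off with `removesuffix("/openai/")`. The rewrite in isolation:

```python
api_base = "https://generativelanguage.googleapis.com/v1beta"
if api_base.startswith("https://generativelanguage.googleapis.com/v1beta"):
    if not api_base.endswith("openai/"):
        api_base = api_base.strip("/") + "/openai/"

assert api_base == "https://generativelanguage.googleapis.com/v1beta/openai/"
```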
vectorvein/types/defaults.py
CHANGED
@@ -13,6 +13,7 @@ ENDPOINT_TPM: Final[int] = 300000
 MODEL_CONTEXT_LENGTH: Final[int] = 32768
 
 # Moonshot models
+MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "moonshot-v1-8k": {
         "id": "moonshot-v1-8k",
@@ -33,9 +34,9 @@ MOONSHOT_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
     },
 }
-MOONSHOT_DEFAULT_MODEL: Final[str] = "moonshot-v1-8k"
 
 # Deepseek models
+DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "deepseek-chat": {
         "id": "deepseek-chat",
@@ -52,9 +53,9 @@ DEEPSEEK_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-DEEPSEEK_DEFAULT_MODEL: Final[str] = "deepseek-chat"
 
 # Baichuan models
+BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "Baichuan4": {
         "id": "Baichuan4",
@@ -92,7 +93,6 @@ BAICHUAN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": False,
     },
 }
-BAICHUAN_DEFAULT_MODEL: Final[str] = "Baichuan3-Turbo"
 
 # Groq models
 GROQ_DEFAULT_MODEL: Final[str] = "llama3-70b-8192"
@@ -617,7 +617,7 @@ ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Minimax models
-MINIMAX_DEFAULT_MODEL: Final[str] = "
+MINIMAX_DEFAULT_MODEL: Final[str] = "MiniMax-Text-01"
 MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "abab5-chat": {
         "id": "abab5-chat",
@@ -664,7 +664,7 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 # Gemini models
-GEMINI_DEFAULT_MODEL: Final[str] = "gemini-
+GEMINI_DEFAULT_MODEL: Final[str] = "gemini-2.0-flash"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
@@ -674,6 +674,14 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-pro-exp-02-05": {
+        "id": "gemini-2.0-pro-exp-02-05",
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
@@ -682,6 +690,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "response_format_available": True,
         "native_multimodal": True,
     },
+    "gemini-2.0-flash": {
+        "id": "gemini-2.0-flash",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-2.0-flash-lite-preview-02-05": {
+        "id": "gemini-2.0-flash-lite-preview-02-05",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
     "gemini-2.0-flash-exp": {
         "id": "gemini-2.0-flash-exp",
         "context_length": 1048576,
@@ -819,7 +843,7 @@ STEPFUN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }
 
 
-XAI_DEFAULT_MODEL: Final[str] = "grok-
+XAI_DEFAULT_MODEL: Final[str] = "grok-2-latest"
 XAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "grok-beta": {
         "id": "grok-beta",
vectorvein/types/llm_parameters.py
CHANGED
@@ -1,6 +1,6 @@
 # @Author: Bi Ying
 # @Date: 2024-07-26 23:48:04
-from typing import List, Dict, Optional, Union, Iterable
+from typing import List, Dict, Optional, Union, Iterable, NotRequired
 from typing_extensions import TypedDict  # Required by pydantic under Python < 3.12
 
 from pydantic import BaseModel, Field
|
@@ -24,6 +24,9 @@ from . import defaults as defs
|
|
24
24
|
class EndpointOptionDict(TypedDict):
|
25
25
|
endpoint_id: str
|
26
26
|
model_id: str
|
27
|
+
rpm: NotRequired[int]
|
28
|
+
tpm: NotRequired[int]
|
29
|
+
concurrent_requests: NotRequired[int]
|
27
30
|
|
28
31
|
|
29
32
|
class EndpointSetting(BaseModel):
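With the new optional keys, an endpoint option for a model can carry per-endpoint throughput hints for the new rate limiter, while plain `{"endpoint_id", "model_id"}` dicts stay valid because `NotRequired` keys may be omitted. An example entry (values invented; the import path is an assumption):

```python
from vectorvein.types.llm_parameters import EndpointOptionDict  # assumed import path

option: EndpointOptionDict = {
    "endpoint_id": "gemini-endpoint-1",  # invented endpoint id
    "model_id": "gemini-2.0-flash",
    "rpm": 120,                # requests per minute for this endpoint
    "tpm": 500_000,            # tokens per minute for this endpoint
    "concurrent_requests": 4,  # parallel request cap
}
```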
|