vectorvein 0.1.87__py3-none-any.whl → 0.1.89__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,527 +1,13 @@
-# @Author: Bi Ying
-# @Date: 2024-06-17 23:47:49
-import json
-from functools import cached_property
-from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
+from ..types.enums import BackendType
+from ..types.defaults import GEMINI_DEFAULT_MODEL
+from .openai_compatible_client import OpenAICompatibleChatClient, AsyncOpenAICompatibleChatClient
 
-import httpx
 
-from .utils import cutoff_messages
-from ..types import defaults as defs
-from .base_client import BaseChatClient, BaseAsyncChatClient
-from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import (
-    NotGiven,
-    NOT_GIVEN,
-    ToolParam,
-    ToolChoice,
-    ChatCompletionMessage,
-    ChatCompletionDeltaMessage,
-    ChatCompletionStreamOptionsParam,
-)
+class GeminiChatClient(OpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini
 
 
-class GeminiChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
-    BACKEND_NAME: BackendType = BackendType.Gemini
-
-    def __init__(
-        self,
-        model: str = defs.GEMINI_DEFAULT_MODEL,
-        stream: bool = True,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
-        random_endpoint: bool = True,
-        endpoint_id: str = "",
-        http_client: httpx.Client | None = None,
-        backend_name: str | None = None,
-    ):
-        super().__init__(
-            model,
-            stream,
-            temperature,
-            context_length_control,
-            random_endpoint,
-            endpoint_id,
-            http_client,
-            backend_name,
-        )
-        self.model_id = None
-        self.endpoint = None
-
-    @cached_property
-    def raw_client(self):
-        self.endpoint, self.model_id = self._set_endpoint()
-        if not self.http_client:
-            self.http_client = httpx.Client(timeout=300, proxy=self.endpoint.proxy)
-        return self.http_client
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage:
-        pass
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[True],
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
-        pass
-
-    @overload
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: bool,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
-        pass
-
-    def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] | Literal[True] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-
-        self.model_setting = self.backend_settings.models[self.model]
-        if self.model_id is None:
-            self.model_id = self.model_setting.id
-
-        self.endpoint, self.model_id = self._set_endpoint()
-
-        if messages[0].get("role") == "system":
-            system_prompt = messages[0]["content"]
-            messages = messages[1:]
-        else:
-            system_prompt = ""
-
-        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        tools_params = {}
-        if tools:
-            tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-
-        response_format_params = {}
-        if response_format is not None:
-            if response_format.get("type") == "json_object":
-                response_format_params = {"response_mime_type": "application/json"}
-
-        top_p_params = {}
-        if top_p:
-            top_p_params = {"top_p": top_p}
-
-        temperature_params = {}
-        if temperature:
-            temperature_params = {"temperature": temperature}
-
-        request_body = {
-            "contents": messages,
-            "safetySettings": [
-                {
-                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-                    "threshold": "BLOCK_ONLY_HIGH",
-                }
-            ],
-            "generationConfig": {
-                "maxOutputTokens": max_tokens,
-                **temperature_params,
-                **top_p_params,
-                **response_format_params,
-            },
-            **tools_params,
-            **kwargs,
-        }
-        if system_prompt:
-            request_body["systemInstruction"] = {"parts": [{"text": system_prompt}]}
-
-        headers = {"Content-Type": "application/json"}
-
-        params = {"key": self.endpoint.api_key}
-
-        if self.stream:
-            url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:streamGenerateContent"
-            params["alt"] = "sse"
-
-            def generator():
-                result = {"content": "", "tool_calls": [], "usage": {}}
-                client = self.raw_client
-                with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
-                    for chunk in response.iter_lines():
-                        message = {"content": "", "tool_calls": []}
-                        if not chunk.startswith("data:"):
-                            continue
-                        data = json.loads(chunk[5:])
-                        chunk_content = data["candidates"][0]["content"]["parts"][0]
-                        if "text" in chunk_content:
-                            message["content"] = chunk_content["text"]
-                            result["content"] += message["content"]
-                        elif "functionCall" in chunk_content:
-                            message["tool_calls"] = [
-                                {
-                                    "index": 0,
-                                    "id": "call_0",
-                                    "function": {
-                                        "arguments": json.dumps(
-                                            chunk_content["functionCall"]["args"], ensure_ascii=False
-                                        ),
-                                        "name": chunk_content["functionCall"]["name"],
-                                    },
-                                    "type": "function",
-                                }
-                            ]
-
-                        result["usage"] = message["usage"] = {
-                            "prompt_tokens": data["usageMetadata"].get("promptTokenCount", 0),
-                            "completion_tokens": data["usageMetadata"].get("candidatesTokenCount", 0),
-                            "total_tokens": data["usageMetadata"].get("totalTokenCount", 0),
-                        }
-                        yield ChatCompletionDeltaMessage(**message)
-
-            return generator()
-        else:
-            url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-            client = self.raw_client
-            response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
-            if "error" in response:
-                raise Exception(response["error"])
-            result = {
-                "content": "",
-                "usage": {
-                    "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
-                    "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
-                    "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
-                },
-            }
-            tool_calls = []
-            for part in response["candidates"][0]["content"]["parts"]:
-                if "text" in part:
-                    result["content"] += part["text"]
-                elif "functionCall" in part:
-                    tool_call = {
-                        "index": 0,
-                        "id": "call_0",
-                        "function": {
-                            "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
-                            "name": part["functionCall"]["name"],
-                        },
-                        "type": "function",
-                    }
-                    tool_calls.append(tool_call)
-
-            if tool_calls:
-                result["tool_calls"] = tool_calls
-
-            return ChatCompletionMessage(**result)
-
-
-class AsyncGeminiChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
-    BACKEND_NAME: BackendType = BackendType.Gemini
-
-    def __init__(
-        self,
-        model: str = defs.GEMINI_DEFAULT_MODEL,
-        stream: bool = True,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        context_length_control: ContextLengthControlType = defs.CONTEXT_LENGTH_CONTROL,
-        random_endpoint: bool = True,
-        endpoint_id: str = "",
-        http_client: httpx.AsyncClient | None = None,
-        backend_name: str | None = None,
-    ):
-        super().__init__(
-            model,
-            stream,
-            temperature,
-            context_length_control,
-            random_endpoint,
-            endpoint_id,
-            http_client,
-            backend_name,
-        )
-        self.model_id = None
-        self.endpoint = None
-
-    @cached_property
-    def raw_client(self):
-        self.endpoint, self.model_id = self._set_endpoint()
-        if not self.http_client:
-            self.http_client = httpx.AsyncClient(timeout=300, proxy=self.endpoint.proxy)
-        return self.http_client
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage:
-        pass
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[True],
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
-        pass
-
-    @overload
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: bool,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
-        pass
-
-    async def create_completion(
-        self,
-        *,
-        messages: list,
-        model: str | None = None,
-        stream: Literal[False] | Literal[True] = False,
-        temperature: float | None | NotGiven = NOT_GIVEN,
-        max_tokens: int | None = None,
-        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
-        response_format: dict | None = None,
-        stream_options: ChatCompletionStreamOptionsParam | None = None,
-        top_p: float | NotGiven | None = NOT_GIVEN,
-        skip_cutoff: bool = False,
-        **kwargs,
-    ):
-        if model is not None:
-            self.model = model
-        if stream is not None:
-            self.stream = stream
-        if temperature is not None:
-            self.temperature = temperature
-
-        self.model_setting = self.backend_settings.models[self.model]
-        if self.model_id is None:
-            self.model_id = self.model_setting.id
-
-        self.endpoint, self.model_id = self._set_endpoint()
-
-        if messages[0].get("role") == "system":
-            system_prompt = messages[0]["content"]
-            messages = messages[1:]
-        else:
-            system_prompt = ""
-
-        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
-            messages = cutoff_messages(
-                messages,
-                max_count=self.model_setting.context_length,
-                backend=self.BACKEND_NAME,
-                model=self.model_setting.id,
-            )
-
-        tools_params = {}
-        if tools:
-            tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-
-        response_format_params = {}
-        if response_format is not None:
-            if response_format.get("type") == "json_object":
-                response_format_params = {"response_mime_type": "application/json"}
-
-        top_p_params = {}
-        if top_p:
-            top_p_params = {"top_p": top_p}
-
-        temperature_params = {}
-        if temperature:
-            temperature_params = {"temperature": temperature}
-
-        request_body = {
-            "contents": messages,
-            "safetySettings": [
-                {
-                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
-                    "threshold": "BLOCK_ONLY_HIGH",
-                }
-            ],
-            "generationConfig": {
-                "maxOutputTokens": max_tokens,
-                **temperature_params,
-                **top_p_params,
-                **response_format_params,
-            },
-            **tools_params,
-            **kwargs,
-        }
-        if system_prompt:
-            request_body["systemInstruction"] = {"parts": [{"text": system_prompt}]}
-
-        headers = {"Content-Type": "application/json"}
-
-        params = {"key": self.endpoint.api_key}
-
-        if self.stream:
-            url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:streamGenerateContent"
-            params["alt"] = "sse"
-
-            async def generator():
-                result = {"content": "", "tool_calls": [], "usage": {}}
-                client = self.raw_client
-                async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
-                    async for chunk in response.aiter_lines():
-                        message = {"content": "", "tool_calls": []}
-                        if not chunk.startswith("data:"):
-                            continue
-                        data = json.loads(chunk[5:])
-                        chunk_content = data["candidates"][0]["content"]["parts"][0]
-                        if "text" in chunk_content:
-                            message["content"] = chunk_content["text"]
-                            result["content"] += message["content"]
-                        elif "functionCall" in chunk_content:
-                            message["tool_calls"] = [
-                                {
-                                    "index": 0,
-                                    "id": "call_0",
-                                    "function": {
-                                        "arguments": json.dumps(
-                                            chunk_content["functionCall"]["args"], ensure_ascii=False
-                                        ),
-                                        "name": chunk_content["functionCall"]["name"],
-                                    },
-                                    "type": "function",
-                                }
-                            ]
-
-                        result["usage"] = message["usage"] = {
-                            "prompt_tokens": data["usageMetadata"].get("promptTokenCount", 0),
-                            "completion_tokens": data["usageMetadata"].get("candidatesTokenCount", 0),
-                            "total_tokens": data["usageMetadata"].get("totalTokenCount", 0),
-                        }
-                        yield ChatCompletionDeltaMessage(**message)
-
-            return generator()
-        else:
-            url = f"{self.endpoint.api_base}/models/{self.model_setting.id}:generateContent"
-            client = self.raw_client
-            async with client:
-                response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
-                response = response.json()
-                if "error" in response:
-                    raise Exception(response["error"])
-                result = {
-                    "content": "",
-                    "usage": {
-                        "prompt_tokens": response.get("usageMetadata", {}).get("promptTokenCount", 0),
-                        "completion_tokens": response.get("usageMetadata", {}).get("candidatesTokenCount", 0),
-                        "total_tokens": response.get("usageMetadata", {}).get("totalTokenCount", 0),
-                    },
-                }
-                tool_calls = []
-                for part in response["candidates"][0]["content"]["parts"]:
-                    if "text" in part:
-                        result["content"] += part["text"]
-                    elif "functionCall" in part:
-                        tool_call = {
-                            "index": 0,
-                            "id": "call_0",
-                            "function": {
-                                "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
-                                "name": part["functionCall"]["name"],
-                            },
-                            "type": "function",
-                        }
-                        tool_calls.append(tool_call)
-
-                if tool_calls:
-                    result["tool_calls"] = tool_calls
-
-                return ChatCompletionMessage(**result)
+class AsyncGeminiChatClient(AsyncOpenAICompatibleChatClient):
+    DEFAULT_MODEL = GEMINI_DEFAULT_MODEL
+    BACKEND_NAME = BackendType.Gemini
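Both Gemini clients are now thin subclasses of the OpenAI-compatible clients, so all request handling comes from the shared base. A minimal usage sketch, assuming the base keeps the constructor and `create_completion(messages=...)` interface shown for the removed implementation (the import path and any endpoint configuration are assumptions, not part of this diff):

```python
# Sketch only: the import path and configuration are assumed, not taken from this diff.
from vectorvein.chat_clients.gemini_client import GeminiChatClient

client = GeminiChatClient(stream=False)  # model defaults to DEFAULT_MODEL (GEMINI_DEFAULT_MODEL)
message = client.create_completion(
    messages=[{"role": "user", "content": "Summarize this change in one sentence."}],
)
# With stream=False a ChatCompletionMessage is returned; with stream=True the call
# yields ChatCompletionDeltaMessage chunks, mirroring the removed overloads above.
print(message.content)
```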
@@ -212,6 +212,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             _stream_options_params = {}
 
+        self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = raw_client.chat.completions.create(
                 model=self.model_id,
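The added `_acquire_rate_limit(self.endpoint, self.model, messages)` call gates every request before it is dispatched (the async client below awaits the same hook). Its implementation lives in the base client and is not part of this diff; purely as a hypothetical illustration, a hook of this shape could delegate to a simple per-endpoint limiter such as:

```python
# Hypothetical illustration only; not the limiter vectorvein actually ships.
import threading
import time


class SimpleRateLimiter:
    def __init__(self, requests_per_minute: int):
        self.interval = 60.0 / requests_per_minute
        self._lock = threading.Lock()
        self._next_allowed = 0.0

    def acquire(self) -> None:
        # Reserve the next request slot, then sleep until it arrives.
        with self._lock:
            now = time.monotonic()
            wait = max(0.0, self._next_allowed - now)
            self._next_allowed = max(now, self._next_allowed) + self.interval
        if wait:
            time.sleep(wait)
```

A real `_acquire_rate_limit(endpoint, model, messages)` would presumably look up the limiter configured for that endpoint and model (and possibly weigh the size of `messages`) before letting the request through.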
@@ -282,9 +284,9 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             buffer = ""
                             break
 
-                    message["content"] = "".join(current_content).strip()
+                    message["content"] = "".join(current_content)
                     if current_reasoning:
-                        message["reasoning_content"] = "".join(current_reasoning).strip()
+                        message["reasoning_content"] = "".join(current_reasoning)
                     current_content.clear()
                     current_reasoning.clear()
 
@@ -307,8 +309,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 else:
                     current_content.append(buffer)
                 final_message = {
-                    "content": "".join(current_content).strip(),
-                    "reasoning_content": "".join(current_reasoning).strip() if current_reasoning else None,
+                    "content": "".join(current_content),
+                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
                 }
                 yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
@@ -338,8 +340,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
             if not result["reasoning_content"] and result["content"]:
                 think_match = re.search(r"<think>(.*?)</think>", result["content"], re.DOTALL)
                 if think_match:
-                    result["reasoning_content"] = think_match.group(1).strip()
-                    result["content"] = result["content"].replace(think_match.group(0), "", 1).strip()
+                    result["reasoning_content"] = think_match.group(1)
+                    result["content"] = result["content"].replace(think_match.group(0), "", 1)
 
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
@@ -538,6 +540,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             max_tokens = self.model_setting.context_length - token_counts - 64
 
+        await self._acquire_rate_limit(self.endpoint, self.model, messages)
+
         if self.stream:
             stream_response = await raw_client.chat.completions.create(
                 model=self.model_id,
@@ -608,9 +612,9 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             buffer = ""
                             break
 
-                    message["content"] = "".join(current_content).strip()
+                    message["content"] = "".join(current_content)
                     if current_reasoning:
-                        message["reasoning_content"] = "".join(current_reasoning).strip()
+                        message["reasoning_content"] = "".join(current_reasoning)
                     current_content.clear()
                     current_reasoning.clear()
 
@@ -633,8 +637,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 else:
                     current_content.append(buffer)
                 final_message = {
-                    "content": "".join(current_content).strip(),
-                    "reasoning_content": "".join(current_reasoning).strip() if current_reasoning else None,
+                    "content": "".join(current_content),
+                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
                 }
                 yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
@@ -663,8 +667,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             if not result["reasoning_content"] and result["content"]:
                 think_match = re.search(r"<think>(.*?)</think>", result["content"], re.DOTALL)
                 if think_match:
-                    result["reasoning_content"] = think_match.group(1).strip()
-                    result["content"] = result["content"].replace(think_match.group(0), "", 1).strip()
+                    result["reasoning_content"] = think_match.group(1)
+                    result["content"] = result["content"].replace(think_match.group(0), "", 1)
 
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
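The `.strip()` removals above change only whitespace handling: text extracted from a `<think>…</think>` block, and the content that remains after it is removed, now keep their original leading and trailing whitespace. A small standalone illustration of the non-streaming path's new behaviour:

```python
import re

content = "<think>\nchain of thought\n</think>\n\nFinal answer"

think_match = re.search(r"<think>(.*?)</think>", content, re.DOTALL)
if think_match:
    reasoning_content = think_match.group(1)                # "\nchain of thought\n" -- whitespace preserved
    content = content.replace(think_match.group(0), "", 1)  # "\n\nFinal answer" -- no trailing .strip() anymore

print(repr(reasoning_content))
print(repr(content))
```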