vectorvein 0.1.23__py3-none-any.whl → 0.1.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@
 # @Date: 2024-06-17 23:47:49
 import json
 import random
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 
 import httpx
 
@@ -10,11 +12,18 @@ from .utils import cutoff_messages
 from ..types import defaults as defs
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class GeminiChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
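All three clients now pull NotGiven, NOT_GIVEN, ToolParam and ToolChoice from the package's own ..types.llm_parameters module rather than importing the sentinel from openai._types directly (that old import is removed in the MiniMax and OpenAI-compatible files below). The llm_parameters module itself is not part of this diff; a hedged sketch of what such a re-export could look like, with the aliasing purely assumed:

    # llm_parameters.py (illustrative sketch only; not taken from this diff)
    from openai._types import NotGiven, NOT_GIVEN  # sentinel meaning "argument was not supplied"
    from openai.types.chat import ChatCompletionToolParam as ToolParam
    from openai.types.chat import ChatCompletionToolChoiceOptionParam as ToolChoice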
@@ -39,19 +48,49 @@ class GeminiChatClient(BaseChatClient):
             **kwargs,
         )
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
+    def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
     ):
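The two @overload declarations bind the return type to the literal value of stream, so a type checker resolves a blocking call to ChatCompletionMessage and a streaming call to a generator of ChatCompletionDeltaMessage. A minimal usage sketch; the client construction and the OpenAI-style message dicts are assumptions, not shown in this diff:

    # `client` is assumed to be an already-configured GeminiChatClient instance.
    message = client.create_completion(
        messages=[{"role": "user", "content": "Hello"}], stream=False
    )
    print(message)  # typed as ChatCompletionMessage via the stream=False overload

    for delta in client.create_completion(
        messages=[{"role": "user", "content": "Hello"}], stream=True
    ):
        print(delta)  # ChatCompletionDeltaMessage chunks via the stream=True overload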
@@ -121,14 +160,14 @@ class GeminiChatClient(BaseChatClient):
             params["alt"] = "sse"
 
             def generator():
-                result = {"content": ""}
+                result = {"content": "", "tool_calls": [], "usage": {}}
                 if self.http_client:
                     client = self.http_client
                 else:
                     client = httpx.Client()
                 with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     for chunk in response.iter_lines():
-                        message = {"content": ""}
+                        message = {"content": "", "tool_calls": []}
                         if not chunk.startswith("data:"):
                             continue
                         data = json.loads(chunk[5:])
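The streaming generator now seeds its accumulators with tool_calls and usage in addition to content. The per-line SSE handling itself is unchanged; a standalone sketch of that parsing step, with an illustrative payload:

    import json

    def parse_sse_line(chunk: str) -> dict | None:
        # Mirrors the generator above: ignore non-data lines, strip the "data:" prefix, parse JSON.
        if not chunk.startswith("data:"):
            return None
        return json.loads(chunk[5:])

    print(parse_sse_line('data: {"candidates": []}'))  # {'candidates': []}
    print(parse_sse_line(": keep-alive comment"))      # None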
@@ -197,7 +236,7 @@ class GeminiChatClient(BaseChatClient):
 
 
 class AsyncGeminiChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.GEMINI_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.GEMINI_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.Gemini
 
     def __init__(
@@ -222,19 +261,49 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             **kwargs,
         )
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         **kwargs,
     ):
@@ -304,14 +373,14 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             params["alt"] = "sse"
 
             async def generator():
-                result = {"content": ""}
+                result = {"content": "", "tool_calls": [], "usage": {}}
                 if self.http_client:
                     client = self.http_client
                 else:
                     client = httpx.AsyncClient()
                 async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
                     async for chunk in response.aiter_lines():
-                        message = {"content": ""}
+                        message = {"content": "", "tool_calls": []}
                         if not chunk.startswith("data:"):
                             continue
                         data = json.loads(chunk[5:])
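The async client mirrors the sync one and returns an async generator when stream=True. A minimal consumption sketch; the configured AsyncGeminiChatClient and the message format are assumptions, not shown here:

    async def stream_reply(client) -> None:
        # Awaiting the call yields the AsyncGenerator selected by the stream=True overload.
        async for delta in await client.create_completion(
            messages=[{"role": "user", "content": "Hi"}], stream=True
        ):
            print(delta)  # ChatCompletionDeltaMessage chunks

    # Run with asyncio.run(stream_reply(client)); client construction is outside this diff.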
@@ -2,16 +2,23 @@
 # @Date: 2024-07-26 14:48:55
 import json
 import random
-
+from functools import cached_property
+from typing import Iterable, Literal, Generator, AsyncGenerator, overload, Any
 import httpx
-from openai._types import NotGiven
 
 from ..settings import settings
 from ..types import defaults as defs
 from .utils import cutoff_messages, get_token_counts
 from .base_client import BaseChatClient, BaseAsyncChatClient
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 def extract_tool_calls(response):
@@ -37,7 +44,7 @@ def extract_tool_calls(response):
 
 
 class MiniMaxChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -66,19 +73,50 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             self.http_client = httpx.Client()
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str = "auto",
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
        **kwargs,
     ):
         if model is not None:
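In every client, tools and tool_choice now default to the NOT_GIVEN sentinel (the convention used by the openai SDK) rather than None or "auto", which distinguishes "the caller never passed this" from an explicit value. A self-contained illustration of the sentinel idea; vectorvein's real NotGiven comes from ..types.llm_parameters:

    class NotGiven:
        """Illustrative stand-in for the sentinel type."""

        def __bool__(self) -> bool:
            return False  # lets `if tools:` skip parameters that were never supplied

        def __repr__(self) -> str:
            return "NOT_GIVEN"

    NOT_GIVEN = NotGiven()

    def tools_kwargs(tools=NOT_GIVEN) -> dict:
        # Forward `tools` only when the caller actually provided a value (even an empty list).
        return {} if isinstance(tools, NotGiven) else {"tools": tools}

    print(tools_kwargs())          # {}
    print(tools_kwargs(tools=[]))  # {'tools': []}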
@@ -206,7 +244,7 @@ class MiniMaxChatClient(BaseChatClient):
 
 
 class AsyncMiniMaxChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = defs.MINIMAX_DEFAULT_MODEL
+    DEFAULT_MODEL: str | None = defs.MINIMAX_DEFAULT_MODEL
     BACKEND_NAME: BackendType = BackendType.MiniMax
 
     def __init__(
@@ -235,19 +273,50 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             self.http_client = httpx.AsyncClient()
 
-    @property
+    @cached_property
     def raw_client(self):
         return self.http_client
 
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | None = None,
-        tool_choice: str = "auto",
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
        **kwargs,
     ):
         if model is not None:
@@ -3,9 +3,9 @@
 import json
 import random
 from functools import cached_property
+from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 
 import httpx
-from openai._types import NotGiven, NOT_GIVEN
 from openai._streaming import Stream, AsyncStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
@@ -20,11 +20,18 @@ from .utils import (
 from ..settings import settings
 from ..types import defaults as defs
 from ..types.enums import ContextLengthControlType, BackendType
-from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+from ..types.llm_parameters import (
+    NotGiven,
+    NOT_GIVEN,
+    ToolParam,
+    ToolChoice,
+    ChatCompletionMessage,
+    ChatCompletionDeltaMessage,
+)
 
 
 class OpenAICompatibleChatClient(BaseChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -50,7 +57,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         )
 
     @cached_property
-    def raw_client(self):
+    def raw_client(self) -> OpenAI | AzureOpenAI:
         if self.random_endpoint:
             self.random_endpoint = True
             self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
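raw_client is now a functools.cached_property on every client, so the endpoint picked with random.choice and the constructed SDK client are computed once per instance and reused rather than rebuilt on each access. A small standalone illustration of that caching behaviour:

    import random
    from functools import cached_property

    class Picker:
        @cached_property
        def endpoint(self) -> str:
            # Evaluated on first access only; later accesses return the cached value.
            return random.choice(["https://a.example", "https://b.example"])

    p = Picker()
    assert p.endpoint == p.endpoint  # stable for the lifetime of the instance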
@@ -70,15 +77,46 @@ class OpenAICompatibleChatClient(BaseChatClient):
             http_client=self.http_client,
         )
 
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+        pass
+
     def create_completion(
         self,
-        messages: list = list,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -114,29 +152,34 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
-            model=self.model_setting.id,
-            messages=messages,
-            stream=self.stream,
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
 
         if self.stream:
+            stream_response: Stream[ChatCompletionChunk] = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=True,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             def generator():
                 full_content = ""
                 result = {}
-                for chunk in response:
+                for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
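When max_tokens is not supplied, the remaining context budget is clamped into the range [1, max_output_tokens]. A worked example with made-up numbers:

    # Hypothetical model settings: 16384-token context, 4096-token output cap,
    # and a prompt that get_message_token_counts measures at 15000 tokens.
    context_length, max_output_tokens, token_counts = 16384, 4096, 15000
    max_tokens = min(max(context_length - token_counts, 1), max_output_tokens)
    print(max_tokens)  # 1384: whatever still fits in the context window, capped at 4096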
@@ -163,9 +206,20 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=False,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
+
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls:
@@ -184,7 +238,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
 
 
 class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
-    DEFAULT_MODEL: str = ""
+    DEFAULT_MODEL: str | None = ""
     BACKEND_NAME: BackendType
 
     def __init__(
@@ -230,15 +284,46 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             http_client=self.http_client,
         )
 
+    @overload
     async def create_completion(
         self,
-        messages: list = list,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[False] = False,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> ChatCompletionMessage:
+        pass
+
+    @overload
+    async def create_completion(
+        self,
+        messages: list,
+        model: str | None = None,
+        stream: Literal[True] = True,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
+        **kwargs,
+    ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+        pass
+
+    async def create_completion(
+        self,
+        messages: list,
         model: str | None = None,
         stream: bool | None = None,
         temperature: float | None = None,
         max_tokens: int | None = None,
-        tools: list | NotGiven = NOT_GIVEN,
-        tool_choice: str | NotGiven = NOT_GIVEN,
+        tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+        tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+        response_format: dict | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -272,31 +357,36 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if response_format and self.model_setting.response_format_available:
+            self.response_format = {"response_format": response_format}
+        else:
+            self.response_format = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
-            token_counts = get_message_token_counts(messages=messages, tools=tools_params, model=self.model_setting.id)
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model_setting.id)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
             else:
                 max_tokens = self.model_setting.context_length - token_counts
 
-        response: ChatCompletion | AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
-            model=self.model_setting.id,
-            messages=messages,
-            stream=self.stream,
-            temperature=self.temperature,
-            max_tokens=max_tokens,
-            **tools_params,
-            **kwargs,
-        )
-
         if self.stream:
+            stream_response: AsyncStream[ChatCompletionChunk] = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
 
             async def generator():
                 full_content = ""
                 result = {}
-                async for chunk in response:
+                async for chunk in stream_response:
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -323,9 +413,19 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
 
             return generator()
         else:
+            response: ChatCompletion = await self.raw_client.chat.completions.create(
+                model=self.model_setting.id,
+                messages=messages,
+                stream=self.stream,
+                temperature=self.temperature,
+                max_tokens=max_tokens,
+                **self.response_format,
+                **tools_params,
+                **kwargs,
+            )
             result = {
                 "content": response.choices[0].message.content,
-                "usage": response.usage.model_dump(),
+                "usage": response.usage.model_dump() if response.usage else None,
             }
             if tools:
                 if self.model_setting.function_call_available and response.choices[0].message.tool_calls: