vectorvein 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,13 @@
  # @Date: 2024-07-26 14:48:55
  import json
  import random
+ from functools import cached_property
+ from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable

  import httpx
  from openai._types import NotGiven as OpenAINotGiven
  from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
- from anthropic._types import NotGiven, NOT_GIVEN
+ from anthropic._types import NOT_GIVEN
  from anthropic.types import (
      TextBlock,
      ToolUseBlock,
@@ -24,15 +26,25 @@ from ..types import defaults as defs
  from .utils import cutoff_messages, get_message_token_counts
  from .base_client import BaseChatClient, BaseAsyncChatClient
  from ..types.enums import ContextLengthControlType, BackendType
- from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+ from ..types.llm_parameters import (
+     Usage,
+     NotGiven,
+     ToolParam,
+     ToolChoice,
+     AnthropicToolParam,
+     AnthropicToolChoice,
+     ChatCompletionMessage,
+     ChatCompletionToolParam,
+     ChatCompletionDeltaMessage,
+ )


- def refactor_tool_use_params(tools: list):
+ def refactor_tool_use_params(tools: Iterable[ChatCompletionToolParam]) -> list[AnthropicToolParam]:
      return [
          {
              "name": tool["function"]["name"],
-             "description": tool["function"]["description"],
-             "input_schema": tool["function"]["parameters"],
+             "description": tool["function"].get("description", ""),
+             "input_schema": tool["function"].get("parameters", {}),
          }
          for tool in tools
      ]
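
A note on the `refactor_tool_use_params` change: it converts OpenAI-style function tools into Anthropic's tool schema, and the new `.get(...)` fallbacks keep a tool that omits `description` or `parameters` from raising `KeyError`. A minimal sketch, with an invented weather tool for illustration:

```python
# Hypothetical OpenAI-style tool definition (invented for illustration).
openai_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

# refactor_tool_use_params([openai_tool]) produces the Anthropic shape:
# [{"name": "get_weather",
#   "description": "Look up the current weather for a city.",
#   "input_schema": {"type": "object", ...}}]
# With 0.1.24, a tool missing "description" or "parameters" now falls
# back to "" / {} instead of raising KeyError.
```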
@@ -53,6 +65,17 @@ def refactor_tool_calls(tool_calls: list):
      ]


+ def refactor_tool_choice(tool_choice: ToolChoice) -> AnthropicToolChoice:
+     if isinstance(tool_choice, str):
+         if tool_choice == "auto":
+             return {"type": "auto"}
+         elif tool_choice == "required":
+             return {"type": "any"}
+     elif isinstance(tool_choice, dict) and "function" in tool_choice:
+         return {"type": "tool", "name": tool_choice["function"]["name"]}
+     return {"type": "auto"}
+
+
  def format_messages_alternate(messages: list) -> list:
      # messages: roles must alternate between "user" and "assistant", and not multiple "user" roles in a row
      # reformat multiple "user" roles in a row into {"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}, {"type": "text", "text": "How are you?"}]}
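
The new `refactor_tool_choice` maps OpenAI-style `tool_choice` values onto Anthropic's scheme; note that OpenAI's `"required"` corresponds to Anthropic's `{"type": "any"}`. Its behavior, roughly (sample values are illustrative):

```python
refactor_tool_choice("auto")      # -> {"type": "auto"}
refactor_tool_choice("required")  # -> {"type": "any"}
refactor_tool_choice({"type": "function", "function": {"name": "get_weather"}})
#                                 # -> {"type": "tool", "name": "get_weather"}
refactor_tool_choice("none")      # unrecognized: falls back to {"type": "auto"}
```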
@@ -87,7 +110,7 @@ def format_messages_alternate(messages: list) -> list:


  class AnthropicChatClient(BaseChatClient):
-     DEFAULT_MODEL: str = defs.ANTHROPIC_DEFAULT_MODEL
+     DEFAULT_MODEL: str | None = defs.ANTHROPIC_DEFAULT_MODEL
      BACKEND_NAME: BackendType = BackendType.Anthropic

      def __init__(
@@ -112,7 +135,7 @@ class AnthropicChatClient(BaseChatClient):
              **kwargs,
          )

-     @property
+     @cached_property
      def raw_client(self):
          if self.random_endpoint:
              self.random_endpoint = True
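
Switching `raw_client` from `@property` to `@cached_property` means the endpoint is resolved and the SDK client constructed once per client instance instead of on every attribute access. A self-contained sketch of the difference:

```python
from functools import cached_property

class Demo:
    @cached_property
    def raw_client(self):
        print("constructing client")  # runs only on first access
        return object()

d = Demo()
assert d.raw_client is d.raw_client  # prints once; later reads hit the cache
```

One side effect worth noting: with `random_endpoint`, the endpoint is now picked on first access and then pinned for the life of the instance.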
@@ -120,6 +143,8 @@ class AnthropicChatClient(BaseChatClient):
              self.endpoint = settings.get_endpoint(self.endpoint_id)

          if self.endpoint.is_vertex:
+             if self.endpoint.credentials is None:
+                 raise ValueError("Anthropic Vertex endpoint requires credentials")
              self.creds = Credentials(
                  token=self.endpoint.credentials.get("token"),
                  refresh_token=self.endpoint.credentials.get("refresh_token"),
@@ -131,7 +156,7 @@ class AnthropicChatClient(BaseChatClient):
                  expiry=_helpers.utcnow() - _helpers.REFRESH_THRESHOLD,
                  rapt_token=self.endpoint.credentials.get("rapt_token"),
                  trust_boundary=self.endpoint.credentials.get("trust_boundary"),
-                 universe_domain=self.endpoint.credentials.get("universe_domain"),
+                 universe_domain=self.endpoint.credentials.get("universe_domain", "googleapis.com"),
                  account=self.endpoint.credentials.get("account", ""),
              )

@@ -143,10 +168,11 @@ class AnthropicChatClient(BaseChatClient):
          else:
              base_url = f"{self.endpoint.api_base}{self.endpoint.region}-aiplatform/v1"

+         region = NOT_GIVEN if self.endpoint.region is None else self.endpoint.region
          return AnthropicVertex(
-             region=self.endpoint.region,
+             region=region,
              base_url=base_url,
-             project_id=self.endpoint.credentials.get("quota_project_id"),
+             project_id=self.endpoint.credentials.get("quota_project_id", NOT_GIVEN),
              access_token=self.creds.token,
              http_client=self.http_client,
          )
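
Several of these changes lean on the SDK's `NOT_GIVEN` sentinel, which means "omit this argument entirely" rather than "send `null`", so the SDK can apply its own defaults. Roughly (`endpoint_region` is a hypothetical stand-in):

```python
from anthropic._types import NOT_GIVEN

endpoint_region = None  # hypothetical value read from endpoint settings
region = NOT_GIVEN if endpoint_region is None else endpoint_region
# Passing region=NOT_GIVEN behaves as if the argument were never supplied,
# whereas region=None would send an explicit (and invalid) value.
```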
@@ -157,15 +183,46 @@ class AnthropicChatClient(BaseChatClient):
              http_client=self.http_client,
          )

+     @overload
+     def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[False] = False,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> ChatCompletionMessage:
+         pass
+
+     @overload
+     def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[True] = True,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
+         pass
+
      def create_completion(
          self,
-         messages: list = list,
+         messages: list,
          model: str | None = None,
          stream: bool | None = None,
          temperature: float | None = None,
          max_tokens: int | None = None,
-         tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
          **kwargs,
      ):
          if model is not None:
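
The paired `@overload` declarations tie the `stream` flag's literal value to the return type, so type checkers resolve `create_completion(..., stream=False)` to a `ChatCompletionMessage` and `stream=True` to a generator of deltas. (This also fixes the old `messages: list = list` default, which bound the `list` type itself as the default value.) The pattern in isolation, as a runnable sketch:

```python
from typing import Generator, Literal, overload

@overload
def fetch(stream: Literal[False] = False) -> str: ...
@overload
def fetch(stream: Literal[True] = True) -> Generator[str, None, None]: ...

def fetch(stream: bool = False):
    if stream:
        return (part for part in ("Hel", "lo"))  # generator branch
    return "Hello"                               # plain-value branch

reply = fetch(stream=False)   # checker infers: str
chunks = fetch(stream=True)   # checker infers: Generator[str, None, None]
```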
@@ -182,7 +239,7 @@ class AnthropicChatClient(BaseChatClient):
          self.model_setting = self.backend_settings.models[self.model]

          if messages[0].get("role") == "system":
-             system_prompt = messages[0]["content"]
+             system_prompt: str = messages[0]["content"]
              messages = messages[1:]
          else:
              system_prompt = ""
@@ -197,7 +254,10 @@ class AnthropicChatClient(BaseChatClient):

          messages = format_messages_alternate(messages)

-         tools_params = refactor_tool_use_params(tools) if tools else tools
+         tools_params: list[AnthropicToolParam] | NotGiven = refactor_tool_use_params(tools) if tools else NOT_GIVEN
+         tool_choice_param = NOT_GIVEN
+         if tool_choice:
+             tool_choice_param = refactor_tool_choice(tool_choice)

          if max_tokens is None:
              max_output_tokens = self.model_setting.max_output_tokens
@@ -208,24 +268,23 @@ class AnthropicChatClient(BaseChatClient):
          else:
              max_tokens = self.model_setting.context_length - token_counts

-         response = self.raw_client.messages.create(
-             model=self.model_setting.id,
-             messages=messages,
-             system=system_prompt,
-             stream=self.stream,
-             temperature=self.temperature,
-             max_tokens=max_tokens,
-             tools=tools_params,
-             tool_choice=tool_choice,
-             **kwargs,
-         )
-
          if self.stream:
+             stream_response = self.raw_client.messages.create(
+                 model=self.model_setting.id,
+                 messages=messages,
+                 system=system_prompt,
+                 stream=True,
+                 temperature=self.temperature,
+                 max_tokens=max_tokens,
+                 tools=tools_params,
+                 tool_choice=tool_choice_param,
+                 **kwargs,
+             )

              def generator():
-                 result = {"content": ""}
-                 for chunk in response:
-                     message = {"content": ""}
+                 result = {"content": "", "usage": {}, "tool_calls": []}
+                 for chunk in stream_response:
+                     message = {"content": "", "tool_calls": []}
                      if isinstance(chunk, RawMessageStartEvent):
                          result["usage"] = {"prompt_tokens": chunk.message.usage.input_tokens}
                          continue
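
Moving the `messages.create(...)` call into each branch with a literal `stream=True`/`stream=False` (instead of one call with `stream=self.stream`) lets the Anthropic SDK's own overloads type the result precisely, an event stream in one branch and a complete message in the other, rather than a union. The reworked generator also seeds `usage` and `tool_calls` up front so later chunks can update them without key errors. Consuming it would look roughly like this (hypothetical usage; names taken from this diff):

```python
# Hypothetical usage of the streaming path; assumes a configured client.
client = AnthropicChatClient()
for delta in client.create_completion(
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
):
    if delta.content:
        print(delta.content, end="", flush=True)
    if delta.usage:  # the final chunk carries the token totals
        print("\ntokens:", delta.usage.total_tokens)
```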
@@ -268,10 +327,22 @@ class AnthropicChatClient(BaseChatClient):
                          result["usage"]["total_tokens"] = (
                              result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                          )
-                     yield ChatCompletionDeltaMessage(usage=result["usage"])
+                     yield ChatCompletionDeltaMessage(usage=Usage(**result["usage"]))

              return generator()
          else:
+             response = self.raw_client.messages.create(
+                 model=self.model_setting.id,
+                 messages=messages,
+                 system=system_prompt,
+                 stream=False,
+                 temperature=self.temperature,
+                 max_tokens=max_tokens,
+                 tools=tools_params,
+                 tool_choice=tool_choice_param,
+                 **kwargs,
+             )
+
              result = {
                  "content": "",
                  "usage": {
@@ -294,7 +365,7 @@ class AnthropicChatClient(BaseChatClient):


  class AsyncAnthropicChatClient(BaseAsyncChatClient):
-     DEFAULT_MODEL: str = defs.ANTHROPIC_DEFAULT_MODEL
+     DEFAULT_MODEL: str | None = defs.ANTHROPIC_DEFAULT_MODEL
      BACKEND_NAME: BackendType = BackendType.Anthropic

      def __init__(
@@ -319,7 +390,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
              **kwargs,
          )

-     @property
+     @cached_property
      def raw_client(self):
          if self.random_endpoint:
              self.random_endpoint = True
@@ -327,6 +398,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
              self.endpoint = settings.get_endpoint(self.endpoint_id)

          if self.endpoint.is_vertex:
+             if self.endpoint.credentials is None:
+                 raise ValueError("Anthropic Vertex endpoint requires credentials")
              self.creds = Credentials(
                  token=self.endpoint.credentials.get("token"),
                  refresh_token=self.endpoint.credentials.get("refresh_token"),
@@ -338,7 +411,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                  expiry=_helpers.utcnow() - _helpers.REFRESH_THRESHOLD,
                  rapt_token=self.endpoint.credentials.get("rapt_token"),
                  trust_boundary=self.endpoint.credentials.get("trust_boundary"),
-                 universe_domain=self.endpoint.credentials.get("universe_domain"),
+                 universe_domain=self.endpoint.credentials.get("universe_domain", "googleapis.com"),
                  account=self.endpoint.credentials.get("account", ""),
              )

@@ -350,10 +423,11 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
          else:
              base_url = f"{self.endpoint.api_base}{self.endpoint.region}-aiplatform/v1"

+         region = NOT_GIVEN if self.endpoint.region is None else self.endpoint.region
          return AsyncAnthropicVertex(
-             region=self.endpoint.region,
+             region=region,
              base_url=base_url,
-             project_id=self.endpoint.credentials.get("quota_project_id"),
+             project_id=self.endpoint.credentials.get("quota_project_id", NOT_GIVEN),
              access_token=self.creds.token,
              http_client=self.http_client,
          )
@@ -364,15 +438,46 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
              http_client=self.http_client,
          )

+     @overload
+     async def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[False] = False,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> ChatCompletionMessage:
+         pass
+
+     @overload
+     async def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[True] = True,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
+         pass
+
      async def create_completion(
          self,
-         messages: list = list,
+         messages: list,
          model: str | None = None,
          stream: bool | None = None,
          temperature: float | None = None,
          max_tokens: int | None = None,
-         tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
          **kwargs,
      ):
          if model is not None:
@@ -404,7 +509,10 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):

          messages = format_messages_alternate(messages)

-         tools_params = refactor_tool_use_params(tools) if tools else tools
+         tools_params: list[AnthropicToolParam] | NotGiven = refactor_tool_use_params(tools) if tools else NOT_GIVEN
+         tool_choice_param = NOT_GIVEN
+         if tool_choice:
+             tool_choice_param = refactor_tool_choice(tool_choice)

          if max_tokens is None:
              max_output_tokens = self.model_setting.max_output_tokens
@@ -415,24 +523,23 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
          else:
              max_tokens = self.model_setting.context_length - token_counts

-         response = await self.raw_client.messages.create(
-             model=self.model_setting.id,
-             messages=messages,
-             system=system_prompt,
-             stream=self.stream,
-             temperature=self.temperature,
-             max_tokens=max_tokens,
-             tools=tools_params,
-             tool_choice=tool_choice,
-             **kwargs,
-         )
-
          if self.stream:
+             stream_response = await self.raw_client.messages.create(
+                 model=self.model_setting.id,
+                 messages=messages,
+                 system=system_prompt,
+                 stream=True,
+                 temperature=self.temperature,
+                 max_tokens=max_tokens,
+                 tools=tools_params,
+                 tool_choice=tool_choice_param,
+                 **kwargs,
+             )

              async def generator():
-                 result = {"content": ""}
-                 async for chunk in response:
-                     message = {"content": ""}
+                 result = {"content": "", "usage": {}, "tool_calls": []}
+                 async for chunk in stream_response:
+                     message = {"content": "", "tool_calls": []}
                      if isinstance(chunk, RawMessageStartEvent):
                          result["usage"] = {"prompt_tokens": chunk.message.usage.input_tokens}
                          continue
@@ -475,10 +582,22 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                          result["usage"]["total_tokens"] = (
                              result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                          )
-                     yield ChatCompletionDeltaMessage(usage=result["usage"])
+                     yield ChatCompletionDeltaMessage(usage=Usage(**result["usage"]))

              return generator()
          else:
+             response = await self.raw_client.messages.create(
+                 model=self.model_setting.id,
+                 messages=messages,
+                 system=system_prompt,
+                 stream=False,
+                 temperature=self.temperature,
+                 max_tokens=max_tokens,
+                 tools=tools_params,
+                 tool_choice=tool_choice_param,
+                 **kwargs,
+             )
+
              result = {
                  "content": "",
                  "usage": {
@@ -1,22 +1,29 @@
  # @Author: Bi Ying
  # @Date: 2024-07-26 14:48:55
  from abc import ABC, abstractmethod
- from typing import Generator, AsyncGenerator, Any
+ from functools import cached_property
+ from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable

  import httpx
- from openai._types import NotGiven, NOT_GIVEN
  from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
  from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex

  from ..settings import settings
  from ..types import defaults as defs
  from ..types.enums import ContextLengthControlType, BackendType
- from ..types.llm_parameters import ChatCompletionMessage, ChatCompletionDeltaMessage
+ from ..types.llm_parameters import (
+     NotGiven,
+     NOT_GIVEN,
+     ToolParam,
+     ToolChoice,
+     ChatCompletionMessage,
+     ChatCompletionDeltaMessage,
+ )


  class BaseChatClient(ABC):
      DEFAULT_MODEL: str | None = None
-     BACKEND_NAME: BackendType | None = None
+     BACKEND_NAME: BackendType

      def __init__(
          self,
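
The remaining hunks touch the shared base-client module (the home of the `BaseChatClient`/`BaseAsyncChatClient` classes imported above). Two themes: `NotGiven`/`NOT_GIVEN` now come from the package's own `..types.llm_parameters` instead of `openai._types`, giving every backend a single sentinel type; and `BACKEND_NAME` tightens from `BackendType | None = None` to a bare `BackendType` annotation, which tells type checkers the attribute always exists and obliges subclasses to assign it. A stand-alone sketch of the annotation change:

```python
from abc import ABC
from enum import Enum

class BackendType(Enum):  # stand-in for the package's enum
    Anthropic = "anthropic"

class Base(ABC):
    BACKEND_NAME: BackendType  # bare annotation: no None default

class AnthropicClient(Base):
    BACKEND_NAME = BackendType.Anthropic  # subclasses must assign it

backend: BackendType = AnthropicClient.BACKEND_NAME  # no Optional narrowing needed
```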
@@ -44,9 +51,41 @@ class BaseChatClient(ABC):
          self.random_endpoint = False
          self.endpoint = settings.get_endpoint(self.endpoint_id)

-     @property
+     @cached_property
      @abstractmethod
-     def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex:
+     def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | httpx.Client | None:
+         pass
+
+     @overload
+     @abstractmethod
+     def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[False] = False,
+         temperature: float = 0.7,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> ChatCompletionMessage:
+         pass
+
+     @overload
+     @abstractmethod
+     def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[True] = True,
+         temperature: float = 0.7,
+         max_tokens: int | None = None,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
          pass

      @abstractmethod
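
Because the overloads now live on the abstract base as well, call sites against any backend that doesn't redeclare the signature get the same stream-dependent narrowing, e.g. (hypothetical snippet; the comments show what a checker such as pyright would infer):

```python
msgs = [{"role": "user", "content": "Hi"}]
client = AnthropicChatClient()

message = client.create_completion(messages=msgs, stream=False)
# inferred: ChatCompletionMessage -- fields available immediately

deltas = client.create_completion(messages=msgs, stream=True)
# inferred: Generator[ChatCompletionDeltaMessage, Any, None] -- must be iterated
```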
@@ -57,8 +96,8 @@ class BaseChatClient(ABC):
          stream: bool = False,
          temperature: float = 0.7,
          max_tokens: int | None = None,
-         tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
          response_format: dict | None = None,
          **kwargs,
      ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
@@ -70,8 +109,9 @@ class BaseChatClient(ABC):
          model: str | None = None,
          temperature: float = 0.7,
          max_tokens: int | None = None,
-         tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
          **kwargs,
      ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
          return self.create_completion(
@@ -82,13 +122,14 @@ class BaseChatClient(ABC):
              max_tokens=max_tokens,
              tools=tools,
              tool_choice=tool_choice,
+             response_format=response_format,
              **kwargs,
          )


  class BaseAsyncChatClient(ABC):
      DEFAULT_MODEL: str | None = None
-     BACKEND_NAME: BackendType | None = None
+     BACKEND_NAME: BackendType

      def __init__(
          self,
@@ -116,9 +157,43 @@ class BaseAsyncChatClient(ABC):
          self.random_endpoint = False
          self.endpoint = settings.get_endpoint(self.endpoint_id)

-     @property
+     @cached_property
      @abstractmethod
-     def raw_client(self) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex:
+     def raw_client(
+         self,
+     ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | httpx.AsyncClient | None:
+         pass
+
+     @overload
+     @abstractmethod
+     async def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[False] = False,
+         temperature: float = 0.7,
+         max_tokens: int | None = None,
+         tools: list | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> ChatCompletionMessage:
+         pass
+
+     @overload
+     @abstractmethod
+     async def create_completion(
+         self,
+         messages: list,
+         model: str | None = None,
+         stream: Literal[True] = True,
+         temperature: float = 0.7,
+         max_tokens: int | None = None,
+         tools: list | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
+         **kwargs,
+     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
          pass

      @abstractmethod
@@ -130,7 +205,7 @@ class BaseAsyncChatClient(ABC):
          temperature: float = 0.7,
          max_tokens: int | None = None,
          tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
          response_format: dict | None = None,
          **kwargs,
      ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
@@ -143,7 +218,8 @@ class BaseAsyncChatClient(ABC):
          temperature: float = 0.7,
          max_tokens: int | None = None,
          tools: list | NotGiven = NOT_GIVEN,
-         tool_choice: str | NotGiven = NOT_GIVEN,
+         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
+         response_format: dict | None = None,
          **kwargs,
      ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
          return await self.create_completion(
@@ -154,5 +230,6 @@ class BaseAsyncChatClient(ABC):
              max_tokens=max_tokens,
              tools=tools,
              tool_choice=tool_choice,
+             response_format=response_format,
              **kwargs,
          )
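
Finally, `create_stream` (sync and async alike) now accepts and forwards `response_format`, so it no longer silently drops the parameter relative to `create_completion`. Hypothetical usage (the value shape follows OpenAI's convention; whether a given backend honors it is up to that backend):

```python
async def run() -> None:
    client = AsyncAnthropicChatClient()
    stream = await client.create_stream(
        messages=[{"role": "user", "content": "Reply with a JSON object."}],
        response_format={"type": "json_object"},  # now reaches the backend
    )
    async for delta in stream:
        print(delta.content or "", end="")
```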