vectorvein 0.1.56__py3-none-any.whl → 0.1.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectorvein/chat_clients/anthropic_client.py +44 -11
- vectorvein/chat_clients/base_client.py +18 -0
- vectorvein/chat_clients/gemini_client.py +38 -10
- vectorvein/chat_clients/minimax_client.py +30 -2
- vectorvein/chat_clients/openai_compatible_client.py +27 -2
- vectorvein/chat_clients/utils.py +6 -1
- vectorvein/types/defaults.py +25 -1
- {vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/METADATA +1 -1
- {vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/RECORD +11 -11
- {vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/WHEEL +0 -0
- {vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/entry_points.txt +0 -0
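Functionally, the step from 0.1.56 to 0.1.59 threads two new keyword parameters through every chat client and the abstract base classes: `top_p` (nucleus sampling, defaulting to `NOT_GIVEN`) and `skip_cutoff` (when true, the client skips the context-length cutoff of older messages). The Gemini and MiniMax clients additionally forward `top_p` into their request bodies, the Gemini HTTP clients gain a 300-second timeout and an explicit error check, and new model entries land in `defaults.py`. A minimal usage sketch follows; the constructor call and the `messages` shape are assumptions for illustration, while the `top_p` and `skip_cutoff` keywords and their defaults come directly from the hunks below.

```python
# Hypothetical usage sketch (constructor arguments and message shape assumed;
# the top_p / skip_cutoff keywords are taken from the 0.1.59 signatures below).
from vectorvein.chat_clients.anthropic_client import AnthropicChatClient

client = AnthropicChatClient()  # assumes backend settings are configured elsewhere

message = client.create_completion(
    messages=[{"role": "user", "content": "Say hello."}],
    top_p=0.9,         # new in 0.1.59: nucleus-sampling cap, NOT_GIVEN when omitted
    skip_cutoff=True,  # new in 0.1.59: do not trim old messages to the context length
)
```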
vectorvein/chat_clients/anthropic_client.py CHANGED

@@ -249,6 +249,8 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -265,6 +267,8 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -281,6 +285,8 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -296,6 +302,8 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -304,10 +312,6 @@ class AnthropicChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -348,6 +352,8 @@ class AnthropicChatClient(BaseChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             for chunk in response:
@@ -373,12 +379,20 @@ class AnthropicChatClient(BaseChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
-
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # after self.raw_client is accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -390,7 +404,7 @@ class AnthropicChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -424,6 +438,7 @@ class AnthropicChatClient(BaseChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice_param,
+            top_p=top_p,
             **kwargs,
         )
 
@@ -486,6 +501,7 @@ class AnthropicChatClient(BaseChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice_param,
+            top_p=top_p,
             **kwargs,
         )
 
@@ -614,6 +630,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -630,6 +648,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -646,6 +666,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -661,6 +683,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -669,10 +693,6 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -714,6 +734,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             async for chunk in response:
@@ -740,11 +762,20 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
         assert isinstance(self.raw_client, AsyncAnthropic | AsyncAnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # after self.raw_client is accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -756,7 +787,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -790,6 +821,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice_param,
+            top_p=top_p,
             **kwargs,
         )
 
@@ -852,6 +884,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
             max_tokens=max_tokens,
             tools=tools_params,
             tool_choice=tool_choice_param,
+            top_p=top_p,
             **kwargs,
         )
 
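The Anthropic clients accept OpenAI-style sentinels (`OpenAINotGiven` / `NOT_GIVEN`) but must hand the Anthropic SDK its own `NOT_GIVEN` sentinel. 0.1.59 moves that normalization below the branch that delegates to an OpenAI-compatible backend, so the delegate still sees the caller's original values, and extends it to `top_p`, treating both `None` and an OpenAI sentinel as "not set". A standalone sketch of the idea follows; the helper function is my own framing (the package performs these checks inline), and in the package the sentinel types are routed through `vectorvein.types.llm_parameters` rather than imported from the SDKs directly.

```python
# Simplified sketch of the sentinel normalization shown in the hunks above.
from anthropic import NOT_GIVEN                 # Anthropic SDK "omit this field" sentinel
from openai import NotGiven as OpenAINotGiven   # OpenAI SDK sentinel for the same idea

def normalize_for_anthropic(tools, tool_choice, top_p):
    # OpenAI-style "not given" markers must become the Anthropic sentinel,
    # otherwise the Anthropic SDK would try to serialize them literally.
    if isinstance(tools, OpenAINotGiven):
        tools = NOT_GIVEN
    if isinstance(tool_choice, OpenAINotGiven):
        tool_choice = NOT_GIVEN
    # top_p additionally treats None as "not set", so the field is omitted
    # from the request instead of being sent as null.
    if isinstance(top_p, OpenAINotGiven) or top_p is None:
        top_p = NOT_GIVEN
    return tools, tool_choice, top_p
```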
vectorvein/chat_clients/base_client.py CHANGED

@@ -73,6 +73,8 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -90,6 +92,8 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +111,8 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -123,6 +129,8 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -137,6 +145,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -206,6 +215,8 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -223,6 +234,8 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -240,6 +253,8 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -256,6 +271,8 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -270,6 +287,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
vectorvein/chat_clients/gemini_client.py CHANGED

@@ -65,6 +65,8 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -81,6 +83,8 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -97,6 +101,8 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -112,6 +118,8 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -129,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -137,16 +145,19 @@ class GeminiChatClient(BaseChatClient):
                 model=self.model_setting.id,
             )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -168,6 +179,7 @@ class GeminiChatClient(BaseChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -189,7 +201,7 @@ class GeminiChatClient(BaseChatClient):
         if self.http_client:
             client = self.http_client
         else:
-            client = httpx.Client()
+            client = httpx.Client(timeout=300)
         with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
             for chunk in response.iter_lines():
                 message = {"content": "", "tool_calls": []}
@@ -228,8 +240,10 @@ class GeminiChatClient(BaseChatClient):
         if self.http_client:
             client = self.http_client
         else:
-            client = httpx.Client()
+            client = httpx.Client(timeout=300)
         response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
+        if "error" in response:
+            raise Exception(response["error"])
         result = {
             "content": "",
             "usage": {
@@ -302,6 +316,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +334,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -334,6 +352,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -349,6 +369,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -366,7 +388,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -374,16 +396,19 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
                 model=self.model_setting.id,
             )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -405,6 +430,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -426,7 +452,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         if self.http_client:
             client = self.http_client
         else:
-            client = httpx.AsyncClient()
+            client = httpx.AsyncClient(timeout=300)
         async with client.stream("POST", url, headers=headers, params=params, json=request_body) as response:
             async for chunk in response.aiter_lines():
                 message = {"content": "", "tool_calls": []}
@@ -465,10 +491,12 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         if self.http_client:
             client = self.http_client
         else:
-            client = httpx.AsyncClient()
+            client = httpx.AsyncClient(timeout=300)
         async with client:
             response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
             response = response.json()
+            if "error" in response:
+                raise Exception(response["error"])
             result = {
                 "content": "",
                 "usage": {
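The Gemini clients talk to the REST API directly, so optional parameters are merged into the JSON body as small dicts that stay empty when the parameter is absent; 0.1.59 adds `top_p` to that pattern, pre-initializes `tools_params`, bumps the httpx clients to a 300-second timeout, and raises when the non-streaming response carries an `"error"` key. Below is a self-contained sketch of the body assembly and the new error check; the helper names, the function signatures, and the `contents` shape are illustrative, not the package's API, and headers are omitted.

```python
import httpx

def build_gemini_body(contents, temperature, max_tokens, top_p=None,
                      response_format_params=None, tools_params=None):
    # Mirrors the dict-merging pattern from the hunks above: optional sections
    # are empty dicts unless set, so absent parameters never appear in the JSON.
    top_p_params = {"top_p": top_p} if top_p else {}
    return {
        "contents": contents,  # assumed shape; message conversion is not shown in this diff
        "generationConfig": {
            "temperature": temperature,
            "maxOutputTokens": max_tokens,
            **top_p_params,
            **(response_format_params or {}),
        },
        **(tools_params or {}),
    }

def post_gemini(url, body, params):
    # 0.1.59 uses httpx.Client(timeout=300) and surfaces API errors explicitly
    # instead of failing later while parsing a normal-looking result.
    with httpx.Client(timeout=300) as client:
        response = client.post(url, json=body, params=params).json()
    if "error" in response:
        raise Exception(response["error"])
    return response
```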
vectorvein/chat_clients/minimax_client.py CHANGED

@@ -91,6 +91,8 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -107,6 +109,8 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -123,6 +127,8 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -138,6 +144,8 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -162,7 +170,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -190,6 +198,11 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -218,6 +231,7 @@ class MiniMaxChatClient(BaseChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
@@ -326,6 +340,8 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +358,8 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -358,6 +376,8 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -373,6 +393,8 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -397,7 +419,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -423,6 +445,11 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
            tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -451,6 +478,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
vectorvein/chat_clients/openai_compatible_client.py CHANGED

@@ -24,6 +24,7 @@ from ..types.llm_parameters import (
     ToolParam,
     ToolChoice,
     OpenAINotGiven,
+    AnthropicNotGiven,
     Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
@@ -97,6 +98,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -113,6 +116,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -129,6 +134,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -144,6 +151,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -152,13 +161,15 @@ class OpenAICompatibleChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # after self.raw_client is accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -206,6 +217,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             stream=True,
             temperature=self.temperature,
             max_tokens=max_tokens,
+            top_p=top_p,
             **_stream_options_params,
             **self.response_format,
             **tools_params,
@@ -260,6 +272,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             stream=False,
             temperature=self.temperature,
             max_tokens=max_tokens,
+            top_p=top_p,
             **self.response_format,
             **tools_params,
             **kwargs,
@@ -351,6 +364,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -367,6 +382,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -383,6 +400,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -398,6 +417,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -406,13 +427,15 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # after self.raw_client is accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -460,6 +483,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
+            top_p=top_p,
             **_stream_options_params,
             **self.response_format,
             **tools_params,
@@ -515,6 +539,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             stream=self.stream,
             temperature=self.temperature,
             max_tokens=max_tokens,
+            top_p=top_p,
             **self.response_format,
             **tools_params,
             **kwargs,
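In the OpenAI-compatible clients the sentinel direction is reversed: a caller may hand over an Anthropic-style "not given" marker (hence the new `AnthropicNotGiven` import), which is mapped to the OpenAI SDK's `NOT_GIVEN` so the field is simply omitted, and the value is then forwarded as `top_p=top_p` to the chat completions call. A short sketch of that mapping; the helper function is illustrative, and in the package the sentinel types come from `vectorvein.types.llm_parameters` rather than from the SDKs directly.

```python
# Illustrative sketch of the top_p handling added to the OpenAI-compatible clients.
from openai import NOT_GIVEN
from anthropic import NotGiven as AnthropicNotGiven

def normalize_for_openai(top_p):
    # An Anthropic-style "not given" marker becomes the OpenAI sentinel, so the
    # SDK leaves top_p out of the request entirely rather than sending null.
    if isinstance(top_p, AnthropicNotGiven):
        return NOT_GIVEN
    return top_p

# Assumed call site, matching the hunks above:
# client.chat.completions.create(..., max_tokens=max_tokens, top_p=normalize_for_openai(top_p), ...)
```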
vectorvein/chat_clients/utils.py CHANGED

@@ -201,6 +201,10 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
+        # TODO: gemini-exp-1114 is not supported yet; use gemini-1.5-flash instead
+        if model == "gemini-exp-1114":
+            model = "gemini-1.5-flash"
+
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -208,13 +212,14 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         if isinstance(endpoint_id, dict):
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
+
         base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
                 "role": "USER",
                 "parts": [
-                    {"text":
+                    {"text": text},
                 ],
             },
         }
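Two fixes land in `get_token_counts`: the Gemini branch remaps `gemini-exp-1114` to `gemini-1.5-flash` before looking the model up (the experimental model is not supported for token counting yet), and the `countTokens` request body now actually carries the text as `{"text": text}`. A compact sketch of the corrected call; endpoint resolution is simplified here (the package takes `api_base` and `api_key` from its settings), and the response field name is an assumption since the hunk does not show the parsing.

```python
import httpx

def gemini_count_tokens(api_base: str, api_key: str, model_id: str, text: str) -> int:
    if model_id == "gemini-exp-1114":
        model_id = "gemini-1.5-flash"  # countTokens fallback, as in the TODO above
    url = f"{api_base}/models/{model_id}:countTokens"
    request_body = {
        "contents": {
            "role": "USER",
            "parts": [
                {"text": text},  # 0.1.59 fixes this previously truncated entry
            ],
        },
    }
    response = httpx.post(url, json=request_body, params={"key": api_key})
    return response.json()["totalTokens"]  # assumed field name; not shown in the diff
```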
vectorvein/types/defaults.py CHANGED

@@ -609,6 +609,13 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": True,
         "response_format_available": True,
     },
+    "abab7-preview": {
+        "id": "abab7-preview",
+        "context_length": 245760,
+        "max_output_tokens": 245760,
+        "function_call_available": True,
+        "response_format_available": True,
+    },
 }
 
 # Gemini models
@@ -616,7 +623,8 @@ GEMINI_DEFAULT_MODEL: Final[str] = "gemini-1.5-pro"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
-        "context_length":
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,
@@ -624,6 +632,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-exp-1114": {
+        "id": "gemini-exp-1114",
+        "context_length": 32767,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-1.5-flash-8b": {
+        "id": "gemini-1.5-flash-8b",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,
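The new entries follow the existing schema (`id`, `context_length`, optional `max_output_tokens`, capability flags). Elsewhere in the package these values feed the message cutoff and the default output-token limit, as the MiniMax and Gemini hunks above show. A small sketch of that consumption; `ModelSetting` and the 2048 fallback are stand-ins of my own, not the package's real settings objects.

```python
from dataclasses import dataclass

@dataclass
class ModelSetting:            # stand-in for the package's model settings object
    id: str
    context_length: int
    max_output_tokens: int | None = None

def resolve_max_tokens(requested: int | None, setting: ModelSetting, fallback: int = 2048) -> int:
    # Mirrors the "if max_tokens is None" branch in the client hunks: fall back
    # to the model's configured max_output_tokens when the caller sets no limit.
    if requested is not None:
        return requested
    if setting.max_output_tokens is not None:
        return setting.max_output_tokens
    return fallback  # assumed default; the actual fallback is not shown in this diff

abab7 = ModelSetting(id="abab7-preview", context_length=245760, max_output_tokens=245760)
print(resolve_max_tokens(None, abab7))  # 245760
```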
{vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/RECORD CHANGED

@@ -1,24 +1,24 @@
-vectorvein-0.1.
-vectorvein-0.1.
-vectorvein-0.1.
+vectorvein-0.1.59.dist-info/METADATA,sha256=GCU-0oftBD1QtnzFXvknbCMq1mdXwBiduQT_B6y95eI,633
+vectorvein-0.1.59.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+vectorvein-0.1.59.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
 vectorvein/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectorvein/chat_clients/__init__.py,sha256=Oev7Lv1DIEWCMD-2Pm7e2cwzX7JFQTnIK-j6o4iUuyQ,17725
-vectorvein/chat_clients/anthropic_client.py,sha256=
+vectorvein/chat_clients/anthropic_client.py,sha256=phDFgXPV-eNla7ZFPdcZx1fPOWlAFWc3C_mP9VcNkO0,37439
 vectorvein/chat_clients/baichuan_client.py,sha256=CVMvpgjdrZGv0BWnTOBD-f2ufZ3wq3496wqukumsAr4,526
-vectorvein/chat_clients/base_client.py,sha256=
+vectorvein/chat_clients/base_client.py,sha256=N1Swm6b9Gos7zLSH-qCSxgnDRCHPmuWZcw_H9zVnGJs,10297
 vectorvein/chat_clients/deepseek_client.py,sha256=3qWu01NlJAP2N-Ff62d5-CZXZitlizE1fzb20LNetig,526
-vectorvein/chat_clients/gemini_client.py,sha256=
+vectorvein/chat_clients/gemini_client.py,sha256=e7xZdZm0-W2iXy3S-J5b1bO9YqhGxcv0Y5HPYcQnDds,21098
 vectorvein/chat_clients/groq_client.py,sha256=Uow4pgdmFi93ZQSoOol2-0PhhqkW-S0XuSldvppz5U4,498
 vectorvein/chat_clients/local_client.py,sha256=55nOsxzqUf79q3Y14MKROA71zxhsT7p7FsDZ89rts2M,422
-vectorvein/chat_clients/minimax_client.py,sha256=
+vectorvein/chat_clients/minimax_client.py,sha256=uNYz3ccJOhTAgzkDNvWfM4_LaBaOpHjrne1YNieba28,20122
 vectorvein/chat_clients/mistral_client.py,sha256=1aKSylzBDaLYcFnaBIL4-sXSzWmXfBeON9Q0rq-ziWw,534
 vectorvein/chat_clients/moonshot_client.py,sha256=gbu-6nGxx8uM_U2WlI4Wus881rFRotzHtMSoYOcruGU,526
 vectorvein/chat_clients/openai_client.py,sha256=Nz6tV45pWcsOupxjnsRsGTicbQNJWIZyxuJoJ5DGMpg,527
-vectorvein/chat_clients/openai_compatible_client.py,sha256=
+vectorvein/chat_clients/openai_compatible_client.py,sha256=hbSggW5itvTRk3Qb3rejNTt3vd0r6R95irLTjeSzm2g,23102
 vectorvein/chat_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectorvein/chat_clients/qwen_client.py,sha256=-ryh-m9PgsO0fc4ulcCmPTy1155J8YUy15uPoJQOHA0,513
 vectorvein/chat_clients/stepfun_client.py,sha256=zsD2W5ahmR4DD9cqQTXmJr3txrGuvxbRWhFlRdwNijI,519
-vectorvein/chat_clients/utils.py,sha256=
+vectorvein/chat_clients/utils.py,sha256=6rjXUv39XMIEKKmA-vG8HzOHntCVeFvlrirZcduKCIA,26828
 vectorvein/chat_clients/xai_client.py,sha256=eLFJJrNRJ-ni3DpshODcr3S1EJQLbhVwxyO1E54LaqM,491
 vectorvein/chat_clients/yi_client.py,sha256=RNf4CRuPJfixrwLZ3-DEc3t25QDe1mvZeb9sku2f8Bc,484
 vectorvein/chat_clients/zhipuai_client.py,sha256=Ys5DSeLCuedaDXr3PfG1EW2zKXopt-awO2IylWSwY0s,519
@@ -26,11 +26,11 @@ vectorvein/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectorvein/server/token_server.py,sha256=36F9PKSNOX8ZtYBXY_l-76GQTpUSmQ2Y8EMy1H7wtdQ,1353
 vectorvein/settings/__init__.py,sha256=g01y74x0k2JEAqNpRGG0PDs0NTULjOAZV6HRhydPX1c,3874
 vectorvein/settings/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vectorvein/types/defaults.py,sha256=
+vectorvein/types/defaults.py,sha256=DC0fJ2MtXYNXiWkDdnpGYXuwCoSOcuB6PseI_y-VDo0,23730
 vectorvein/types/enums.py,sha256=7KTJSVtQueImmbr1fSwv3rQVtc0RyMWXJmoE2tDOaso,1667
 vectorvein/types/exception.py,sha256=gnW4GnJ76jND6UGnodk9xmqkcbeS7Cz2rvncA2HpD5E,69
 vectorvein/types/llm_parameters.py,sha256=5o-C_yXxxQWZy_e8OWowB2107GTS-Eawx4Mvb1q55Co,5256
 vectorvein/types/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vectorvein/utilities/media_processing.py,sha256=CTRq-lGlFkFgP_FSRhNwF_qUgmOrXPf2_1Ok9HY42_g,5887
 vectorvein/utilities/retry.py,sha256=6KFS9R2HdhqM3_9jkjD4F36ZSpEx2YNFGOVlpOsUetM,2208
-vectorvein-0.1.
+vectorvein-0.1.59.dist-info/RECORD,,
{vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/WHEEL: File without changes

{vectorvein-0.1.56.dist-info → vectorvein-0.1.59.dist-info}/entry_points.txt: File without changes