vectorvein 0.1.56__tar.gz → 0.1.57__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {vectorvein-0.1.56 → vectorvein-0.1.57}/PKG-INFO +1 -1
  2. {vectorvein-0.1.56 → vectorvein-0.1.57}/pyproject.toml +1 -1
  3. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/anthropic_client.py +28 -8
  4. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/base_client.py +10 -0
  5. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/gemini_client.py +24 -4
  6. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/minimax_client.py +20 -0
  7. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/openai_compatible_client.py +17 -0
  8. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/utils.py +6 -1
  9. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/defaults.py +25 -1
  10. {vectorvein-0.1.56 → vectorvein-0.1.57}/README.md +0 -0
  11. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/xai_client.py +0 -0
  24. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/yi_client.py +0 -0
  25. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  26. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/py.typed +0 -0
  27. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/server/token_server.py +0 -0
  28. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/settings/py.typed +0 -0
  30. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/llm_parameters.py +0 -0
  33. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/types/py.typed +0 -0
  34. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/utilities/media_processing.py +0 -0
  35. {vectorvein-0.1.56 → vectorvein-0.1.57}/src/vectorvein/utilities/retry.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.56
+Version: 0.1.57
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
pyproject.toml
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.56"
+version = "0.1.57"
 
 [project.license]
 text = "MIT"
src/vectorvein/chat_clients/anthropic_client.py
@@ -249,6 +249,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -265,6 +266,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -281,6 +283,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -296,6 +299,7 @@ class AnthropicChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -304,10 +308,6 @@ class AnthropicChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -348,6 +348,7 @@ class AnthropicChatClient(BaseChatClient):
                     tool_choice=_tool_choice,
                     response_format=response_format,
                     stream_options=stream_options,
+                    top_p=top_p,
                     **kwargs,
                 )
                 for chunk in response:
@@ -374,11 +375,19 @@ class AnthropicChatClient(BaseChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
                 **kwargs,
             )
 
         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -424,6 +433,7 @@ class AnthropicChatClient(BaseChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -486,6 +496,7 @@ class AnthropicChatClient(BaseChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -614,6 +625,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -630,6 +642,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -646,6 +659,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -661,6 +675,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -669,10 +684,6 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
-        if isinstance(tools, OpenAINotGiven):
-            tools = NOT_GIVEN
-        if isinstance(tool_choice, OpenAINotGiven):
-            tool_choice = NOT_GIVEN
 
         if self.random_endpoint:
             self.random_endpoint = True
@@ -745,6 +756,13 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
 
         assert isinstance(self.raw_client, AsyncAnthropic | AsyncAnthropicVertex)
 
+        if isinstance(tools, OpenAINotGiven):
+            tools = NOT_GIVEN
+        if isinstance(tool_choice, OpenAINotGiven):
+            tool_choice = NOT_GIVEN
+        if isinstance(top_p, OpenAINotGiven) or top_p is None:
+            top_p = NOT_GIVEN
+
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
         if self.model_id is None:
@@ -790,6 +808,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
@@ -852,6 +871,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 max_tokens=max_tokens,
                 tools=tools_params,
                 tool_choice=tool_choice_param,
+                top_p=top_p,
                 **kwargs,
             )
 
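The relocated isinstance checks are a small cross-SDK shim: OpenAI and Anthropic each ship their own NotGiven sentinel, and the Anthropic SDK only omits parameters set to its own NOT_GIVEN. Running the coercion after the OpenAI-compatible delegation branches also keeps the original OpenAI sentinels intact for delegated calls. A minimal sketch of the pattern, assuming only the two official SDKs (the helper name normalize_top_p is illustrative, not part of the package):

from anthropic import NOT_GIVEN, NotGiven
from openai import NotGiven as OpenAINotGiven

def normalize_top_p(top_p: float | NotGiven | OpenAINotGiven | None) -> float | NotGiven:
    # Swap OpenAI's sentinel (or a bare None) for Anthropic's NOT_GIVEN so
    # the Anthropic SDK drops the field instead of receiving a foreign type.
    if isinstance(top_p, OpenAINotGiven) or top_p is None:
        return NOT_GIVEN
    return top_p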
src/vectorvein/chat_clients/base_client.py
@@ -73,6 +73,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -90,6 +91,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +109,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -123,6 +126,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -137,6 +141,7 @@ class BaseChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -206,6 +211,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -223,6 +229,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -240,6 +247,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -256,6 +264,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -270,6 +279,7 @@ class BaseAsyncChatClient(ABC):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
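All of these signatures default top_p to NOT_GIVEN rather than None, which lets an implementation distinguish "caller passed nothing" from "caller explicitly passed None". A self-contained sketch of why that matters, using the OpenAI SDK's sentinel (build_params is a hypothetical helper, not part of this package):

from openai import NOT_GIVEN, NotGiven

def build_params(top_p: float | NotGiven | None = NOT_GIVEN) -> dict:
    # The sentinel means "omit the field entirely"; anything else, including
    # an explicit None, produces a key in the outgoing parameters.
    if isinstance(top_p, NotGiven):
        return {}
    return {"top_p": top_p}

assert build_params() == {}
assert build_params(0.9) == {"top_p": 0.9}
assert build_params(None) == {"top_p": None}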
src/vectorvein/chat_clients/gemini_client.py
@@ -65,6 +65,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -81,6 +82,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -97,6 +99,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -112,6 +115,7 @@ class GeminiChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -137,16 +141,19 @@ class GeminiChatClient(BaseChatClient):
             model=self.model_setting.id,
         )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -168,6 +175,7 @@ class GeminiChatClient(BaseChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -230,6 +238,8 @@ class GeminiChatClient(BaseChatClient):
         else:
             client = httpx.Client()
             response = client.post(url, json=request_body, headers=headers, params=params, timeout=None).json()
+            if "error" in response:
+                raise Exception(response["error"])
         result = {
             "content": "",
             "usage": {
@@ -302,6 +312,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +329,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -334,6 +346,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -349,6 +362,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -374,16 +388,19 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             model=self.model_setting.id,
         )
 
+        tools_params = {}
         if tools:
             tools_params = {"tools": [{"function_declarations": [tool["function"] for tool in tools]}]}
-        else:
-            tools_params = {}
 
         response_format_params = {}
         if response_format is not None:
             if response_format.get("type") == "json_object":
                 response_format_params = {"response_mime_type": "application/json"}
 
+        top_p_params = {}
+        if top_p:
+            top_p_params = {"top_p": top_p}
+
         if self.random_endpoint:
             self.random_endpoint = True
             endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
@@ -405,6 +422,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
             "generationConfig": {
                 "temperature": self.temperature,
                 "maxOutputTokens": max_tokens,
+                **top_p_params,
                 **response_format_params,
             },
             **tools_params,
@@ -469,6 +487,8 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         async with client:
             response = await client.post(url, json=request_body, headers=headers, params=params, timeout=None)
             response = response.json()
+            if "error" in response:
+                raise Exception(response["error"])
         result = {
             "content": "",
             "usage": {
src/vectorvein/chat_clients/minimax_client.py
@@ -91,6 +91,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -107,6 +108,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -123,6 +125,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -138,6 +141,7 @@ class MiniMaxChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -190,6 +194,11 @@ class MiniMaxChatClient(BaseChatClient):
         else:
             tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -218,6 +227,7 @@ class MiniMaxChatClient(BaseChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
@@ -326,6 +336,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +353,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -358,6 +370,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -373,6 +386,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -423,6 +437,11 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         else:
             tools_params = {}
 
+        if top_p:
+            top_p_params = {"top_p": top_p}
+        else:
+            top_p_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             if max_output_tokens is not None:
@@ -451,6 +470,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             "temperature": self.temperature,
             "stream": self.stream,
             "mask_sensitive_info": False,
+            **top_p_params,
             **tools_params,
             **kwargs,
         }
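The `if top_p:` guard used here (and in the Gemini clients) leans on sentinel truthiness: both SDKs define NotGiven.__bool__ as False, so NOT_GIVEN, None, and an explicit 0.0 all leave the request without a top_p key. A quick check of that behavior (top_p_params here is a standalone mirror of the guard, not a package function):

from openai import NOT_GIVEN

def top_p_params(top_p) -> dict:
    # Mirrors the client's guard; note that a deliberate 0.0 is also dropped.
    return {"top_p": top_p} if top_p else {}

assert top_p_params(NOT_GIVEN) == {}
assert top_p_params(None) == {}
assert top_p_params(0.0) == {}
assert top_p_params(0.95) == {"top_p": 0.95}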
src/vectorvein/chat_clients/openai_compatible_client.py
@@ -24,6 +24,7 @@ from ..types.llm_parameters import (
     ToolParam,
     ToolChoice,
     OpenAINotGiven,
+    AnthropicNotGiven,
     Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
@@ -97,6 +98,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -113,6 +115,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -129,6 +132,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -144,6 +148,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -152,6 +157,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
@@ -206,6 +213,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 stream=True,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **_stream_options_params,
                 **self.response_format,
                 **tools_params,
@@ -260,6 +268,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 stream=False,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -351,6 +360,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -367,6 +377,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -383,6 +394,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -398,6 +410,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
+        top_p: float | NotGiven | None = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -406,6 +419,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         self.stream = stream
         if temperature is not None:
             self.temperature = temperature
+        if isinstance(top_p, AnthropicNotGiven):
+            top_p = NOT_GIVEN
 
         raw_client = self.raw_client  # once self.raw_client has been accessed, self.model_id is assigned
         self.model_setting = self.backend_settings.models[self.model]
@@ -460,6 +475,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **_stream_options_params,
                 **self.response_format,
                 **tools_params,
@@ -515,6 +531,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                top_p=top_p,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
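Unlike the dict-based Gemini and MiniMax clients, the OpenAI-compatible client can hand top_p straight to the SDK, which strips NOT_GIVEN parameters when serializing the request; only the foreign AnthropicNotGiven sentinel needs coercion first. A hedged usage sketch against the raw OpenAI SDK (the key and model name are placeholders):

from openai import OpenAI, NOT_GIVEN

client = OpenAI(api_key="sk-placeholder")
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hi"}],
    top_p=NOT_GIVEN,  # omitted from the wire request entirely
)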
src/vectorvein/chat_clients/utils.py
@@ -201,6 +201,10 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         result = response.json()
         return result["data"]["total_tokens"]
     elif model.startswith("gemini"):
+        # TODO: gemini-exp-1114 is not supported yet; fall back to gemini-1.5-flash
+        if model == "gemini-exp-1114":
+            model = "gemini-1.5-flash"
+
         model_setting = settings.gemini.models[model]
         if len(model_setting.endpoints) == 0:
             return len(get_gpt_35_encoding().encode(text))
@@ -208,13 +212,14 @@ def get_token_counts(text: str | dict, model: str = "", use_token_server_first:
         if isinstance(endpoint_id, dict):
             endpoint_id = endpoint_id["endpoint_id"]
         endpoint = settings.get_endpoint(endpoint_id)
+
         base_url = f"{endpoint.api_base}/models/{model_setting.id}:countTokens"
         params = {"key": endpoint.api_key}
         request_body = {
             "contents": {
                 "role": "USER",
                 "parts": [
-                    {"text": "TEXT"},
+                    {"text": text},
                 ],
             },
         }
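The second hunk fixes a real bug: the countTokens request previously sent the literal string "TEXT" instead of the input, so Gemini token counts were computed against a constant. A hedged sketch of the corrected call (the endpoint base and API key are placeholders):

import httpx

api_base = "https://generativelanguage.googleapis.com/v1beta"  # placeholder
model_id = "gemini-1.5-flash"
text = "How many tokens is this sentence?"

response = httpx.post(
    f"{api_base}/models/{model_id}:countTokens",
    params={"key": "YOUR_API_KEY"},  # placeholder
    json={
        "contents": {
            "role": "USER",
            "parts": [{"text": text}],  # the actual input, not the literal "TEXT"
        },
    },
)
print(response.json().get("totalTokens"))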
src/vectorvein/types/defaults.py
@@ -609,6 +609,13 @@ MINIMAX_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": True,
         "response_format_available": True,
     },
+    "abab7-preview": {
+        "id": "abab7-preview",
+        "context_length": 245760,
+        "max_output_tokens": 245760,
+        "function_call_available": True,
+        "response_format_available": True,
+    },
 }
 
 # Gemini models
@@ -616,7 +623,8 @@ GEMINI_DEFAULT_MODEL: Final[str] = "gemini-1.5-pro"
 GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-pro": {
         "id": "gemini-1.5-pro",
-        "context_length": 1048576,
+        "context_length": 2097152,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,
@@ -624,6 +632,22 @@ GEMINI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "gemini-1.5-flash": {
         "id": "gemini-1.5-flash",
         "context_length": 1048576,
+        "max_output_tokens": 8192,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-exp-1114": {
+        "id": "gemini-exp-1114",
+        "context_length": 32767,
+        "function_call_available": True,
+        "response_format_available": True,
+        "native_multimodal": True,
+    },
+    "gemini-1.5-flash-8b": {
+        "id": "gemini-1.5-flash-8b",
+        "context_length": 1048576,
+        "max_output_tokens": 8192,
         "function_call_available": True,
         "response_format_available": True,
         "native_multimodal": True,