vectorvein 0.1.58__tar.gz → 0.1.60__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {vectorvein-0.1.58 → vectorvein-0.1.60}/PKG-INFO +2 -2
  2. {vectorvein-0.1.58 → vectorvein-0.1.60}/pyproject.toml +2 -2
  3. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/anthropic_client.py +42 -4
  4. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/base_client.py +28 -3
  5. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/gemini_client.py +10 -2
  6. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/minimax_client.py +10 -2
  7. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_compatible_client.py +10 -2
  8. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/defaults.py +22 -0
  9. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/llm_parameters.py +1 -0
  10. {vectorvein-0.1.58 → vectorvein-0.1.60}/README.md +0 -0
  11. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/utils.py +0 -0
  24. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/xai_client.py +0 -0
  25. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/yi_client.py +0 -0
  26. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  27. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/py.typed +0 -0
  28. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/server/token_server.py +0 -0
  29. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/__init__.py +0 -0
  30. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/settings/py.typed +0 -0
  31. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/enums.py +0 -0
  32. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/exception.py +0 -0
  33. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/types/py.typed +0 -0
  34. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/media_processing.py +0 -0
  35. {vectorvein-0.1.58 → vectorvein-0.1.60}/src/vectorvein/utilities/retry.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.58
+Version: 0.1.60
 Summary: VectorVein python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -8,7 +8,7 @@ Requires-Python: >=3.10
 Requires-Dist: openai>=1.37.1
 Requires-Dist: tiktoken>=0.7.0
 Requires-Dist: httpx>=0.27.0
-Requires-Dist: anthropic[vertex]>=0.31.2
+Requires-Dist: anthropic[bedrock,vertex]>=0.31.2
 Requires-Dist: pydantic>=2.8.2
 Requires-Dist: Pillow>=10.4.0
 Requires-Dist: deepseek-tokenizer>=0.1.0
@@ -6,7 +6,7 @@ dependencies = [
     "openai>=1.37.1",
     "tiktoken>=0.7.0",
     "httpx>=0.27.0",
-    "anthropic[vertex]>=0.31.2",
+    "anthropic[vertex,bedrock]>=0.31.2",
     "pydantic>=2.8.2",
     "Pillow>=10.4.0",
     "deepseek-tokenizer>=0.1.0",
@@ -17,7 +17,7 @@ description = "VectorVein python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.58"
+version = "0.1.60"
 
 [project.license]
 text = "MIT"
@@ -8,7 +8,14 @@ from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable
 import httpx
 from openai._types import NotGiven as OpenAINotGiven
 from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 from anthropic._types import NOT_GIVEN
 from anthropic.types import (
     TextBlock,
@@ -219,6 +226,15 @@ class AnthropicChatClient(BaseChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("access_key"),
+                aws_secret_key=self.endpoint.credentials.get("secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
            return Anthropic(
                api_key=self.endpoint.api_key,
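As a hedged illustration of what the new is_bedrock branch constructs, the sketch below calls the anthropic SDK's AnthropicBedrock client directly with the same keyword arguments the diff passes; the credential values and the Bedrock model id are placeholders, not values from this package.

# Hedged sketch of the object the new branch returns; values are placeholders.
from anthropic import AnthropicBedrock

client = AnthropicBedrock(
    aws_access_key="AKIA...",   # endpoint.credentials.get("access_key")
    aws_secret_key="...",       # endpoint.credentials.get("secret_key")
    aws_region="us-west-2",     # endpoint.region
)
message = client.messages.create(
    model="anthropic.claude-3-5-sonnet-20241022-v2:0",  # placeholder Bedrock model id
    max_tokens=256,
    messages=[{"role": "user", "content": "Hello"}],
)
print(message.content)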
@@ -250,6 +266,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -267,6 +284,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -284,6 +302,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -300,6 +319,7 @@ class AnthropicChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -349,6 +369,7 @@ class AnthropicChatClient(BaseChatClient):
                 response_format=response_format,
                 stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             for chunk in response:
@@ -374,8 +395,8 @@ class AnthropicChatClient(BaseChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
-                stream_options=stream_options,
                 top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -399,7 +420,7 @@ class AnthropicChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
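The new skip_cutoff flag threads through every overload above and gates this trimming step. A minimal, self-contained stand-in for the pattern (vectorvein's cutoff_messages trims by token budget; the list slice here is only a simplification):

from typing import Any

def maybe_cutoff(
    messages: list[dict[str, Any]],
    max_count: int,
    skip_cutoff: bool = False,
) -> list[dict[str, Any]]:
    """Trim history to the model's window unless the caller opts out."""
    if skip_cutoff:
        return messages  # caller takes responsibility for context length
    return messages[-max_count:]  # simplified stand-in for cutoff_messages

history = [{"role": "user", "content": f"turn {i}"} for i in range(10)]
assert len(maybe_cutoff(history, max_count=4)) == 4
assert len(maybe_cutoff(history, max_count=4, skip_cutoff=True)) == 10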
@@ -595,6 +616,15 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 access_token=self.creds.token,
                 http_client=self.http_client,
             )
+        elif self.endpoint.is_bedrock:
+            if self.endpoint.credentials is None:
+                raise ValueError("Anthropic Bedrock endpoint requires credentials")
+            return AsyncAnthropicBedrock(
+                aws_access_key=self.endpoint.credentials.get("aws_access_key"),
+                aws_secret_key=self.endpoint.credentials.get("aws_secret_key"),
+                aws_region=self.endpoint.region,
+                http_client=self.http_client,
+            )
         elif self.endpoint.api_schema_type == "default":
            return AsyncAnthropic(
                api_key=self.endpoint.api_key,
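The async counterpart mirrors the sync sketch above, though note this branch reads different credential keys (aws_access_key/aws_secret_key) than the sync branch's access_key/secret_key. A hedged sketch with placeholder values:

# Hedged async sketch; credential values and model id are placeholders.
import asyncio
from anthropic import AsyncAnthropicBedrock

async def main() -> None:
    client = AsyncAnthropicBedrock(
        aws_access_key="AKIA...",   # endpoint.credentials.get("aws_access_key")
        aws_secret_key="...",       # endpoint.credentials.get("aws_secret_key")
        aws_region="us-west-2",     # endpoint.region
    )
    message = await client.messages.create(
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",  # placeholder model id
        max_tokens=256,
        messages=[{"role": "user", "content": "Ping"}],
    )
    print(message.content)

asyncio.run(main())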
@@ -626,6 +656,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -643,6 +674,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -660,6 +692,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -676,6 +709,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -725,6 +759,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tool_choice=_tool_choice,
                 response_format=response_format,
                 stream_options=stream_options,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
             async for chunk in response:
@@ -751,6 +787,8 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 tools=_tools,
                 tool_choice=_tool_choice,
                 response_format=response_format,
+                top_p=top_p,
+                skip_cutoff=skip_cutoff,
                 **kwargs,
             )
 
@@ -774,7 +812,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -6,7 +6,14 @@ from typing import Generator, AsyncGenerator, Any, overload, Literal, Iterable
 
 import httpx
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI
-from anthropic import Anthropic, AnthropicVertex, AsyncAnthropic, AsyncAnthropicVertex
+from anthropic import (
+    Anthropic,
+    AnthropicVertex,
+    AsyncAnthropic,
+    AsyncAnthropicVertex,
+    AnthropicBedrock,
+    AsyncAnthropicBedrock,
+)
 
 from ..settings import settings
 from ..types import defaults as defs
@@ -57,7 +64,9 @@ class BaseChatClient(ABC):
 
     @cached_property
     @abstractmethod
-    def raw_client(self) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | httpx.Client | None:
+    def raw_client(
+        self,
+    ) -> OpenAI | AzureOpenAI | Anthropic | AnthropicVertex | AnthropicBedrock | httpx.Client | None:
         pass
 
     @overload
@@ -74,6 +83,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -92,6 +102,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -110,6 +121,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -127,6 +139,7 @@ class BaseChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -195,7 +208,15 @@ class BaseAsyncChatClient(ABC):
     @abstractmethod
     def raw_client(
         self,
-    ) -> AsyncOpenAI | AsyncAzureOpenAI | AsyncAnthropic | AsyncAnthropicVertex | httpx.AsyncClient | None:
+    ) -> (
+        AsyncOpenAI
+        | AsyncAzureOpenAI
+        | AsyncAnthropic
+        | AsyncAnthropicVertex
+        | AsyncAnthropicBedrock
+        | httpx.AsyncClient
+        | None
+    ):
         pass
 
     @overload
@@ -212,6 +233,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -230,6 +252,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -248,6 +271,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -265,6 +289,7 @@ class BaseAsyncChatClient(ABC):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -66,6 +66,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -83,6 +84,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -100,6 +102,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -116,6 +119,7 @@ class GeminiChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -133,7 +137,7 @@ class GeminiChatClient(BaseChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -313,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -330,6 +335,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +353,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -363,6 +370,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -380,7 +388,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         else:
             system_prompt = ""
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -92,6 +92,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -109,6 +110,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -126,6 +128,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -142,6 +145,7 @@ class MiniMaxChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -166,7 +170,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -337,6 +341,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -354,6 +359,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +377,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -387,6 +394,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None = None,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -411,7 +419,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -99,6 +99,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -116,6 +117,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -133,6 +135,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -149,6 +152,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -165,7 +169,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -361,6 +365,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -378,6 +383,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -395,6 +401,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -411,6 +418,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         response_format: dict | None = None,
         stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         top_p: float | NotGiven | None = NOT_GIVEN,
+        skip_cutoff: bool = False,
         **kwargs,
     ):
         if model is not None:
@@ -427,7 +435,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         if self.model_id is None:
             self.model_id = self.model_setting.id
 
-        if self.context_length_control == ContextLengthControlType.Latest:
+        if not skip_cutoff and self.context_length_control == ContextLengthControlType.Latest:
             messages = cutoff_messages(
                 messages,
                 max_count=self.model_setting.context_length,
@@ -231,6 +231,20 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2.5-coder-32b-instruct": {
+        "id": "qwen2.5-coder-32b-instruct",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
+    "qwq-32b-preview": {
+        "id": "qwq-32b-preview",
+        "context_length": 30000,
+        "max_output_tokens": 4096,
+        "function_call_available": False,
+        "response_format_available": False,
+    },
     "qwen2.5-72b-instruct": {
         "id": "qwen2.5-72b-instruct",
         "context_length": 131072,
@@ -238,6 +252,14 @@ QWEN_MODELS: Final[Dict[str, Dict[str, Any]]] = {
         "function_call_available": False,
         "response_format_available": True,
     },
+    "qwen2-vl-72b-instruct": {
+        "id": "qwen2-vl-72b-instruct",
+        "context_length": 131072,
+        "max_output_tokens": 8192,
+        "function_call_available": False,
+        "response_format_available": False,
+        "native_multimodal": True,
+    },
     "qwen-max": {
         "id": "qwen-max",
         "context_length": 8096,
@@ -37,6 +37,7 @@ class EndpointSetting(BaseModel):
     credentials: Optional[dict] = Field(None, description="Additional credentials if needed.")
     is_azure: bool = Field(False, description="Indicates if the endpoint is for Azure.")
     is_vertex: bool = Field(False, description="Indicates if the endpoint is for Vertex.")
+    is_bedrock: bool = Field(False, description="Indicates if the endpoint is for Bedrock.")
     rpm: int = Field(description="Requests per minute.", default=defs.ENDPOINT_RPM)
     tpm: int = Field(description="Tokens per minute.", default=defs.ENDPOINT_TPM)
     concurrent_requests: int = Field(
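A hedged sketch of an endpoint that would take the new Bedrock path in anthropic_client.py; only credentials, is_bedrock, rpm, and tpm are confirmed by this hunk, while the id and region fields are inferred from their use elsewhere in the diff (settings.get_endpoint(self.endpoint_id), self.endpoint.region).

from vectorvein.types.llm_parameters import EndpointSetting

bedrock_endpoint = EndpointSetting(
    id="aws-bedrock",    # inferred field; endpoints are looked up by id
    region="us-west-2",  # read by the Bedrock branch as endpoint.region
    is_bedrock=True,
    credentials={"access_key": "AKIA...", "secret_key": "..."},  # sync-branch keys
)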