vectorvein 0.1.45__tar.gz → 0.1.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {vectorvein-0.1.45 → vectorvein-0.1.47}/PKG-INFO +1 -1
  2. {vectorvein-0.1.45 → vectorvein-0.1.47}/pyproject.toml +1 -1
  3. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/anthropic_client.py +29 -2
  4. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/base_client.py +13 -0
  5. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/gemini_client.py +21 -2
  6. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/minimax_client.py +29 -4
  7. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_compatible_client.py +53 -14
  8. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/defaults.py +1 -1
  9. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/llm_parameters.py +2 -0
  10. {vectorvein-0.1.45 → vectorvein-0.1.47}/README.md +0 -0
  11. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/__init__.py +0 -0
  12. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/__init__.py +0 -0
  13. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  14. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/groq_client.py +0 -0
  16. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/local_client.py +0 -0
  17. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  18. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  19. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_client.py +0 -0
  20. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/py.typed +0 -0
  21. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  22. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  23. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/utils.py +0 -0
  24. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/yi_client.py +0 -0
  25. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  26. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/py.typed +0 -0
  27. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/server/token_server.py +0 -0
  28. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/settings/__init__.py +0 -0
  29. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/settings/py.typed +0 -0
  30. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/enums.py +0 -0
  31. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/exception.py +0 -0
  32. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/py.typed +0 -0
  33. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/utilities/media_processing.py +0 -0
  34. {vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/utilities/retry.py +0 -0
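
Note: the substantive changes in 0.1.47 fall into three groups: (1) a new stream_options parameter (the OpenAI SDK's ChatCompletionStreamOptionsParam) is threaded through every chat client, and streamed delta messages can now carry token usage; (2) an endpoint entry in a model's settings may now pin a per-endpoint model_id; (3) the default Anthropic model and the Azure OpenAI api_version are bumped. Hedged usage sketches appear after the relevant hunks below.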
{vectorvein-0.1.45 → vectorvein-0.1.47}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.1.45
+Version: 0.1.47
 Summary: Default template for PDM package
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
{vectorvein-0.1.45 → vectorvein-0.1.47}/pyproject.toml
@@ -17,7 +17,7 @@ description = "Default template for PDM package"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.1.45"
+version = "0.1.47"

 [project.license]
 text = "MIT"
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/anthropic_client.py
@@ -38,6 +38,7 @@ from ..types.llm_parameters import (
     ChatCompletionMessage,
     ChatCompletionToolParam,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -214,6 +215,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -229,6 +231,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -244,6 +247,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -258,6 +262,7 @@ class AnthropicChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -315,7 +320,16 @@ class AnthropicChatClient(BaseChatClient):
                 http_client=self.http_client,
                 backend_name=self.BACKEND_NAME,
             ).create_completion(
-                messages, model, False, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=False,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )

         assert isinstance(self.raw_client, Anthropic | AnthropicVertex)
@@ -554,6 +568,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -569,6 +584,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -584,6 +600,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -598,6 +615,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -639,7 +657,16 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                 backend_name=self.BACKEND_NAME,
             )
             response = await client.create_completion(
-                messages, model, True, temperature, max_tokens, _tools, _tool_choice, response_format, **kwargs
+                messages=messages,
+                model=model,
+                stream=True,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=_tools,
+                tool_choice=_tool_choice,
+                response_format=response_format,
+                stream_options=stream_options,
+                **kwargs,
             )
             async for chunk in response:
                 yield chunk
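
Note: besides threading stream_options through, both Anthropic clients rewrite the delegated create_completion call from a long positional argument list to explicit keyword arguments, so the call site stays correct as the shared signature grows.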
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/base_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -71,6 +72,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -87,6 +89,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -103,6 +106,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -118,6 +122,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -131,6 +136,7 @@ class BaseChatClient(ABC):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, Any, None]:
         return self.create_completion(
@@ -142,6 +148,7 @@ class BaseChatClient(ABC):
             tools=tools,
             tool_choice=tool_choice,
             response_format=response_format,
+            stream_options=stream_options,
             **kwargs,
         )

@@ -198,6 +205,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -214,6 +222,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -230,6 +239,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -245,6 +255,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, None]:
         pass
@@ -258,6 +269,7 @@ class BaseAsyncChatClient(ABC):
         tools: list | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, None]:
         return await self.create_completion(
@@ -269,5 +281,6 @@ class BaseAsyncChatClient(ABC):
             tools=tools,
             tool_choice=tool_choice,
             response_format=response_format,
+            stream_options=stream_options,
             **kwargs,
         )
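
Note: these are the abstract signatures every concrete client must mirror; the streaming wrapper methods at the end of each base class forward stream_options into create_completion unchanged, so the streaming and non-streaming paths accept the same options.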
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/gemini_client.py
@@ -19,6 +19,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -63,6 +64,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -78,6 +80,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -93,6 +96,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -107,6 +111,7 @@ class GeminiChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,7 +149,12 @@ class GeminiChatClient(BaseChatClient):

         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         request_body = {
@@ -291,6 +301,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -306,6 +317,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -321,6 +333,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -335,6 +348,7 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -372,7 +386,12 @@ class AsyncGeminiChatClient(BaseAsyncChatClient):

         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         request_body = {
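
Note: with the endpoint-selection change above (mirrored in the MiniMax client below, whose request body now sends self.model_id rather than the user-facing model name), an entry in a model's endpoints list may be either a plain endpoint-ID string or a mapping that also pins a per-endpoint model ID. A hypothetical settings fragment showing both accepted shapes; the endpoint_id/model_id keys come from the diff, while the model names and surrounding structure are assumed:

    # Hypothetical configuration sketch; only the two entry shapes are grounded in the diff.
    model_settings = {
        "gemini-1.5-pro": {
            "endpoints": [
                "gemini-endpoint-1",                   # old shape: endpoint ID only
                {
                    "endpoint_id": "vertex-endpoint",  # new shape: dict with a
                    "model_id": "gemini-1.5-pro-002",  # per-endpoint model identifier
                },
            ],
        },
    }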
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/minimax_client.py
@@ -18,6 +18,7 @@ from ..types.llm_parameters import (
     ToolChoice,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -72,6 +73,7 @@ class MiniMaxChatClient(BaseChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.Client()
+        self.model_id = None

     @cached_property
     def raw_client(self):
@@ -88,6 +90,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -103,6 +106,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -118,6 +122,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -132,6 +137,7 @@ class MiniMaxChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -144,9 +150,16 @@ class MiniMaxChatClient(BaseChatClient):
             tool_choice = "auto"

         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -191,7 +204,7 @@ class MiniMaxChatClient(BaseChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

         request_body = {
-            "model": self.model,
+            "model": self.model_id,
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
@@ -287,6 +300,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             self.http_client = http_client
         else:
             self.http_client = httpx.AsyncClient()
+        self.model_id = None

     @cached_property
     def raw_client(self):
@@ -303,6 +317,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -318,6 +333,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -333,6 +349,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -347,6 +364,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None = None,
         **kwargs,
     ):
         if model is not None:
@@ -359,9 +377,16 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
             tool_choice = "auto"

         self.model_setting = self.backend_settings.models[self.model]
+        if self.model_id is None:
+            self.model_id = self.model_setting.id
         if self.random_endpoint:
             self.random_endpoint = True
-            self.endpoint_id = random.choice(self.backend_settings.models[self.model].endpoints)
+            endpoint_choice = random.choice(self.backend_settings.models[self.model].endpoints)
+            if isinstance(endpoint_choice, dict):
+                self.endpoint_id = endpoint_choice["endpoint_id"]
+                self.model_id = endpoint_choice["model_id"]
+            else:
+                self.endpoint_id = endpoint_choice
             self.endpoint = settings.get_endpoint(self.endpoint_id)

         if self.context_length_control == ContextLengthControlType.Latest:
@@ -404,7 +429,7 @@ class AsyncMiniMaxChatClient(BaseAsyncChatClient):
         self.headers = {"Authorization": f"Bearer {self.endpoint.api_key}", "Content-Type": "application/json"}

         request_body = {
-            "model": self.model,
+            "model": self.model_id,
             "messages": messages,
             "max_tokens": max_tokens,
             "temperature": self.temperature,
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/chat_clients/openai_compatible_client.py
@@ -6,8 +6,6 @@ from functools import cached_property
 from typing import overload, Generator, AsyncGenerator, Any, Literal, Iterable

 import httpx
-from openai._streaming import Stream, AsyncStream
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai import OpenAI, AsyncOpenAI, AzureOpenAI, AsyncAzureOpenAI

 from .base_client import BaseChatClient, BaseAsyncChatClient
@@ -25,8 +23,11 @@ from ..types.llm_parameters import (
     NOT_GIVEN,
     ToolParam,
     ToolChoice,
+    OpenAINotGiven,
+    Usage,
     ChatCompletionMessage,
     ChatCompletionDeltaMessage,
+    ChatCompletionStreamOptionsParam,
 )


@@ -74,7 +75,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
             return AzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -95,6 +96,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -110,6 +112,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> Generator[ChatCompletionDeltaMessage, None, None]:
         pass
@@ -125,6 +128,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | Generator[ChatCompletionDeltaMessage, Any, None]:
         pass
@@ -139,6 +143,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -189,13 +194,19 @@ class OpenAICompatibleChatClient(BaseChatClient):
         else:
             self.response_format = {}

+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if self.stream:
-            stream_response: Stream[ChatCompletionChunk] = raw_client.chat.completions.create(
+            stream_response = raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=True,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -204,7 +215,16 @@ class OpenAICompatibleChatClient(BaseChatClient):
             def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -213,7 +233,7 @@ class OpenAICompatibleChatClient(BaseChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -225,13 +245,13 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)

             return generator()
         else:
-            response: ChatCompletion = raw_client.chat.completions.create(
+            response = raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=False,
@@ -306,7 +326,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             return AsyncAzureOpenAI(
                 azure_endpoint=self.endpoint.api_base,
                 api_key=self.endpoint.api_key,
-                api_version="2024-08-01-preview",
+                api_version="2024-10-01-preview",
                 http_client=self.http_client,
             )
         else:
@@ -327,6 +347,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage:
         pass
@@ -342,6 +363,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -357,6 +379,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ) -> ChatCompletionMessage | AsyncGenerator[ChatCompletionDeltaMessage, Any]:
         pass
@@ -371,6 +394,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN,
         tool_choice: ToolChoice | NotGiven = NOT_GIVEN,
         response_format: dict | None = None,
+        stream_options: ChatCompletionStreamOptionsParam | None | OpenAINotGiven = NOT_GIVEN,
         **kwargs,
     ):
         if model is not None:
@@ -412,6 +436,11 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
         else:
             self.response_format = {}

+        if stream_options:
+            _stream_options_params = {"stream_options": stream_options}
+        else:
+            _stream_options_params = {}
+
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model)
@@ -422,12 +451,13 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 max_tokens = self.model_setting.context_length - token_counts - 64

         if self.stream:
-            stream_response: AsyncStream[ChatCompletionChunk] = await raw_client.chat.completions.create(
+            stream_response = await raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=self.stream,
                 temperature=self.temperature,
                 max_tokens=max_tokens,
+                **_stream_options_params,
                 **self.response_format,
                 **tools_params,
                 **kwargs,
@@ -436,7 +466,16 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             async def generator():
                 full_content = ""
                 result = {}
+                usage = None
                 async for chunk in stream_response:
+                    if chunk.usage and chunk.usage.total_tokens:
+                        usage = Usage(
+                            completion_tokens=chunk.usage.completion_tokens or 0,
+                            prompt_tokens=chunk.usage.prompt_tokens or 0,
+                            total_tokens=chunk.usage.total_tokens or 0,
+                        )
+                    else:
+                        usage = None
                     if len(chunk.choices) == 0:
                         continue
                     if not chunk.choices[0].delta:
@@ -445,7 +484,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                     if chunk.choices[0].delta.tool_calls:
                         for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                             tool_call.index = index
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump())
+                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         full_content += message["content"] if message["content"] else ""
@@ -457,13 +496,13 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             message["content"] = ""
                             result = message
                             continue
-                        yield ChatCompletionDeltaMessage(**message)
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                 if result:
-                    yield ChatCompletionDeltaMessage(**result)
+                    yield ChatCompletionDeltaMessage(**result, usage=usage)

             return generator()
         else:
-            response: ChatCompletion = await raw_client.chat.completions.create(
+            response = await raw_client.chat.completions.create(
                 model=self.model_id,
                 messages=messages,
                 stream=self.stream,
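
Note: the generator changes above attach a Usage object to a yielded ChatCompletionDeltaMessage whenever the current chunk reports token counts (with OpenAI-style backends, the final chunk does when include_usage is requested). A hedged consumption sketch, assuming `client` is a configured OpenAICompatibleChatClient; construction details and message content are illustrative, not from the diff:

    # Sketch under the assumptions above; attribute names follow the diff
    # (delta.usage from the new Usage plumbing) and the OpenAI delta shape (delta.content).
    deltas = client.create_completion(
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )
    total_tokens = None
    for delta in deltas:
        if delta.content:
            print(delta.content, end="")
        if delta.usage is not None:  # set on chunks that carry usage stats
            total_tokens = delta.usage.total_tokens
    print(f"\ntotal tokens: {total_tokens}")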
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/defaults.py
@@ -526,7 +526,7 @@ OPENAI_MODELS: Final[Dict[str, Dict[str, Any]]] = {
 }

 # Anthropic models
-ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20240620"
+ANTHROPIC_DEFAULT_MODEL: Final[str] = "claude-3-5-sonnet-20241022"
 ANTHROPIC_MODELS: Final[Dict[str, Dict[str, Any]]] = {
     "claude-3-opus-20240229": {
         "id": "claude-3-opus-20240229",
{vectorvein-0.1.45 → vectorvein-0.1.47}/src/vectorvein/types/llm_parameters.py
@@ -14,6 +14,7 @@ from openai._types import NOT_GIVEN as OPENAI_NOT_GIVEN
 from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
 from openai.types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

 from . import defaults as defs
@@ -122,6 +123,7 @@ __all__ = [
     "Usage",
     "ChatCompletionMessage",
     "ChatCompletionDeltaMessage",
+    "ChatCompletionStreamOptionsParam",
     "NotGiven",
     "NOT_GIVEN",
     "OpenAIToolParam",
  "OpenAIToolParam",
File without changes