vectorvein 0.2.96__tar.gz → 0.2.97__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {vectorvein-0.2.96 → vectorvein-0.2.97}/PKG-INFO +1 -1
  2. {vectorvein-0.2.96 → vectorvein-0.2.97}/pyproject.toml +1 -1
  3. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/anthropic_client.py +6 -18
  4. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/openai_compatible_client.py +237 -79
  5. {vectorvein-0.2.96 → vectorvein-0.2.97}/README.md +0 -0
  6. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/__init__.py +0 -0
  7. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/__init__.py +0 -0
  8. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/client.py +0 -0
  9. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/exceptions.py +0 -0
  10. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/api/models.py +0 -0
  11. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/__init__.py +0 -0
  12. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/baichuan_client.py +0 -0
  13. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/base_client.py +0 -0
  14. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/deepseek_client.py +0 -0
  15. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/ernie_client.py +0 -0
  16. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/gemini_client.py +0 -0
  17. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/groq_client.py +0 -0
  18. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/local_client.py +0 -0
  19. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/minimax_client.py +0 -0
  20. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/mistral_client.py +0 -0
  21. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/moonshot_client.py +0 -0
  22. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/openai_client.py +0 -0
  23. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/py.typed +0 -0
  24. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/qwen_client.py +0 -0
  25. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/stepfun_client.py +0 -0
  26. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/utils.py +0 -0
  27. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/xai_client.py +0 -0
  28. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/yi_client.py +0 -0
  29. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/chat_clients/zhipuai_client.py +0 -0
  30. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/py.typed +0 -0
  31. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/server/token_server.py +0 -0
  32. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/settings/__init__.py +0 -0
  33. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/settings/py.typed +0 -0
  34. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/__init__.py +0 -0
  35. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/defaults.py +0 -0
  36. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/enums.py +0 -0
  37. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/exception.py +0 -0
  38. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/llm_parameters.py +0 -0
  39. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/py.typed +0 -0
  40. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/types/settings.py +0 -0
  41. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/media_processing.py +0 -0
  42. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/rate_limiter.py +0 -0
  43. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/utilities/retry.py +0 -0
  44. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/edge.py +0 -0
  45. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/node.py +0 -0
  46. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/port.py +0 -0
  47. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/graph/workflow.py +0 -0
  48. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/__init__.py +0 -0
  49. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/audio_generation.py +0 -0
  50. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/control_flows.py +0 -0
  51. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/file_processing.py +0 -0
  52. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/image_generation.py +0 -0
  53. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/llms.py +0 -0
  54. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/media_editing.py +0 -0
  55. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/media_processing.py +0 -0
  56. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/output.py +0 -0
  57. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/relational_db.py +0 -0
  58. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/text_processing.py +0 -0
  59. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/tools.py +0 -0
  60. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/triggers.py +0 -0
  61. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/vector_db.py +0 -0
  62. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/video_generation.py +0 -0
  63. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/nodes/web_crawlers.py +0 -0
  64. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/analyse.py +0 -0
  65. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/check.py +0 -0
  66. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/json_to_code.py +0 -0
  67. {vectorvein-0.2.96 → vectorvein-0.2.97}/src/vectorvein/workflow/utils/layout.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectorvein
-Version: 0.2.96
+Version: 0.2.97
 Summary: VectorVein Python SDK
 Author-Email: Anderson <andersonby@163.com>
 License: MIT
@@ -17,7 +17,7 @@ description = "VectorVein Python SDK"
 name = "vectorvein"
 readme = "README.md"
 requires-python = ">=3.10"
-version = "0.2.96"
+version = "0.2.97"
 
 [project.license]
 text = "MIT"
@@ -597,9 +597,7 @@ class AnthropicChatClient(BaseChatClient):
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             native_multimodal = self.model_setting.native_multimodal
-            token_counts = get_message_token_counts(
-                messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal
-            )
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
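The reflowed call above sits inside the automatic max_tokens fallback: remaining context is computed from the message/tool token count and clamped to the model's output cap. A minimal sketch of that clamping logic, with hypothetical values standing in for the model settings (none of these numbers come from the package):

```python
# Sketch of the max_tokens fallback above (illustrative values only).
context_length = 200_000   # hypothetical model context window
max_output_tokens = 8_192  # hypothetical per-model output cap
token_counts = 150_000     # tokens already consumed by messages + tools

# Remaining room in the context window, clamped to [1, max_output_tokens].
max_tokens = context_length - token_counts
max_tokens = min(max(max_tokens, 1), max_output_tokens)
print(max_tokens)  # 8192 -- the cap binds; with token_counts=199_999 it would be 1
```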
@@ -707,9 +705,7 @@ class AnthropicChatClient(BaseChatClient):
                             result["raw_content"][i]["input"] = {}
                         try:
                             if result["tool_calls"][0]["function"]["arguments"]:
-                                result["raw_content"][i]["input"] = json.loads(
-                                    result["tool_calls"][0]["function"]["arguments"]
-                                )
+                                result["raw_content"][i]["input"] = json.loads(result["tool_calls"][0]["function"]["arguments"])
                             else:
                                 result["raw_content"][i]["input"] = {}
                         except json.JSONDecodeError:
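This hunk only reflows the defensive parsing of streamed tool-call arguments, but the pattern is worth seeing standalone: argument strings arrive incrementally and may be empty or truncated, so unparseable JSON is treated as "no arguments yet". A sketch of the same fallback; parse_tool_arguments is a hypothetical helper, not part of vectorvein:

```python
import json

def parse_tool_arguments(raw: str) -> dict:
    """Hypothetical helper mirroring the fallback above: streamed tool-call
    argument strings may be empty or cut off mid-stream, so any unparseable
    value is treated as an empty argument dict instead of raising."""
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {}

print(parse_tool_arguments('{"city": "Berlin"}'))  # {'city': 'Berlin'}
print(parse_tool_arguments('{"city": "Ber'))       # {} (truncated chunk)
```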
@@ -727,9 +723,7 @@ class AnthropicChatClient(BaseChatClient):
                         yield ChatCompletionDeltaMessage(**message)
                     elif isinstance(chunk, RawMessageDeltaEvent):
                         result["usage"]["completion_tokens"] = chunk.usage.output_tokens
-                        result["usage"]["total_tokens"] = (
-                            result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
-                        )
+                        result["usage"]["total_tokens"] = result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                         yield ChatCompletionDeltaMessage(
                             usage=Usage(
                                 prompt_tokens=result["usage"]["prompt_tokens"],
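The bookkeeping here is simple: the delta event is used only for its output_tokens, so the running total is rebuilt from the prompt count captured earlier in the stream. A one-liner illustration (values hypothetical):

```python
usage = {"prompt_tokens": 1200, "completion_tokens": 0}
usage["completion_tokens"] = 345  # taken from chunk.usage.output_tokens
usage["total_tokens"] = usage["prompt_tokens"] + usage["completion_tokens"]
assert usage["total_tokens"] == 1545
```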
@@ -1211,9 +1205,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
         if max_tokens is None:
             max_output_tokens = self.model_setting.max_output_tokens
             native_multimodal = self.model_setting.native_multimodal
-            token_counts = get_message_token_counts(
-                messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal
-            )
+            token_counts = get_message_token_counts(messages=messages, tools=tools, model=self.model, native_multimodal=native_multimodal)
             if max_output_tokens is not None:
                 max_tokens = self.model_setting.context_length - token_counts
                 max_tokens = min(max(max_tokens, 1), max_output_tokens)
@@ -1321,9 +1313,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                             result["raw_content"][i]["input"] = {}
                         try:
                             if result["tool_calls"][0]["function"]["arguments"]:
-                                result["raw_content"][i]["input"] = json.loads(
-                                    result["tool_calls"][0]["function"]["arguments"]
-                                )
+                                result["raw_content"][i]["input"] = json.loads(result["tool_calls"][0]["function"]["arguments"])
                             else:
                                 result["raw_content"][i]["input"] = {}
                         except json.JSONDecodeError:
@@ -1341,9 +1331,7 @@ class AsyncAnthropicChatClient(BaseAsyncChatClient):
                         yield ChatCompletionDeltaMessage(**message)
                     elif isinstance(chunk, RawMessageDeltaEvent):
                         result["usage"]["completion_tokens"] = chunk.usage.output_tokens
-                        result["usage"]["total_tokens"] = (
-                            result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
-                        )
+                        result["usage"]["total_tokens"] = result["usage"]["prompt_tokens"] + result["usage"]["completion_tokens"]
                         yield ChatCompletionDeltaMessage(
                             usage=Usage(
                                 prompt_tokens=result["usage"]["prompt_tokens"],
@@ -381,8 +381,8 @@ class OpenAICompatibleChatClient(BaseChatClient):
                 usage = None
                 buffer = ""
                 in_reasoning = False
-                current_reasoning = []
-                current_content = []
+                accumulated_reasoning = []
+                accumulated_content = []
 
                 for chunk in stream_response:
                     if chunk.usage and chunk.usage.total_tokens:
@@ -404,63 +404,142 @@ class OpenAICompatibleChatClient(BaseChatClient):
                             for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                                 tool_call.index = index
                                 tool_call.type = "function"  # also a workaround for MiniMax's non-compliant output
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
+
+                        # Even when function calls are supported, <think> tags still need handling
+                        message = chunk.choices[0].delta.model_dump()
+                        delta_content = message.get("content", "")
+                        if delta_content:
+                            buffer += delta_content
+
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
+                            if not in_reasoning:
+                                start_pos = buffer.find("<think>")
+                                if start_pos != -1:
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
+                                    in_reasoning = True
+                                else:
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
+                                    buffer = ""
+                            else:
+                                end_pos = buffer.find("</think>")
+                                if end_pos != -1:
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
+                                    in_reasoning = False
+                                else:
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
+                                    buffer = ""
+
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+                        elif not current_output_content and not current_reasoning_content and not message.get("tool_calls"):
+                            # Skip this message if there is no content and no tool_calls
+                            continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         delta_content = message.get("content", "")
-                        buffer += delta_content or ""
+                        if delta_content:
+                            buffer += delta_content
 
-                        while True:
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
                             if not in_reasoning:
                                 start_pos = buffer.find("<think>")
                                 if start_pos != -1:
-                                    current_content.append(buffer[:start_pos])
-                                    buffer = buffer[start_pos + 7 :]
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
                                     in_reasoning = True
                                 else:
-                                    current_content.append(buffer)
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
                                     buffer = ""
-                                    break
                             else:
                                 end_pos = buffer.find("</think>")
                                 if end_pos != -1:
-                                    current_reasoning.append(buffer[:end_pos])
-                                    buffer = buffer[end_pos + 8 :]
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
                                     in_reasoning = False
                                 else:
-                                    current_reasoning.append(buffer)
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
                                     buffer = ""
-                                    break
-
-                        message["content"] = "".join(current_content)
-                        if current_reasoning:
-                            message["reasoning_content"] = "".join(current_reasoning)
-                        current_content.clear()
-                        current_reasoning.clear()
-
-                        if tools:
-                            full_content += message["content"]
-                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
-                            if tool_call_data:
-                                message["tool_calls"] = tool_call_data["tool_calls"]
-
-                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
-                                message["content"] = ""
-                                result = message
-                                continue
-
-                        yield ChatCompletionDeltaMessage(**message, usage=usage)
 
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+
+                        if tools:
+                            full_content += current_output_content
+                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
+                            if tool_call_data:
+                                message["tool_calls"] = tool_call_data["tool_calls"]
+
+                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
+                                message["content"] = ""
+                                result = message
+                                continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
+
+                # Handle whatever is left in the buffer at the end of the stream
                 if buffer:
                     if in_reasoning:
-                        current_reasoning.append(buffer)
+                        accumulated_reasoning.append(buffer)
                     else:
-                        current_content.append(buffer)
-                final_message = {
-                    "content": "".join(current_content),
-                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
-                }
-                yield ChatCompletionDeltaMessage(**final_message, usage=usage)
+                        accumulated_content.append(buffer)
+
+                final_message = {}
+                if accumulated_content:
+                    final_content = "".join(accumulated_content)
+                    if final_content.strip():  # only emit when the content is non-empty
+                        final_message["content"] = final_content
+
+                if accumulated_reasoning:
+                    final_reasoning = "".join(accumulated_reasoning)
+                    if final_reasoning.strip():  # only emit when the reasoning content is non-empty
+                        final_message["reasoning_content"] = final_reasoning
+
+                if final_message:
+                    yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
                 if result:
                     yield ChatCompletionDeltaMessage(**result, usage=usage)
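The core of this release is the streaming <think>-tag extraction above. It is easier to follow outside the diff, so here is a minimal, self-contained sketch of the same technique; ThinkTagStreamParser is a hypothetical name for illustration and not part of vectorvein's API:

```python
# Compact restatement of the incremental <think> extraction in the hunk above.
class ThinkTagStreamParser:
    def __init__(self) -> None:
        self.buffer = ""
        self.in_reasoning = False

    def feed(self, delta: str) -> tuple[str, str]:
        """Consume one streamed chunk and return (content, reasoning).

        Mirrors the patched code: the buffer is fully drained on every call,
        so an opening tag split across chunk boundaries passes through as
        plain content (the same simplification the diff makes)."""
        self.buffer += delta
        content, reasoning = "", ""
        while self.buffer:
            if not self.in_reasoning:
                start = self.buffer.find("<think>")
                if start != -1:
                    # Text before the tag is regular output; switch to reasoning mode.
                    content += self.buffer[:start]
                    self.buffer = self.buffer[start + len("<think>"):]
                    self.in_reasoning = True
                else:
                    content += self.buffer
                    self.buffer = ""
            else:
                end = self.buffer.find("</think>")
                if end != -1:
                    # Close the reasoning span and return to regular output.
                    reasoning += self.buffer[:end]
                    self.buffer = self.buffer[end + len("</think>"):]
                    self.in_reasoning = False
                else:
                    reasoning += self.buffer
                    self.buffer = ""
        return content, reasoning


parser = ThinkTagStreamParser()
for chunk in ["Hello <think>weigh the options", " carefully</think>", " world"]:
    print(parser.feed(chunk))
# ('Hello ', 'weigh the options')
# ('', ' carefully')
# (' world', '')
```

The state machine is the point: `in_reasoning` persists across chunks, so a reasoning span opened in one delta and closed several deltas later is still routed to `reasoning_content` rather than `content`.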
@@ -820,7 +899,7 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
             messages=messages,
             stream=self.stream,
             temperature=self.temperature,
-            max_tokens=max_tokens,  # somehow Azure OpenAI's stream mode doesn't support max_completion_tokens
+            max_tokens=max_tokens,
             top_p=top_p,
             audio=audio,
             frequency_penalty=frequency_penalty,
@@ -855,8 +934,8 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                 usage = None
                 buffer = ""
                 in_reasoning = False
-                current_reasoning = []
-                current_content = []
+                accumulated_reasoning = []
+                accumulated_content = []
 
                 async for chunk in stream_response:
                     if chunk.usage and chunk.usage.total_tokens:
@@ -878,63 +957,142 @@ class AsyncOpenAICompatibleChatClient(BaseAsyncChatClient):
                             for index, tool_call in enumerate(chunk.choices[0].delta.tool_calls):
                                 tool_call.index = index
                                 tool_call.type = "function"
-                        yield ChatCompletionDeltaMessage(**chunk.choices[0].delta.model_dump(), usage=usage)
+
+                        # Even when function calls are supported, <think> tags still need handling
+                        message = chunk.choices[0].delta.model_dump()
+                        delta_content = message.get("content", "")
+                        if delta_content:
+                            buffer += delta_content
+
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
+                            if not in_reasoning:
+                                start_pos = buffer.find("<think>")
+                                if start_pos != -1:
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
+                                    in_reasoning = True
+                                else:
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
+                                    buffer = ""
+                            else:
+                                end_pos = buffer.find("</think>")
+                                if end_pos != -1:
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
+                                    in_reasoning = False
+                                else:
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
+                                    buffer = ""
+
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+                        elif not current_output_content and not current_reasoning_content and not message.get("tool_calls"):
+                            # Skip this message if there is no content and no tool_calls
+                            continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
                     else:
                         message = chunk.choices[0].delta.model_dump()
                         delta_content = message.get("content", "")
-                        buffer += delta_content or ""
+                        if delta_content:
+                            buffer += delta_content
 
-                        while True:
+                        # Process the buffered content and extract <think> tags
+                        current_output_content = ""
+                        current_reasoning_content = ""
+
+                        while buffer:
                             if not in_reasoning:
                                 start_pos = buffer.find("<think>")
                                 if start_pos != -1:
-                                    current_content.append(buffer[:start_pos])
-                                    buffer = buffer[start_pos + 7 :]
+                                    # Found the opening <think> tag
+                                    if start_pos > 0:
+                                        current_output_content += buffer[:start_pos]
+                                    buffer = buffer[start_pos + 7 :]  # skip "<think>"
                                     in_reasoning = True
                                 else:
-                                    current_content.append(buffer)
+                                    # No <think> tag found, output directly
+                                    current_output_content += buffer
                                     buffer = ""
-                                    break
                             else:
                                 end_pos = buffer.find("</think>")
                                 if end_pos != -1:
-                                    current_reasoning.append(buffer[:end_pos])
-                                    buffer = buffer[end_pos + 8 :]
+                                    # Found the closing </think> tag
+                                    current_reasoning_content += buffer[:end_pos]
+                                    buffer = buffer[end_pos + 8 :]  # skip "</think>"
                                     in_reasoning = False
                                 else:
-                                    current_reasoning.append(buffer)
+                                    # No closing tag yet, keep accumulating as reasoning content
+                                    current_reasoning_content += buffer
                                     buffer = ""
-                                    break
-
-                        message["content"] = "".join(current_content)
-                        if current_reasoning:
-                            message["reasoning_content"] = "".join(current_reasoning)
-                        current_content.clear()
-                        current_reasoning.clear()
-
-                        if tools:
-                            full_content += message["content"]
-                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
-                            if tool_call_data:
-                                message["tool_calls"] = tool_call_data["tool_calls"]
-
-                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
-                                message["content"] = ""
-                                result = message
-                                continue
-
-                        yield ChatCompletionDeltaMessage(**message, usage=usage)
 
+                        # Accumulate content
+                        if current_output_content:
+                            accumulated_content.append(current_output_content)
+                        if current_reasoning_content:
+                            accumulated_reasoning.append(current_reasoning_content)
+
+                        # Emit a delta whenever the content changed
+                        if current_output_content or current_reasoning_content:
+                            if current_output_content:
+                                message["content"] = current_output_content
+                            elif current_reasoning_content:
+                                message["reasoning_content"] = current_reasoning_content
+                                message["content"] = ""  # no regular content while reasoning
+
+                        if tools:
+                            full_content += current_output_content
+                            tool_call_data = ToolCallContentProcessor(full_content).tool_calls
+                            if tool_call_data:
+                                message["tool_calls"] = tool_call_data["tool_calls"]
+
+                            if full_content in ("<", "<|", "<|▶", "<|▶|") or full_content.startswith("<|▶|>"):
+                                message["content"] = ""
+                                result = message
+                                continue
+
+                        yield ChatCompletionDeltaMessage(**message, usage=usage)
+
+                # Handle whatever is left in the buffer at the end of the stream
                 if buffer:
                     if in_reasoning:
-                        current_reasoning.append(buffer)
+                        accumulated_reasoning.append(buffer)
                     else:
-                        current_content.append(buffer)
-                final_message = {
-                    "content": "".join(current_content),
-                    "reasoning_content": "".join(current_reasoning) if current_reasoning else None,
-                }
-                yield ChatCompletionDeltaMessage(**final_message, usage=usage)
+                        accumulated_content.append(buffer)
+
+                final_message = {}
+                if accumulated_content:
+                    final_content = "".join(accumulated_content)
+                    if final_content.strip():  # only emit when the content is non-empty
+                        final_message["content"] = final_content
+
+                if accumulated_reasoning:
+                    final_reasoning = "".join(accumulated_reasoning)
+                    if final_reasoning.strip():  # only emit when the reasoning content is non-empty
+                        final_message["reasoning_content"] = final_reasoning
+
+                if final_message:
+                    yield ChatCompletionDeltaMessage(**final_message, usage=usage)
 
                 if result:
                     yield ChatCompletionDeltaMessage(**result, usage=usage)
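The async client mirrors the sync flow exactly, yielding deltas that carry either content or reasoning_content (plus usage). A hedged sketch of how a caller might consume such a stream; the fake_stream generator and SimpleNamespace objects below are stand-ins for illustration, not vectorvein's API:

```python
import asyncio
from types import SimpleNamespace

# Stand-in for the async delta stream produced above; SimpleNamespace mimics
# the shape of the yielded ChatCompletionDeltaMessage objects.
async def fake_stream():
    yield SimpleNamespace(content="", reasoning_content="weighing options...")
    yield SimpleNamespace(content="Final answer.", reasoning_content=None)

async def collect(stream):
    """Route each delta into the reasoning or answer channel."""
    reasoning, answer = [], []
    async for delta in stream:
        if getattr(delta, "reasoning_content", None):
            reasoning.append(delta.reasoning_content)
        if getattr(delta, "content", None):
            answer.append(delta.content)
    return "".join(reasoning), "".join(answer)

print(asyncio.run(collect(fake_stream())))
# ('weighing options...', 'Final answer.')
```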