yaicli 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. pyproject.toml +35 -12
  2. yaicli/cli.py +31 -20
  3. yaicli/const.py +6 -5
  4. yaicli/entry.py +1 -1
  5. yaicli/llms/__init__.py +13 -0
  6. yaicli/llms/client.py +120 -0
  7. yaicli/llms/provider.py +78 -0
  8. yaicli/llms/providers/ai21_provider.py +66 -0
  9. yaicli/llms/providers/chatglm_provider.py +139 -0
  10. yaicli/llms/providers/chutes_provider.py +14 -0
  11. yaicli/llms/providers/cohere_provider.py +298 -0
  12. yaicli/llms/providers/deepseek_provider.py +14 -0
  13. yaicli/llms/providers/doubao_provider.py +53 -0
  14. yaicli/llms/providers/groq_provider.py +16 -0
  15. yaicli/llms/providers/infiniai_provider.py +20 -0
  16. yaicli/llms/providers/minimax_provider.py +13 -0
  17. yaicli/llms/providers/modelscope_provider.py +14 -0
  18. yaicli/llms/providers/ollama_provider.py +187 -0
  19. yaicli/llms/providers/openai_provider.py +211 -0
  20. yaicli/llms/providers/openrouter_provider.py +14 -0
  21. yaicli/llms/providers/sambanova_provider.py +30 -0
  22. yaicli/llms/providers/siliconflow_provider.py +14 -0
  23. yaicli/llms/providers/targon_provider.py +14 -0
  24. yaicli/llms/providers/yi_provider.py +14 -0
  25. yaicli/printer.py +4 -16
  26. yaicli/schemas.py +12 -3
  27. yaicli/tools.py +59 -3
  28. {yaicli-0.5.9.dist-info → yaicli-0.6.1.dist-info}/METADATA +238 -32
  29. yaicli-0.6.1.dist-info/RECORD +43 -0
  30. yaicli/client.py +0 -391
  31. yaicli-0.5.9.dist-info/RECORD +0 -24
  32. {yaicli-0.5.9.dist-info → yaicli-0.6.1.dist-info}/WHEEL +0 -0
  33. {yaicli-0.5.9.dist-info → yaicli-0.6.1.dist-info}/entry_points.txt +0 -0
  34. {yaicli-0.5.9.dist-info → yaicli-0.6.1.dist-info}/licenses/LICENSE +0 -0
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
 [project]
 name = "yaicli"
-version = "0.5.9"
+version = "0.6.1"
 description = "A simple CLI tool to interact with LLM"
 authors = [{ name = "belingud", email = "im.victor@qq.com" }]
 readme = "README.md"
@@ -8,7 +8,7 @@ requires-python = ">=3.9"
 license = { file = "LICENSE" }
 classifiers = [
     "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: MIT License",
+    "License :: OSI Approved :: Apache Software License",
     "Operating System :: OS Independent",
 ]
 keywords = [
@@ -16,18 +16,32 @@ keywords = [
     "llm",
     "ai",
     "chatgpt",
-    "openai",
     "gpt",
     "llms",
-    "openai",
     "terminal",
     "interactive",
-    "interact",
-    "interact with llm",
-    "interact with chatgpt",
-    "interact with openai",
-    "interact with gpt",
-    "interact with llms",
+    "command-line",
+    "ai-assistant",
+    "language-model",
+    "text-generation",
+    "conversation",
+    "prompt",
+    "completion",
+    "console-application",
+    "shell-integration",
+    "nlp",
+    "inference",
+    "ai-chat",
+    "python-tool",
+    "terminal-interface",
+    "ai-interaction",
+    "openai",
+    "claude",
+    "gemini",
+    "mistral",
+    "anthropic",
+    "groq",
+    "cohere",
 ]
 dependencies = [
     "click>=8.1.8",
@@ -35,7 +49,6 @@ dependencies = [
     "httpx>=0.28.1",
     "instructor>=1.7.9",
     "json-repair>=0.44.1",
-    "litellm>=1.67.5",
     "openai>=1.76.0",
     "prompt-toolkit>=3.0.50",
     "rich>=13.9.4",
@@ -51,6 +64,12 @@ Documentation = "https://github.com/belingud/yaicli"
 ai = "yaicli.entry:app"
 yaicli = "yaicli.entry:app"
 
+[project.optional-dependencies]
+doubao = ["volcengine-python-sdk>=3.0.15"]
+ollama = ["ollama>=0.5.1"]
+cohere = ["cohere>=5.15.0"]
+all = ["volcengine-python-sdk>=3.0.15", "ollama>=0.5.1", "cohere>=5.15.0"]
+
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
@@ -60,7 +79,7 @@ filterwarnings = [
     "ignore::PendingDeprecationWarning",
     "ignore::UserWarning",
     "ignore::pydantic.PydanticDeprecatedSince20",
-    "ignore:.*There is no current event loop.*:DeprecationWarning"
+    "ignore:.*There is no current event loop.*:DeprecationWarning",
 ]
 
 [tool.uv]
@@ -81,6 +100,10 @@ profile = "black"
 line-length = 120
 fix = true
 
+[tool.ruff.lint]
+select = ["F"]
+fixable = ["F401"]
+
 [build-system]
 requires = ["hatchling>=1.18.0"]
 build-backend = "hatchling.build"
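
Note: the new [project.optional-dependencies] table moves provider-specific SDKs (volcengine-python-sdk, ollama, cohere) out of the core dependency list, so users opt in with an extra such as yaicli[ollama] or yaicli[all]. A minimal, illustrative sketch of how an optional SDK installed through an extra is usually guarded at import time (not code from this release):

    # Hypothetical guard for a dependency provided by the "ollama" extra.
    try:
        import ollama  # present only when the extra is installed
    except ImportError:
        ollama = None  # the provider can then raise a clear error if selected
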
yaicli/cli.py CHANGED
@@ -17,7 +17,6 @@ from rich.panel import Panel
 from rich.prompt import Prompt
 
 from .chat import Chat, FileChatManager, chat_mgr
-from .client import ChatMessage, LitellmClient
 from .config import cfg
 from .console import get_console
 from .const import (
@@ -41,8 +40,10 @@ from .const import (
 )
 from .exceptions import ChatSaveError
 from .history import LimitedFileHistory
+from .llms import LLMClient
 from .printer import Printer
 from .role import Role, RoleManager, role_mgr
+from .schemas import ChatMessage
 from .utils import detect_os, detect_shell, filter_command
 
 
@@ -66,7 +67,7 @@ class CLI:
         self.role_manager = role_manager or role_mgr
         self.role: Role = self.role_manager.get_role(self.role_name)
         self.printer = Printer()
-        self.client = client or LitellmClient(verbose=self.verbose)
+        self.client = client or self._create_client()
 
         self.bindings = KeyBindings()
 
@@ -338,7 +339,7 @@ class CLI:
         messages.append(ChatMessage(role="user", content=user_input))
         return messages
 
-    def _handle_llm_response(self, user_input: str) -> Optional[str]:
+    def _handle_llm_response(self, user_input: str) -> tuple[Optional[str], list[ChatMessage]]:
         """Get response from API (streaming or normal) and print it.
         Returns the full content string or None if an error occurred.
 
@@ -347,44 +348,50 @@
 
         Returns:
             Optional[str]: The assistant's response content or None if an error occurred.
+            list[ChatMessage]: The updated message history.
         """
         messages = self._build_messages(user_input)
-        if self.verbose:
-            self.console.print(messages)
-        if self.role != DefaultRoleNames.CODER:
+        if self.role.name != DefaultRoleNames.CODER:
             self.console.print("Assistant:", style="bold green")
         try:
-            response = self.client.completion(messages, stream=cfg["STREAM"])
-            if cfg["STREAM"]:
-                content, _ = self.printer.display_stream(response, messages)
-            else:
-                content, _ = self.printer.display_normal(response, messages)
+            response_iterator = self.client.completion_with_tools(messages, stream=cfg["STREAM"])
 
-            # Just return the content, message addition is handled in _process_user_input
-            return content if content is not None else None
+            content, _ = self.printer.display_stream(response_iterator)
+
+            # The 'messages' list is modified by the client in-place
+            return content, messages
         except Exception as e:
             self.console.print(f"Error processing LLM response: {e}", style="red")
             if self.verbose:
                 traceback.print_exc()
-            return None
+            return None, messages
 
     def _process_user_input(self, user_input: str) -> bool:
         """Process user input: get response, print, update history, maybe execute.
         Returns True to continue REPL, False to exit on critical error.
         """
-        content = self._handle_llm_response(user_input)
+        content, updated_messages = self._handle_llm_response(user_input)
 
-        if content is None:
+        if content is None and not any(msg.tool_calls for msg in updated_messages):
            return True
 
-        # Update chat history using Chat's add_message method
-        self.chat.add_message("user", user_input)
-        self.chat.add_message("assistant", content)
+        # The client modifies the message list in place, so the updated_messages
+        # contains the full history of the turn (system, history, user, assistant, tools).
+        # We replace the old history with the new one, removing the system prompt.
+        if updated_messages:
+            self.chat.history = updated_messages[1:]
 
         self._check_history_len()
 
         if self.current_mode == EXEC_MODE:
-            self._confirm_and_execute(content)
+            # We need to extract the executable command from the last assistant message
+            # in case of tool use.
+            final_content = ""
+            if self.chat.history:
+                last_message = self.chat.history[-1]
+                if last_message.role == "assistant":
+                    final_content = last_message.content or ""
+            self._confirm_and_execute(final_content)
         return True
 
     def _confirm_and_execute(self, raw_content: str) -> None:
@@ -555,3 +562,7 @@ class CLI:
         else:
            # Run in single-use mode
            self._run_once(user_input or "", shell=shell, code=code)
+
+    def _create_client(self):
+        """Create an LLM client instance based on configuration"""
+        return LLMClient(provider_name=cfg["PROVIDER"].lower(), verbose=self.verbose, config=cfg)
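
The change above swaps the old LitellmClient for the provider-agnostic LLMClient, which _create_client builds from the PROVIDER config key. A minimal sketch of the same construction outside the CLI class, assuming the 0.6.1 module layout shown in this diff:

    from yaicli.config import cfg
    from yaicli.llms import LLMClient

    # Build a client for whichever provider is configured (e.g. PROVIDER=openai).
    client = LLMClient(provider_name=cfg["PROVIDER"].lower(), verbose=False, config=cfg)
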
yaicli/const.py CHANGED
@@ -6,6 +6,7 @@ except ImportError:
     class StrEnum(str, Enum):
         """Compatible with python below 3.11"""
 
+
 from pathlib import Path
 from tempfile import gettempdir
 from typing import Any, Literal, Optional
@@ -51,7 +52,7 @@ DEFAULT_MODEL = "gpt-4o"
 DEFAULT_SHELL_NAME = "auto"
 DEFAULT_OS_NAME = "auto"
 DEFAULT_STREAM: BOOL_STR = "true"
-DEFAULT_TEMPERATURE: float = 0.5
+DEFAULT_TEMPERATURE: float = 0.3
 DEFAULT_TOP_P: float = 1.0
 DEFAULT_MAX_TOKENS: int = 1024
 DEFAULT_MAX_HISTORY: int = 500
@@ -99,9 +100,9 @@ class DefaultRoleNames(StrEnum):
 
 
 DEFAULT_ROLES: dict[str, dict[str, Any]] = {
-    DefaultRoleNames.SHELL: {"name": DefaultRoleNames.SHELL, "prompt": SHELL_PROMPT},
-    DefaultRoleNames.DEFAULT: {"name": DefaultRoleNames.DEFAULT, "prompt": DEFAULT_PROMPT},
-    DefaultRoleNames.CODER: {"name": DefaultRoleNames.CODER, "prompt": CODER_PROMPT},
+    DefaultRoleNames.SHELL.value: {"name": DefaultRoleNames.SHELL.value, "prompt": SHELL_PROMPT},
+    DefaultRoleNames.DEFAULT.value: {"name": DefaultRoleNames.DEFAULT.value, "prompt": DEFAULT_PROMPT},
+    DefaultRoleNames.CODER.value: {"name": DefaultRoleNames.CODER.value, "prompt": CODER_PROMPT},
 }
 
 # DEFAULT_CONFIG_MAP is a dictionary of the configuration options.
@@ -112,7 +113,7 @@ DEFAULT_ROLES: dict[str, dict[str, Any]] = {
 # - type: the type of the configuration option
 DEFAULT_CONFIG_MAP = {
     # Core API settings
-    "BASE_URL": {"value": DEFAULT_BASE_URL, "env_key": "YAI_BASE_URL", "type": str},
+    "BASE_URL": {"value": "", "env_key": "YAI_BASE_URL", "type": str},
     "API_KEY": {"value": "", "env_key": "YAI_API_KEY", "type": str},
     "MODEL": {"value": DEFAULT_MODEL, "env_key": "YAI_MODEL", "type": str},
     # System detection hints
yaicli/entry.py CHANGED
@@ -82,7 +82,7 @@ def main(
     ),
     # ------------------- Role Options -------------------
     role: str = typer.Option(
-        DefaultRoleNames.DEFAULT,
+        DefaultRoleNames.DEFAULT.value,
         "--role",
         "-r",
         help="Specify the assistant role to use.",
yaicli/llms/__init__.py ADDED
@@ -0,0 +1,13 @@
+from ..config import cfg
+from .client import LLMClient
+from .provider import Provider, ProviderFactory
+
+__all__ = ["LLMClient", "Provider", "ProviderFactory"]
+
+
+class BaseProvider:
+    def __init__(self) -> None:
+        self.api_key = cfg["API_KEY"]
+        self.model = cfg["MODEL"]
+        self.base_url = cfg["BASE_URL"]
+        self.timeout = cfg["TIMEOUT"]
yaicli/llms/client.py ADDED
@@ -0,0 +1,120 @@
+from typing import Generator, List, Optional, Union
+
+from ..config import cfg
+from ..console import get_console
+from ..schemas import ChatMessage, LLMResponse, RefreshLive, ToolCall
+from ..tools import execute_tool_call
+from .provider import Provider, ProviderFactory
+
+
+class LLMClient:
+    """
+    LLM Client that coordinates provider interactions and tool calling
+
+    This class handles the higher level logic of:
+    1. Getting responses from LLM providers
+    2. Managing tool calls and their execution
+    3. Handling conversation flow with tools
+    """
+
+    def __init__(
+        self,
+        provider: Optional[Provider] = None,
+        provider_name: str = "",
+        config: dict = cfg,
+        verbose: bool = False,
+        **kwargs,
+    ):
+        """
+        Initialize LLM client
+
+        Args:
+            provider: Optional pre-initialized Provider instance
+            provider_name: Name of the provider to use if provider not provided
+            config: Configuration dictionary
+            verbose: Whether to enable verbose logging
+        """
+        self.config = config
+        self.verbose = verbose
+        self.console = get_console()
+
+        # Use provided provider or create one
+        if provider:
+            self.provider = provider
+        elif provider_name:
+            self.provider = ProviderFactory.create_provider(provider_name, config=config, verbose=verbose, **kwargs)
+        else:
+            provider_name = config.get("PROVIDER", "openai").lower()
+            self.provider = ProviderFactory.create_provider(provider_name, config=config, verbose=verbose, **kwargs)
+
+        self.max_recursion_depth = config.get("MAX_RECURSION_DEPTH", 5)
+
+    def completion_with_tools(
+        self,
+        messages: List[ChatMessage],
+        stream: bool = False,
+        recursion_depth: int = 0,
+    ) -> Generator[Union[LLMResponse, RefreshLive], None, None]:
+        """
+        Get completion from provider with tool calling support
+
+        Args:
+            messages: List of messages for the conversation
+            stream: Whether to stream the response
+            recursion_depth: Current recursion depth for tool calls
+
+        Yields:
+            LLMResponse objects and control signals
+        """
+        if recursion_depth >= self.max_recursion_depth:
+            self.console.print(
+                f"Maximum recursion depth ({self.max_recursion_depth}) reached, stopping further tool calls",
+                style="yellow",
+            )
+            return
+
+        # Get completion from provider
+        llm_response_generator = self.provider.completion(messages, stream=stream)
+
+        # To hold the full response
+        assistant_response_content = ""
+        tool_calls: List[ToolCall] = []
+
+        # Process all responses from the provider
+        for llm_response in llm_response_generator:
+            # Forward the response to the caller
+            yield llm_response
+
+            # Collect content and tool calls
+            if llm_response.content:
+                assistant_response_content += llm_response.content
+            if llm_response.tool_call and llm_response.tool_call not in tool_calls:
+                tool_calls.append(llm_response.tool_call)
+
+        # If we have tool calls, execute them and make recursive call
+        if tool_calls and self.config["ENABLE_FUNCTIONS"]:
+            # Yield a refresh signal to indicate new content is coming
+            yield RefreshLive()
+
+            # Append the assistant message with tool calls to history
+            messages.append(ChatMessage(role="assistant", content=assistant_response_content, tool_calls=tool_calls))
+
+            # Execute each tool call and append the results
+            for tool_call in tool_calls:
+                function_result, _ = execute_tool_call(tool_call)
+
+                # Use provider's tool role detection
+                tool_role = self.provider.detect_tool_role()
+
+                # Append the tool result to history
+                messages.append(
+                    ChatMessage(
+                        role=tool_role,
+                        content=function_result,
+                        name=tool_call.name,
+                        tool_call_id=tool_call.id,
+                    )
+                )
+
+            # Make a recursive call with the updated history
+            yield from self.completion_with_tools(messages, stream=stream, recursion_depth=recursion_depth + 1)
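
completion_with_tools yields LLMResponse chunks interleaved with RefreshLive control signals: when the provider asks for tools, the client executes them, appends the results to messages, and recurses until no more tool calls arrive or the recursion limit is hit. A rough usage sketch of driving that generator (assumes ChatMessage is built with role/content keywords, as in cli.py above):

    from yaicli.schemas import ChatMessage, RefreshLive

    messages = [ChatMessage(role="user", content="What is the weather in Berlin?")]
    parts = []
    for event in client.completion_with_tools(messages, stream=True):
        if isinstance(event, RefreshLive):
            parts.clear()  # a fresh assistant turn begins after tool execution
            continue
        if event.content:
            parts.append(event.content)
    print("".join(parts))  # final assistant answer; messages now holds the whole turn
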
yaicli/llms/provider.py ADDED
@@ -0,0 +1,78 @@
+import importlib
+from abc import ABC, abstractmethod
+from typing import Generator, List
+
+from ..schemas import ChatMessage, LLMResponse
+
+
+class Provider(ABC):
+    """Base abstract class for LLM providers"""
+
+    APP_NAME = "yaicli"
+    APPA_REFERER = "https://github.com/halfrost/yaicli"
+
+    @abstractmethod
+    def completion(
+        self,
+        messages: List[ChatMessage],
+        stream: bool = False,
+    ) -> Generator[LLMResponse, None, None]:
+        """
+        Send a completion request to the LLM provider
+
+        Args:
+            messages: List of message objects representing the conversation
+            stream: Whether to stream the response
+
+        Returns:
+            Generator yielding LLMResponse objects
+        """
+        pass
+
+    @abstractmethod
+    def detect_tool_role(self) -> str:
+        """Return the role that should be used for tool responses"""
+        pass
+
+
+class ProviderFactory:
+    """Factory to create LLM provider instances"""
+
+    providers_map = {
+        "openai": (".providers.openai_provider", "OpenAIProvider"),
+        "modelscope": (".providers.modelscope_provider", "ModelScopeProvider"),
+        "chatglm": (".providers.chatglm_provider", "ChatglmProvider"),
+        "openrouter": (".providers.openrouter_provider", "OpenRouterProvider"),
+        "siliconflow": (".providers.siliconflow_provider", "SiliconFlowProvider"),
+        "chutes": (".providers.chutes_provider", "ChutesProvider"),
+        "infini-ai": (".providers.infiniai_provider", "InfiniAIProvider"),
+        "yi": (".providers.yi_provider", "YiProvider"),
+        "deepseek": (".providers.deepseek_provider", "DeepSeekProvider"),
+        "doubao": (".providers.doubao_provider", "DoubaoProvider"),
+        "groq": (".providers.groq_provider", "GroqProvider"),
+        "ai21": (".providers.ai21_provider", "AI21Provider"),
+        "ollama": (".providers.ollama_provider", "OllamaProvider"),
+        "cohere": (".providers.cohere_provider", "CohereProvider"),
+        "sambanova": (".providers.sambanova_provider", "SambanovaProvider"),
+        "minimax": (".providers.minimax_provider", "MinimaxProvider"),
+        "targon": (".providers.targon_provider", "TargonProvider"),
+    }
+
+    @classmethod
+    def create_provider(cls, provider_type: str, verbose: bool = False, **kwargs) -> Provider:
+        """Create a provider instance based on provider type
+
+        Args:
+            provider_type: The type of provider to create
+            **kwargs: Additional parameters to pass to the provider
+
+        Returns:
+            A Provider instance
+        """
+        provider_type = provider_type.lower()
+        if provider_type not in cls.providers_map:
+            raise ValueError(f"Unknown provider: {provider_type}")
+
+        module_path, class_name = cls.providers_map[provider_type]
+        module = importlib.import_module(module_path, package="yaicli.llms")
+        return getattr(module, class_name)(verbose=verbose, **kwargs)
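
ProviderFactory lazily imports the module named in providers_map and instantiates the provider class, forwarding verbose plus any extra keyword arguments. A short sketch of calling the factory directly, passing the config dict the same way LLMClient does above:

    from yaicli.config import cfg
    from yaicli.llms.provider import ProviderFactory

    provider = ProviderFactory.create_provider("openai", verbose=True, config=cfg)
    for resp in provider.completion(messages, stream=False):
        print(resp.content or "", end="")
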
yaicli/llms/providers/ai21_provider.py ADDED
@@ -0,0 +1,66 @@
+from typing import Any, Dict, Generator, Optional
+
+from openai._streaming import Stream
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from ...schemas import LLMResponse, ToolCall
+from .openai_provider import OpenAIProvider
+
+
+class AI21Provider(OpenAIProvider):
+    """AI21 provider implementation based on openai-compatible API"""
+
+    DEFAULT_BASE_URL = "https://api.ai21.com/studio/v1"
+
+    def get_completion_params(self) -> Dict[str, Any]:
+        params = super().get_completion_params()
+        params["max_tokens"] = params.pop("max_completion_tokens")
+        return params
+
+    def _handle_stream_response(self, response: Stream[ChatCompletionChunk]) -> Generator[LLMResponse, None, None]:
+        """Handle streaming response from AI21 models
+
+        Processes chunks from streaming API, extracting content, reasoning and tool calls.
+        The tool call response is scattered across multiple chunks.
+
+        Args:
+            response: Stream of chat completion chunks from AI21 API
+
+        Yields:
+            Generator yielding LLMResponse objects containing:
+            - reasoning: The thinking/reasoning content (if any)
+            - content: The normal response content
+            - tool_call: Tool call information when applicable
+        """
+        # Initialize tool call object to accumulate tool call data across chunks
+        tool_call: Optional[ToolCall] = None
+
+        # Process each chunk in the response stream
+        for chunk in response:
+            choice = chunk.choices[0]
+            delta = choice.delta
+            finish_reason = choice.finish_reason
+
+            # Extract content from current chunk
+            content = delta.content or ""
+
+            # Extract reasoning content if available
+            reasoning = self._get_reasoning_content(getattr(delta, "model_extra", None) or delta)
+
+            # Process tool call information that may be scattered across chunks
+            if hasattr(delta, "tool_calls") and delta.tool_calls:
+                tool_call = self._process_tool_call_chunk(delta.tool_calls, tool_call)
+
+            # AI21 specific handling: content cannot be empty for tool calls
+            if finish_reason == "tool_calls" and not content:
+                # tool call assistant message, content can't be empty
+                # Error code: 422 - {'detail': {'error': ['Value error, message content must not be an empty string']}}
+                content = tool_call.id
+
+            # Generate response object
+            yield LLMResponse(
+                reasoning=reasoning,
+                content=content,
+                tool_call=tool_call if finish_reason == "tool_calls" else None,
+                finish_reason=finish_reason,
+            )
yaicli/llms/providers/chatglm_provider.py ADDED
@@ -0,0 +1,139 @@
+import json
+from typing import Any, Dict, Generator, Optional
+
+from openai._streaming import Stream
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+from ...schemas import LLMResponse, ToolCall
+from .openai_provider import OpenAIProvider
+
+
+class ChatglmProvider(OpenAIProvider):
+    """Chatglm provider support"""
+
+    DEFAULT_BASE_URL = "https://open.bigmodel.cn/api/paas/v4/"
+
+    def get_completion_params(self) -> Dict[str, Any]:
+        params = super().get_completion_params()
+        params["max_tokens"] = params.pop("max_completion_tokens")
+        return params
+
+    def _handle_normal_response(self, response: ChatCompletion) -> Generator[LLMResponse, None, None]:
+        """Handle normal (non-streaming) response
+        Support both openai capabilities and chatglm
+
+        Returns:
+            LLMContent object with:
+            - reasoning: The thinking/reasoning content (if any)
+            - content: The normal response content
+        """
+        choice = response.choices[0]
+        content = choice.message.content or ""  # type: ignore
+        reasoning = choice.message.reasoning_content  # type: ignore
+        finish_reason = choice.finish_reason
+        tool_call: Optional[ToolCall] = None
+
+        # Check if the response contains reasoning content
+        if "<think>" in content and "</think>" in content:
+            # Extract reasoning content
+            content = content.lstrip()
+            if content.startswith("<think>"):
+                think_end = content.find("</think>")
+                if think_end != -1:
+                    reasoning = content[7:think_end].strip()  # Start after <think>
+                    # Remove the <think> block from the main content
+                    content = content[think_end + 8 :].strip()  # Start after </think>
+        # Check if the response contains reasoning content in model_extra
+        elif hasattr(choice.message, "model_extra") and choice.message.model_extra:  # type: ignore
+            model_extra = choice.message.model_extra  # type: ignore
+            reasoning = self._get_reasoning_content(model_extra)
+        if finish_reason == "tool_calls":
+            if '{"index":' in content or '"tool_calls":' in content:
+                # Tool call data may in content after the <think> block
+                # >/n{"index": 0, "tool_call_id": "call_1", "function": {"name": "name", "arguments": "{}"}, "output": null}
+                tool_index = content.find('{"index":')
+                if tool_index != -1:
+                    tmp_content = content[tool_index:]
+                    # Tool call data may in content after the <think> block
+                    try:
+                        choice = self.parse_choice_from_content(tmp_content)
+                    except ValueError:
+                        pass
+            if hasattr(choice, "message") and hasattr(choice.message, "tool_calls") and choice.message.tool_calls:  # type: ignore
+                tool = choice.message.tool_calls[0]  # type: ignore
+                tool_call = ToolCall(tool.id, tool.function.name or "", tool.function.arguments)
+
+        yield LLMResponse(reasoning=reasoning, content=content, finish_reason=finish_reason, tool_call=tool_call)
+
+    def _handle_stream_response(self, response: Stream[ChatCompletionChunk]) -> Generator[LLMResponse, None, None]:
+        """Handle streaming response
+        Support both openai capabilities and chatglm
+
+        Returns:
+            Generator yielding LLMContent objects with:
+            - reasoning: The thinking/reasoning content (if any)
+            - content: The normal response content
+        """
+        full_reasoning = ""
+        full_content = ""
+        content = ""
+        reasoning = ""
+        tool_id = ""
+        tool_call_name = ""
+        arguments = ""
+        tool_call: Optional[ToolCall] = None
+        for chunk in response:
+            # Check if the response contains reasoning content
+            choice = chunk.choices[0]  # type: ignore
+            delta = choice.delta
+            finish_reason = choice.finish_reason
+
+            # Concat content
+            content = delta.content or ""
+            full_content += content
+
+            # Concat reasoning
+            reasoning = self._get_reasoning_content(delta)
+            full_reasoning += reasoning or ""
+
+            if finish_reason:
+                pass
+            if finish_reason == "tool_calls" or ('{"index":' in content or '"tool_calls":' in content):
+                # Tool call data may in content after the <think> block
+                # >/n{"index": 0, "tool_call_id": "call_1", "function": {"name": "name", "arguments": "{}"}, "output": null}
+                tool_index = full_content.find('{"index":')
+                if tool_index != -1:
+                    tmp_content = full_content[tool_index:]
+                    try:
+                        choice = self.parse_choice_from_content(tmp_content)
+                    except ValueError:
+                        pass
+                if hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls:  # type: ignore
+                    # Handle tool calls
+                    tool_id = choice.delta.tool_calls[0].id or ""  # type: ignore
+                    for tool in choice.delta.tool_calls:  # type: ignore
+                        if not tool.function:
+                            continue
+                        tool_call_name = tool.function.name or ""
+                        arguments += tool.function.arguments or ""
+                    tool_call = ToolCall(tool_id, tool_call_name, arguments)
+            yield LLMResponse(reasoning=reasoning, content=content, tool_call=tool_call, finish_reason=finish_reason)
+
+    def parse_choice_from_content(self, content: str) -> "Choice":
+        """
+        Parse the choice from the content after <think>...</think> block.
+        Args:
+            content: The content from the LLM response
+        Returns:
+            The choice object
+        Raises ValueError if the content is not valid JSON
+        """
+        try:
+            content_dict = json.loads(content)
+        except json.JSONDecodeError:
+            raise ValueError(f"Invalid message from LLM: {content}")
+        try:
+            return Choice.model_validate(content_dict)
+        except Exception as e:
+            raise ValueError(f"Invalid message from LLM: {content}") from e
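
The <think> handling in _handle_normal_response slices the reasoning out of the raw content: characters 7..think_end hold the reasoning (skipping the 7-character "<think>" tag) and everything after think_end + 8 (past "</think>") is the visible answer. A standalone toy example of that arithmetic, not taken from the package:

    raw = "<think>The user wants a one-line answer.</think>Use ls -la to list all files."
    think_end = raw.find("</think>")
    reasoning = raw[7:think_end].strip()   # "The user wants a one-line answer."
    content = raw[think_end + 8:].strip()  # "Use ls -la to list all files."
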