yaicli 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
+ from .openai_provider import OpenAIProvider
+
+
+ class ChutesProvider(OpenAIProvider):
+     """Chutes provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://llm.chutes.ai/v1"
@@ -0,0 +1,298 @@
+ """
+ Cohere API provider implementation
+
+ This module implements Cohere provider classes for different deployment options:
+ - CohereProvider: Standard Cohere API
+ - CohereBadrockProvider: AWS Bedrock integration
+ - CohereSagemaker: AWS Sagemaker integration
+ """
+
+ from typing import Any, Dict, Generator, List, Optional
+
+ from cohere import BedrockClientV2, ClientV2, SagemakerClientV2
+ from cohere.types.tool_call_v2 import ToolCallV2, ToolCallV2Function
+
+ from ...config import cfg
+ from ...console import get_console
+ from ...schemas import ChatMessage, LLMResponse, ToolCall
+ from ...tools import get_openai_schemas
+ from ..provider import Provider
+
+
+ class CohereProvider(Provider):
+     """Cohere provider implementation based on cohere library"""
+
+     DEFAULT_BASE_URL = "https://api.cohere.com/v2"
+     CLIENT_CLS = ClientV2
+     DEFAULT_MODEL = "command-a-03-2025"
+
+     def __init__(self, config: dict = cfg, verbose: bool = False, **kwargs):
+         """
+         Initialize the Cohere provider
+
+         Args:
+             config: Configuration dictionary
+             verbose: Whether to enable verbose logging
+             **kwargs: Additional parameters passed to the client
+         """
+         self.config = config
+         self.verbose = verbose
+         self.client_params = {
+             "api_key": self.config["API_KEY"],
+             "timeout": self.config["TIMEOUT"],
+         }
+         if self.config["BASE_URL"]:
+             self.client_params["base_url"] = self.config["BASE_URL"]
+         self.client = self.create_client()
+         self.console = get_console()
+
+     def create_client(self):
+         """Create and return Cohere client instance"""
+         if self.config.get("ENVIRONMENT"):
+             self.client_params["environment"] = self.config["ENVIRONMENT"]
+         return self.CLIENT_CLS(**self.client_params)
+
+     def detect_tool_role(self) -> str:
+         """Return the role name for tool response messages"""
+         return "tool"
+
+     def _convert_messages(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+         """
+         Convert a list of ChatMessage objects to a list of Cohere message dicts
+
+         {
+             "role": "tool",
+             "tool_call_id": tc.id,
+             "content": {
+                 "type": "document",
+                 "document": {"data": string},
+             },
+         }
+
+         Args:
+             messages: List of ChatMessage objects
+
+         Returns:
+             List of message dicts formatted for Cohere API
+         """
+         converted_messages = []
+         for msg in messages:
+             # Create base message
+             message = {}
+
+             # Set role always
+             message["role"] = msg.role
+
+             # Add tool calls for assistant messages
+             if msg.role == "assistant" and msg.tool_calls:
+                 # {
+                 #     "role": "assistant",
+                 #     "tool_calls": response.message.tool_calls,
+                 #     "tool_plan": response.message.tool_plan,
+                 # }
+                 message["tool_calls"] = [
+                     ToolCallV2(
+                         id=tc.id,
+                         type="function",
+                         function=ToolCallV2Function(name=tc.name, arguments=tc.arguments),
+                     )
+                     for tc in msg.tool_calls
+                 ]
+             else:
+                 # Add content for non-tool-call messages
+                 message["content"] = msg.content or ""
+
+             # Add tool call ID for tool messages
+             if msg.role == "tool" and msg.tool_call_id:
+                 message["tool_call_id"] = msg.tool_call_id
+
+                 # For tool messages, convert content to the expected document format
+                 if msg.content:
+                     message["content"] = [{"type": "document", "document": {"data": msg.content}}]
+
+             converted_messages.append(message)
+
+         return converted_messages
+
+     def _prepare_tools(self) -> Optional[List[Dict[str, Any]]]:
+         """
+         Prepare tools for Cohere API if enabled
+
+         Returns:
+             List of tool definitions or None if disabled
+         """
+         if not self.config.get("ENABLE_FUNCTIONS", False):
+             return None
+
+         tools = get_openai_schemas()
+         if not tools and self.verbose:
+             self.console.print("No tools available", style="yellow")
+         return tools
+
+     def _handle_streaming_response(self, response_stream) -> Generator[LLMResponse, None, None]:
+         """
+         Process streaming response from Cohere API
+
+         doc: https://docs.cohere.com/v2/docs/streaming
+
+         According to Cohere docs, there are multiple event types:
+         - message-start: First event with metadata
+         - content-start: Start of content block
+         - content-delta: Chunk of generated text
+         - content-end: End of content block
+         - message-end: End of message
+         - tool-plan-delta: Part of tool planning
+         - tool-call-start: Start of tool call
+         - tool-call-delta: Part of tool call
+         - tool-call-end: End of tool call
+         - citation-start/end: For citations in RAG
+
+         Args:
+             response_stream: Stream from Cohere client
+
+         Yields:
+             LLMResponse objects with content or tool calls
+         """
+         tool_call: Optional[ToolCall] = None
+         for chunk in response_stream:
+             if not chunk:
+                 continue
+
+             # Handle different event types
+             if chunk.type == "content-delta":
+                 # Text generation chunks
+                 content = chunk.delta.message.content.text or ""
+                 yield LLMResponse(content=content)
+
+             elif chunk.type == "tool-plan-delta":
+                 # Tool planning - when model is deciding which tool to use: cohere.types.chat_tool_plan_delta_event_delta_message.ChatToolPlanDeltaEventDeltaMessage
+                 content = chunk.delta.message.tool_plan or ""
+                 yield LLMResponse(content=content)
+
+             elif chunk.type == "tool-call-start":
+                 # Start of tool call
+                 tool_call_msg = chunk.delta.message.tool_calls
+                 tool_call = ToolCall(
+                     id=tool_call_msg.id, name=tool_call_msg.function.name, arguments=tool_call_msg.function.arguments
+                 )
+                 # Tool call started, waiting for tool-calls-delta events
+                 continue
+             elif chunk.type == "tool-call-delta":
+                 # Tool call arguments being generated: cohere.types.chat_tool_call_delta_event_delta_message.ChatToolCallDeltaEventDeltaMessage
+                 tool_call.arguments += chunk.delta.message.tool_calls.function.arguments
+                 # Waiting for tool-call-end event
+                 continue
+
+             elif chunk.type == "tool-call-end":
+                 # End of a tool call, empty chunk
+                 yield LLMResponse(tool_call=tool_call)
+
+     def _handle_normal_response(self, response) -> Generator[LLMResponse, None, None]:
+         """
+         Process non-streaming response from Cohere API
+
+         Args:
+             response: Response from Cohere client
+
+         Yields:
+             LLMResponse objects with content or tool calls
+         """
+         # Handle content
+         if response.message.content:
+             for content_item in response.message.content:
+                 if hasattr(content_item, "text") and content_item.text:
+                     yield LLMResponse(content=content_item.text)
+
+         # Handle tool calls
+         if response.message.tool_calls:
+             yield LLMResponse(content=response.message.tool_plan)
+             for tool_call in response.message.tool_calls:
+                 yield LLMResponse(
+                     tool_call=ToolCall(
+                         id=tool_call.id,
+                         name=tool_call.function.name,
+                         arguments=tool_call.function.arguments,
+                     )
+                 )
+
+     def completion(
+         self, messages: List[ChatMessage], stream: bool = False, **kwargs
+     ) -> Generator[LLMResponse, None, None]:
+         """
+         Get completion from Cohere models
+
+         Args:
+             messages: List of messages for the conversation
+             stream: Whether to stream the response
+             **kwargs: Additional parameters to pass to the Cohere client
+
+         Yields:
+             LLMResponse objects with content or tool calls
+         """
+         # Get configuration values
+         model = self.config.get("MODEL", self.DEFAULT_MODEL)
+         temperature = float(self.config.get("TEMPERATURE", 0.7))
+
+         # Prepare messages and tools
+         cohere_messages = self._convert_messages(messages)
+         if self.verbose:
+             self.console.print("Messages:")
+             self.console.print(cohere_messages)
+         tools = self._prepare_tools()
+
+         # Common request parameters
+         request_params = {"model": model, "messages": cohere_messages, "temperature": temperature, **kwargs}
+
+         # Add tools if available
+         if tools:
+             request_params["tools"] = tools
+
+         # Call Cohere API
+         try:
+             if stream:
+                 # Streaming mode
+                 response_stream = self.client.chat_stream(**request_params)
+                 yield from self._handle_streaming_response(response_stream)
+             else:
+                 # Non-streaming mode
+                 response = self.client.chat(**request_params)
+                 yield from self._handle_normal_response(response)
+
+         except Exception as e:
+             error_msg = f"Error in Cohere API call: {e}"
+             if self.verbose:
+                 import traceback
+
+                 self.console.print("Error in Cohere completion:")
+                 traceback.print_exc()
+             yield LLMResponse(content=error_msg)
+
+
+ class CohereBadrockProvider(CohereProvider):
+     """Cohere provider for AWS Bedrock integration"""
+
+     CLIENT_CLS = BedrockClientV2
+     DOC_URL = "https://docs.cohere.com/v2/docs/text-gen-quickstart"
+     CLIENT_KEYS = (
+         ("AWS_REGION", "aws_region"),
+         ("AWS_ACCESS_KEY_ID", "aws_access_key"),
+         ("AWS_SECRET_ACCESS_KEY", "aws_secret_key"),
+         ("AWS_SESSION_TOKEN", "aws_session_token"),
+     )
+
+     def create_client(self):
+         """Create Bedrock client with AWS credentials"""
+         for k, p in self.CLIENT_KEYS:
+             v = self.config.get(k, None)
+             if v is None:
+                 raise ValueError(
+                     f"You have to set key `{k}` to use {self.__class__.__name__}, see cohere doc `{self.DOC_URL}`"
+                 )
+             self.client_params[p] = v
+         return self.CLIENT_CLS(**self.client_params)
+
+
+ class CohereSagemaker(CohereBadrockProvider):
+     """Cohere provider for AWS Sagemaker integration"""
+
+     CLIENT_CLS = SagemakerClientV2
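
For orientation, a minimal sketch of driving the new CohereProvider directly, not part of the diff. The config keys mirror what __init__ and completion() read above; the import path for ChatMessage and its constructor signature are assumptions, since file paths are not shown in this diff.

from yaicli.schemas import ChatMessage  # assumed import path for the schemas used above

cohere_config = {
    "API_KEY": "co-...",              # read into client_params by __init__
    "BASE_URL": "",                   # falsy -> the cohere client default endpoint is used
    "TIMEOUT": 60,
    "MODEL": "command-a-03-2025",     # DEFAULT_MODEL above
    "TEMPERATURE": 0.7,
    "ENABLE_FUNCTIONS": False,        # skip tool schemas in _prepare_tools()
}
provider = CohereProvider(config=cohere_config, verbose=False)

# completion() yields LLMResponse objects; with stream=True content arrives chunk by chunk
for resp in provider.completion([ChatMessage(role="user", content="Hello")], stream=True):
    if resp.content:
        print(resp.content, end="")
    if resp.tool_call:
        print(f"\n[tool call] {resp.tool_call.name}({resp.tool_call.arguments})")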
@@ -0,0 +1,11 @@
+ from .openai_provider import OpenAIProvider
+
+
+ class DeepSeekProvider(OpenAIProvider):
+     """DeepSeek provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://api.deepseek.com/v1"
+
+     def __init__(self, config: dict = ..., **kwargs):
+         super().__init__(config, **kwargs)
+         self.completion_params["max_tokens"] = self.completion_params.pop("max_completion_tokens")
@@ -0,0 +1,51 @@
+ from volcenginesdkarkruntime import Ark
+
+ from ...config import cfg
+ from ...console import get_console
+ from .openai_provider import OpenAIProvider
+
+
+ class DoubaoProvider(OpenAIProvider):
+     """Doubao provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
+
+     def __init__(self, config: dict = cfg, **kwargs):
+         self.config = config
+         self.enable_function = self.config["ENABLE_FUNCTIONS"]
+         # Initialize client params
+         self.client_params = {"base_url": self.DEFAULT_BASE_URL}
+         if self.config.get("API_KEY", None):
+             self.client_params["api_key"] = self.config["API_KEY"]
+         if self.config.get("BASE_URL", None):
+             self.client_params["base_url"] = self.config["BASE_URL"]
+         if self.config.get("AK", None):
+             self.client_params["ak"] = self.config["AK"]
+         if self.config.get("SK", None):
+             self.client_params["sk"] = self.config["SK"]
+         if self.config.get("REGION", None):
+             self.client_params["region"] = self.config["REGION"]
+
+         # Initialize client
+         self.client = Ark(**self.client_params)
+         self.console = get_console()
+
+         # Store completion params
+         self.completion_params = {
+             "model": self.config["MODEL"],
+             "temperature": self.config["TEMPERATURE"],
+             "top_p": self.config["TOP_P"],
+             "max_tokens": self.config["MAX_TOKENS"],
+             "timeout": self.config["TIMEOUT"],
+         }
+         # Add extra headers if set
+         if self.config.get("EXTRA_HEADERS", None):
+             self.completion_params["extra_headers"] = {
+                 **self.config["EXTRA_HEADERS"],
+                 "X-Title": self.APP_NAME,
+                 "HTTP-Referer": self.APPA_REFERER,
+             }
+
+         # Add extra body params if set
+         if self.config.get("EXTRA_BODY", None):
+             self.completion_params["extra_body"] = self.config["EXTRA_BODY"]
@@ -0,0 +1,14 @@
+ from .openai_provider import OpenAIProvider
+
+
+ class GroqProvider(OpenAIProvider):
+     """Groq provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://api.groq.com/openai/v1"
+
+     def __init__(self, config: dict = ..., **kwargs):
+         super().__init__(config, **kwargs)
+         if self.config.get("EXTRA_BODY") and "N" in self.config["EXTRA_BODY"] and self.config["EXTRA_BODY"]["N"] != 1:
+             self.console.print("Groq does not support N parameter, setting N to 1 as Groq default", style="yellow")
+             if "extra_body" in self.completion_params:
+                 self.completion_params["extra_body"]["N"] = 1
@@ -0,0 +1,14 @@
+ from .openai_provider import OpenAIProvider
+
+
+ class InfiniAIProvider(OpenAIProvider):
+     """InfiniAI provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://cloud.infini-ai.com/maas/v1"
+
+     def __init__(self, config: dict = ..., **kwargs):
+         super().__init__(config, **kwargs)
+         if self.enable_function:
+             self.console.print("InfiniAI does not support functions, disabled", style="yellow")
+             self.enable_function = False
+         self.completion_params["max_tokens"] = self.completion_params.pop("max_completion_tokens")
@@ -0,0 +1,11 @@
+ from .openai_provider import OpenAIProvider
+
+
+ class ModelScopeProvider(OpenAIProvider):
+     """ModelScope provider implementation based on openai-compatible API"""
+
+     DEFAULT_BASE_URL = "https://api-inference.modelscope.cn/v1/"
+
+     def __init__(self, config: dict = ..., **kwargs):
+         super().__init__(config, **kwargs)
+         self.completion_params["max_tokens"] = self.completion_params.pop("max_completion_tokens")
@@ -0,0 +1,187 @@
+ import json
+ import time
+ from typing import Any, Dict, Generator, List
+
+ import ollama
+
+ from ...config import cfg
+ from ...console import get_console
+ from ...schemas import ChatMessage, LLMResponse, ToolCall
+ from ...tools import get_openai_schemas
+ from ...utils import str2bool
+ from ..provider import Provider
+
+
+ class OllamaProvider(Provider):
+     """Ollama provider implementation based on ollama Python library"""
+
+     DEFAULT_BASE_URL = "http://localhost:11434"
+     OPTION_KEYS = (
+         ("SEED", "seed"),
+         ("NUM_PREDICT", "num_predict"),
+         ("NUM_CTX", "num_ctx"),
+         ("NUM_BATCH", "num_batch"),
+         ("NUM_GPU", "num_gpu"),
+         ("MAIN_GPU", "main_gpu"),
+         ("LOW_VRAM", "low_vram"),
+         ("F16_KV", "f16_kv"),
+         ("LOGITS_ALL", "logits_all"),
+         ("VOCAB_ONLY", "vocab_only"),
+         ("USE_MMAP", "use_mmap"),
+         ("USE_MLOCK", "use_mlock"),
+         ("NUM_THREAD", "num_thread"),
+     )
+
+     def __init__(self, config: dict = cfg, verbose: bool = False, **kwargs):
+         self.config = config
+         self.enable_function = self.config.get("ENABLE_FUNCTIONS", False)
+         self.verbose = verbose
+         self.think = str2bool(self.config.get("THINK", False))
+
+         # Initialize client params - Ollama host support
+         self.host = self.config.get("BASE_URL") or self.DEFAULT_BASE_URL
+
+         # Initialize console
+         self.console = get_console()
+
+         self.client = ollama.Client(host=self.host, timeout=self.config["TIMEOUT"])
+
+     def _convert_messages(self, messages: List[ChatMessage]) -> List[Dict[str, Any]]:
+         """Convert a list of ChatMessage objects to a list of Ollama message dicts."""
+         converted_messages = []
+         for msg in messages:
+             message = {"role": msg.role, "content": msg.content or ""}
+
+             if msg.name:
+                 message["name"] = msg.name
+
+             # Handle tool calls - Ollama now supports the OpenAI format directly
+             if msg.role == "assistant" and msg.tool_calls:
+                 message["tool_calls"] = [
+                     {
+                         "id": tc.id,
+                         "type": "function",
+                         "function": {"name": tc.name, "arguments": json.loads(tc.arguments)},
+                     }
+                     for tc in msg.tool_calls
+                 ]
+
+             # Handle tool responses - Ollama supports tool_call_id directly
+             if msg.role == "tool" and msg.tool_call_id:
+                 message["tool_call_id"] = msg.tool_call_id
+
+             converted_messages.append(message)
+
+         return converted_messages
+
+     def completion(
+         self,
+         messages: List[ChatMessage],
+         stream: bool = False,
+     ) -> Generator[LLMResponse, None, None]:
+         """Send messages to Ollama and get response"""
+         # Convert message format
+         ollama_messages = self._convert_messages(messages)
+         if self.verbose:
+             self.console.print("Messages:")
+             self.console.print(ollama_messages)
+         options = {"temperature": self.config["TEMPERATURE"], "top_p": self.config["TOP_P"]}
+         for k, v in self.OPTION_KEYS:
+             if self.config.get(k, None) is not None:
+                 options[v] = self.config[k]
+
+         # Prepare parameters
+         params = {
+             "model": self.config.get("MODEL", "llama3"),
+             "messages": ollama_messages,
+             "stream": stream,
+             "think": self.think,
+             "options": options,
+         }
+
+         # Add tools if enabled
+         if self.enable_function:
+             params["tools"] = get_openai_schemas()
+
+         if self.verbose:
+             self.console.print("Ollama API params:")
+             self.console.print(params)
+         try:
+             if stream:
+                 response_generator = self.client.chat(**params)
+                 yield from self._handle_stream_response(response_generator)
+             else:
+                 response = self.client.chat(**params)
+                 yield from self._handle_normal_response(response)
+
+         except Exception as e:
+             self.console.print(f"Ollama API error: {e}", style="red")
+             yield LLMResponse(content=f"Error calling Ollama API: {str(e)}")
+
+     def _handle_normal_response(self, response: Dict[str, Any]) -> Generator[LLMResponse, None, None]:
+         """Handle normal (non-streaming) response"""
+         content = response.message.content or ""
+         reasoning = response.message.thinking or ""
+
+         # Check for tool calls in the response
+         tool_call = None
+         tool_calls = response.message.tool_calls or []
+
+         if tool_calls and self.enable_function:
+             # Get the first tool call
+             tc = tool_calls[0]
+             function_data = tc.get("function", {})
+
+             # Create tool call with appropriate data type handling
+             arguments = function_data.get("arguments", "")
+             if isinstance(arguments, dict):
+                 arguments = json.dumps(arguments)
+
+             tool_call = ToolCall(
+                 id=tc.get("id", f"tc_{hash(function_data.get('name', ''))}_{int(time.time())}"),
+                 name=function_data.get("name", ""),
+                 arguments=arguments,
+             )
+
+         yield LLMResponse(content=content, reasoning=reasoning, tool_call=tool_call)
+
+     def _handle_stream_response(self, response_generator) -> Generator[LLMResponse, None, None]:
+         """Handle streaming response"""
+         accumulated_content = ""
+         tool_call = None
+
+         for chunk in response_generator:
+             # Extract content from the current chunk
+             message = chunk.message
+             content = message.content or ""
+             reasoning = message.thinking or ""
+
+             if content or reasoning:
+                 accumulated_content += content
+                 yield LLMResponse(content=content, reasoning=reasoning)
+
+             # Check for tool calls in the chunk
+             tool_calls = message.tool_calls or []
+             if tool_calls and self.enable_function:
+                 # Only handle the first tool call for now
+                 tc = tool_calls[0]
+                 function_data = tc.get("function", {})
+
+                 # Create tool call with appropriate data type handling
+                 arguments = function_data.get("arguments", "")
+                 if isinstance(arguments, dict):
+                     arguments = json.dumps(arguments)
+
+                 tool_call = ToolCall(
+                     id=tc.get("id", None) or f"tc_{hash(function_data.get('name', ''))}_{int(time.time())}",
+                     name=function_data.get("name", ""),
+                     arguments=arguments,
+                 )
+
+         # After streaming is complete, if we found a tool call, yield it
+         if tool_call:
+             yield LLMResponse(tool_call=tool_call)
+
+     def detect_tool_role(self) -> str:
+         """Return the role to be used for tool responses"""
+         return "tool"