PyPI - sqlsaber - Versions diffs - 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl - Mend

sqlsaber 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlsaber might be problematic. Click here for more details.

Files changed (21) hide show

sqlsaber/agents/anthropic.py +63 -123
sqlsaber/agents/base.py +111 -210
sqlsaber/cli/interactive.py +6 -2
sqlsaber/conversation/__init__.py +12 -0
sqlsaber/conversation/manager.py +224 -0
sqlsaber/conversation/models.py +120 -0
sqlsaber/conversation/storage.py +362 -0
sqlsaber/database/schema.py +2 -51
sqlsaber/mcp/mcp.py +43 -51
sqlsaber/tools/__init__.py +25 -0
sqlsaber/tools/base.py +83 -0
sqlsaber/tools/enums.py +21 -0
sqlsaber/tools/instructions.py +251 -0
sqlsaber/tools/registry.py +130 -0
sqlsaber/tools/sql_tools.py +275 -0
sqlsaber/tools/visualization_tools.py +144 -0
{sqlsaber-0.13.0.dist-info → sqlsaber-0.15.0.dist-info}/METADATA +1 -1
{sqlsaber-0.13.0.dist-info → sqlsaber-0.15.0.dist-info}/RECORD +21 -10
{sqlsaber-0.13.0.dist-info → sqlsaber-0.15.0.dist-info}/WHEEL +0 -0
{sqlsaber-0.13.0.dist-info → sqlsaber-0.15.0.dist-info}/entry_points.txt +0 -0
{sqlsaber-0.13.0.dist-info → sqlsaber-0.15.0.dist-info}/licenses/LICENSE +0 -0

sqlsaber/database/schema.py CHANGED Viewed

@@ -1,6 +1,5 @@
 """Database schema introspection utilities."""
-import time
 from abc import ABC, abstractmethod
 from typing import Any
@@ -532,12 +531,10 @@ class SQLiteSchemaIntrospector(BaseSchemaIntrospector):
 class SchemaManager:
-    """Manages database schema introspection with caching."""
+    """Manages database schema introspection."""
-    def __init__(self, db_connection: BaseDatabaseConnection, cache_ttl: int = 900):
+    def __init__(self, db_connection: BaseDatabaseConnection):
         self.db = db_connection
-        self.cache_ttl = cache_ttl  # Default 15 minutes
-        self._schema_cache: dict[str, tuple[float, dict[str, Any]]] = {}
         # Select appropriate introspector based on connection type
         if isinstance(db_connection, PostgreSQLConnection):
@@ -551,10 +548,6 @@ class SchemaManager:
                 f"Unsupported database connection type: {type(db_connection)}"
             )
-    def clear_schema_cache(self):
-        """Clear the schema cache."""
-        self._schema_cache.clear()
     async def get_schema_info(
         self, table_pattern: str | None = None
     ) -> dict[str, SchemaInfo]:
@@ -563,31 +556,6 @@ class SchemaManager:
         Args:
             table_pattern: Optional SQL LIKE pattern to filter tables (e.g., 'public.user%')
         """
-        # Check cache first
-        cache_key = f"schema:{table_pattern or 'all'}"
-        cached_data = self._get_cached_schema(cache_key)
-        if cached_data is not None:
-            return cached_data
-        # Fetch from database if not cached
-        schema_info = await self._fetch_schema_from_db(table_pattern)
-        # Cache the result
-        self._schema_cache[cache_key] = (time.time(), schema_info)
-        return schema_info
-    def _get_cached_schema(self, cache_key: str) -> dict[str, SchemaInfo] | None:
-        """Get schema from cache if available and not expired."""
-        if cache_key in self._schema_cache:
-            cached_time, cached_data = self._schema_cache[cache_key]
-            if time.time() - cached_time < self.cache_ttl:
-                return cached_data
-        return None
-    async def _fetch_schema_from_db(
-        self, table_pattern: str | None
-    ) -> dict[str, SchemaInfo]:
-        """Fetch schema information from database."""
         # Get all schema components
         tables = await self.introspector.get_tables_info(self.db, table_pattern)
         columns = await self.introspector.get_columns_info(self.db, tables)
@@ -672,13 +640,6 @@ class SchemaManager:
     async def list_tables(self) -> dict[str, Any]:
         """Get a list of all tables with basic information."""
-        # Check cache first
-        cache_key = "list_tables"
-        cached_data = self._get_cached_tables(cache_key)
-        if cached_data is not None:
-            return cached_data
-        # Fetch from database if not cached
         tables = await self.introspector.list_tables_info(self.db)
         # Format the result
@@ -694,14 +655,4 @@ class SchemaManager:
                 }
             )
-        # Cache the result
-        self._schema_cache[cache_key] = (time.time(), result)
         return result
-    def _get_cached_tables(self, cache_key: str) -> dict[str, Any] | None:
-        """Get table list from cache if available and not expired."""
-        if cache_key in self._schema_cache:
-            cached_time, cached_data = self._schema_cache[cache_key]
-            if time.time() - cached_time < self.cache_ttl:
-                return cached_data
-        return None

sqlsaber/mcp/mcp.py CHANGED Viewed

@@ -7,25 +7,17 @@ from fastmcp import FastMCP
 from sqlsaber.agents.mcp import MCPSQLAgent
 from sqlsaber.config.database import DatabaseConfigManager
 from sqlsaber.database.connection import DatabaseConnection
+from sqlsaber.tools import SQLTool, tool_registry
+from sqlsaber.tools.instructions import InstructionBuilder
-INSTRUCTIONS = """
-This server provides helpful resources and tools that will help you address users queries on their database.
+# Initialize the instruction builder
+instruction_builder = InstructionBuilder(tool_registry)
-- Get all databases using `get_databases()`
-- Call `list_tables()` to get a list of all tables in the database with row counts. Use this first to discover available tables.
-- Call `introspect_schema()` to introspect database schema to understand table structures.
-- Call `execute_sql()` to execute SQL queries against the database and retrieve results.
+# Generate dynamic instructions
+DYNAMIC_INSTRUCTIONS = instruction_builder.build_mcp_instructions()
-Guidelines:
-- Use list_tables first, then introspect_schema for specific tables only
-- Use table patterns like 'sample%' or '%experiment%' to filter related tables
-- Use proper JOIN syntax and avoid cartesian products
-- Include appropriate WHERE clauses to limit results
-- Handle errors gracefully and suggest fixes
-"""
-# Create the FastMCP server instance
-mcp = FastMCP(name="SQL Assistant", instructions=INSTRUCTIONS)
+# Create the FastMCP server instance with dynamic instructions
+mcp = FastMCP(name="SQL Assistant", instructions=DYNAMIC_INSTRUCTIONS)
 # Initialize the database config manager
 config_manager = DatabaseConfigManager()
@@ -70,10 +62,16 @@ def get_databases() -> dict:
     return {"databases": databases, "count": len(databases)}
-@mcp.tool
-async def list_tables(database: str) -> str:
-    """
-    Get a list of all tables in the database with row counts. Use this first to discover available tables.
+async def _execute_with_connection(tool_name: str, database: str, **kwargs) -> str:
+    """Execute a SQL tool with database connection management.
+    Args:
+        tool_name: Name of the tool to execute
+        database: Database name to connect to
+        **kwargs: Tool-specific parameters
+    Returns:
+        JSON string with the tool's output
     """
     try:
         agent = await _create_agent_for_database(database)
@@ -82,50 +80,44 @@ async def list_tables(database: str) -> str:
                 {"error": f"Database '{database}' not found or could not connect"}
             )
-        result = await agent.list_tables()
+        # Get the tool and set up connection
+        tool = tool_registry.get_tool(tool_name)
+        if isinstance(tool, SQLTool):
+            tool.set_connection(agent.db)
+        # Execute the tool
+        result = await tool.execute(**kwargs)
         await agent.db.close()
         return result
     except Exception as e:
-        return json.dumps({"error": f"Error listing tables: {str(e)}"})
+        return json.dumps({"error": f"Error in {tool_name}: {str(e)}"})
-@mcp.tool
-async def introspect_schema(database: str, table_pattern: str | None = None) -> str:
-    """
-    Introspect database schema to understand table structures. Use optional pattern to filter tables (e.g., 'public.users', 'user%', '%order%').
-    """
-    try:
-        agent = await _create_agent_for_database(database)
-        if not agent:
-            return json.dumps(
-                {"error": f"Database '{database}' not found or could not connect"}
-            )
+# SQL Tool Wrappers with explicit signatures
-        result = await agent.introspect_schema(table_pattern)
-        await agent.db.close()
-        return result
-    except Exception as e:
-        return json.dumps({"error": f"Error introspecting schema: {str(e)}"})
+@mcp.tool
+async def list_tables(database: str) -> str:
+    """Get a list of all tables in the database with row counts. Use this first to discover available tables."""
+    return await _execute_with_connection("list_tables", database)
 @mcp.tool
-async def execute_sql(database: str, query: str, limit: int | None = 100) -> str:
-    """Execute a SQL query against the specified database."""
-    try:
-        agent = await _create_agent_for_database(database)
-        if not agent:
-            return json.dumps(
-                {"error": f"Database '{database}' not found or could not connect"}
-            )
+async def introspect_schema(database: str, table_pattern: str = None) -> str:
+    """Introspect database schema to understand table structures."""
+    kwargs = {}
+    if table_pattern is not None:
+        kwargs["table_pattern"] = table_pattern
+    return await _execute_with_connection("introspect_schema", database, **kwargs)
-        result = await agent.execute_sql(query, limit)
-        await agent.db.close()
-        return result
-    except Exception as e:
-        return json.dumps({"error": f"Error executing SQL: {str(e)}"})
+@mcp.tool
+async def execute_sql(database: str, query: str, limit: int = 100) -> str:
+    """Execute a SQL query against the database."""
+    return await _execute_with_connection(
+        "execute_sql", database, query=query, limit=limit
+    )
 def main():

sqlsaber/tools/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""SQLSaber tools module."""
+from .base import Tool
+from .enums import ToolCategory, WorkflowPosition
+from .instructions import InstructionBuilder
+from .registry import ToolRegistry, register_tool, tool_registry
+# Import concrete tools to register them
+from .sql_tools import ExecuteSQLTool, IntrospectSchemaTool, ListTablesTool, SQLTool
+from .visualization_tools import PlotDataTool
+__all__ = [
+    "Tool",
+    "ToolCategory",
+    "WorkflowPosition",
+    "ToolRegistry",
+    "tool_registry",
+    "register_tool",
+    "InstructionBuilder",
+    "SQLTool",
+    "ListTablesTool",
+    "IntrospectSchemaTool",
+    "ExecuteSQLTool",
+    "PlotDataTool",
+]

sqlsaber/tools/base.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""Base class for SQLSaber tools."""
+from abc import ABC, abstractmethod
+from typing import Any
+from sqlsaber.clients.models import ToolDefinition
+from .enums import ToolCategory, WorkflowPosition
+class Tool(ABC):
+    """Abstract base class for all tools."""
+    def __init__(self):
+        """Initialize the tool."""
+        pass
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Return the tool name."""
+        pass
+    @property
+    @abstractmethod
+    def description(self) -> str:
+        """Return the tool description."""
+        pass
+    @property
+    @abstractmethod
+    def input_schema(self) -> dict[str, Any]:
+        """Return the tool's input schema."""
+        pass
+    @abstractmethod
+    async def execute(self, **kwargs) -> str:
+        """Execute the tool with given inputs.
+        Args:
+            **kwargs: Tool-specific keyword arguments
+        Returns:
+            JSON string with the tool's output
+        """
+        pass
+    def to_definition(self) -> ToolDefinition:
+        """Convert this tool to a ToolDefinition."""
+        return ToolDefinition(
+            name=self.name,
+            description=self.description,
+            input_schema=self.input_schema,
+        )
+    @property
+    def category(self) -> ToolCategory:
+        """Return the tool category. Override to customize."""
+        return ToolCategory.GENERAL
+    def get_usage_instructions(self) -> str | None:
+        """Return tool-specific usage instructions for LLM guidance.
+        Returns:
+            Usage instructions string, or None for no specific guidance
+        """
+        return None
+    def get_priority(self) -> int:
+        """Return priority for tool ordering in instructions.
+        Returns:
+            Priority number (lower = higher priority, default = 100)
+        """
+        return 100
+    def get_workflow_position(self) -> WorkflowPosition:
+        """Return the typical workflow position for this tool.
+        Returns:
+            WorkflowPosition enum value
+        """
+        return WorkflowPosition.OTHER

sqlsaber/tools/enums.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Enums for tool categories and workflow positions."""
+from enum import Enum
+class ToolCategory(Enum):
+    """Tool categories for organizing and filtering tools."""
+    GENERAL = "general"
+    SQL = "sql"
+    VISUALIZATION = "visualization"
+class WorkflowPosition(Enum):
+    """Workflow positions for organizing tools by usage order."""
+    DISCOVERY = "discovery"
+    ANALYSIS = "analysis"
+    EXECUTION = "execution"
+    VISUALIZATION = "visualization"
+    OTHER = "other"

sqlsaber/tools/instructions.py ADDED Viewed

@@ -0,0 +1,251 @@
+"""Dynamic instruction builder for tools."""
+from .base import Tool
+from .enums import ToolCategory, WorkflowPosition
+from .registry import ToolRegistry
+class InstructionBuilder:
+    """Builds dynamic instructions based on available tools."""
+    def __init__(self, tool_registry: ToolRegistry):
+        """Initialize with a tool registry."""
+        self.registry = tool_registry
+    def build_instructions(
+        self,
+        db_type: str = "database",
+        category: str | ToolCategory | None = None,
+        include_base_instructions: bool = True,
+    ) -> str:
+        """Build dynamic instructions from available tools.
+        Args:
+            db_type: Type of database (PostgreSQL, MySQL, SQLite, etc.)
+            category: Optional category to filter tools by (string or ToolCategory enum)
+            include_base_instructions: Whether to include base SQL assistant instructions
+        Returns:
+            Complete instruction string for LLM
+        """
+        # Get available tools
+        tools = self.registry.get_all_tools(category)
+        if not tools:
+            return self._get_base_instructions(db_type)
+        # Sort tools by priority and workflow position
+        sorted_tools = self._sort_tools_by_workflow(tools)
+        # Build instruction components
+        instructions_parts = []
+        if include_base_instructions:
+            instructions_parts.append(self._get_base_instructions(db_type))
+        # Add tool-specific workflow guidance
+        workflow_instructions = self._build_workflow_instructions(sorted_tools)
+        if workflow_instructions:
+            instructions_parts.append(workflow_instructions)
+        # Add tool descriptions and guidelines
+        tool_guidelines = self._build_tool_guidelines(sorted_tools)
+        if tool_guidelines:
+            instructions_parts.append(tool_guidelines)
+        # Add general guidelines
+        general_guidelines = self._build_general_guidelines(sorted_tools)
+        if general_guidelines:
+            instructions_parts.append(general_guidelines)
+        return "\n\n".join(instructions_parts)
+    def _get_base_instructions(self, db_type: str) -> str:
+        """Get base SQL assistant instructions."""
+        return f"""You are also a helpful SQL assistant that helps users query their {db_type} database.
+Your responsibilities:
+1. Understand user's natural language requests, think and convert them to SQL
+2. Use the provided tools efficiently to explore database schema
+3. Generate appropriate SQL queries
+4. Execute queries safely - queries that modify the database are not allowed
+5. Format and explain results clearly
+6. Create visualizations when requested or when they would be helpful"""
+    def _sort_tools_by_workflow(self, tools: list[Tool]) -> list[Tool]:
+        """Sort tools by priority and workflow position."""
+        # Define workflow position ordering
+        position_order = {
+            WorkflowPosition.DISCOVERY: 1,
+            WorkflowPosition.ANALYSIS: 2,
+            WorkflowPosition.EXECUTION: 3,
+            WorkflowPosition.VISUALIZATION: 4,
+            WorkflowPosition.OTHER: 5,
+        }
+        return sorted(
+            tools,
+            key=lambda tool: (
+                position_order.get(tool.get_workflow_position(), 5),
+                tool.get_priority(),
+                tool.name,
+            ),
+        )
+    def _build_workflow_instructions(self, sorted_tools: list[Tool]) -> str:
+        """Build workflow-based instructions."""
+        # Group tools by workflow position
+        workflow_groups = {}
+        for tool in sorted_tools:
+            position = tool.get_workflow_position()
+            if position not in workflow_groups:
+                workflow_groups[position] = []
+            workflow_groups[position].append(tool)
+        # Build workflow instructions
+        instructions = ["IMPORTANT - Tool Usage Strategy:"]
+        step = 1
+        # Add discovery tools first
+        if WorkflowPosition.DISCOVERY in workflow_groups:
+            discovery_tools = workflow_groups[WorkflowPosition.DISCOVERY]
+            for tool in discovery_tools:
+                usage = tool.get_usage_instructions()
+                if usage:
+                    instructions.append(f"{step}. {usage}")
+                else:
+                    instructions.append(
+                        f"{step}. Use '{tool.name}' to {tool.description.lower()}"
+                    )
+                step += 1
+        # Add analysis tools
+        if WorkflowPosition.ANALYSIS in workflow_groups:
+            analysis_tools = workflow_groups[WorkflowPosition.ANALYSIS]
+            for tool in analysis_tools:
+                usage = tool.get_usage_instructions()
+                if usage:
+                    instructions.append(f"{step}. {usage}")
+                else:
+                    instructions.append(
+                        f"{step}. Use '{tool.name}' to {tool.description.lower()}"
+                    )
+                step += 1
+        # Add execution tools
+        if WorkflowPosition.EXECUTION in workflow_groups:
+            execution_tools = workflow_groups[WorkflowPosition.EXECUTION]
+            for tool in execution_tools:
+                usage = tool.get_usage_instructions()
+                if usage:
+                    instructions.append(f"{step}. {usage}")
+                else:
+                    instructions.append(
+                        f"{step}. Use '{tool.name}' to {tool.description.lower()}"
+                    )
+                step += 1
+        # Add visualization tools
+        if WorkflowPosition.VISUALIZATION in workflow_groups:
+            viz_tools = workflow_groups[WorkflowPosition.VISUALIZATION]
+            for tool in viz_tools:
+                usage = tool.get_usage_instructions()
+                if usage:
+                    instructions.append(f"{step}. {usage}")
+                else:
+                    instructions.append(
+                        f"{step}. Use '{tool.name}' when creating visualizations"
+                    )
+                step += 1
+        return "\n".join(instructions) if len(instructions) > 1 else ""
+    def _build_tool_guidelines(self, sorted_tools: list[Tool]) -> str:
+        """Build tool-specific guidelines."""
+        guidelines = []
+        for tool in sorted_tools:
+            usage = tool.get_usage_instructions()
+            if usage and not self._is_usage_in_workflow(usage):
+                guidelines.append(f"- {tool.name}: {usage}")
+        if guidelines:
+            return "Tool-Specific Guidelines:\n" + "\n".join(guidelines)
+        return ""
+    def _build_general_guidelines(self, sorted_tools: list[Tool]) -> str:
+        """Build general usage guidelines."""
+        guidelines = [
+            "Guidelines:",
+            "- Use proper JOIN syntax and avoid cartesian products",
+            "- Include appropriate WHERE clauses to limit results",
+            "- Explain what the query does in simple terms",
+            "- Handle errors gracefully and suggest fixes",
+            "- Be security conscious - use parameterized queries when needed",
+        ]
+        # Add category-specific guidelines
+        categories = {tool.category for tool in sorted_tools}
+        if ToolCategory.SQL in categories:
+            guidelines.extend(
+                [
+                    "- Timestamp columns must be converted to text when you write queries",
+                    "- Use table patterns like 'sample%' or '%experiment%' to filter related tables",
+                ]
+            )
+        if ToolCategory.VISUALIZATION in categories:
+            guidelines.append(
+                "- Create visualizations when they would enhance understanding of the data"
+            )
+        return "\n".join(guidelines)
+    def _is_usage_in_workflow(self, usage: str) -> bool:
+        """Check if usage instruction is already covered in workflow section."""
+        # Simple heuristic - if usage starts with workflow words, it's probably in workflow
+        workflow_words = ["always start", "first", "use this", "begin with", "start by"]
+        usage_lower = usage.lower()
+        return any(word in usage_lower for word in workflow_words)
+    def build_mcp_instructions(self) -> str:
+        """Build instructions specifically for MCP server."""
+        instructions = [
+            "This server provides helpful resources and tools that will help you address users queries on their database.",
+            "",
+        ]
+        # Add database discovery
+        instructions.append("- Get all databases using `get_databases()`")
+        # Add tool-specific instructions
+        sql_tools = self.registry.get_all_tools(category=ToolCategory.SQL)
+        sorted_tools = self._sort_tools_by_workflow(sql_tools)
+        for tool in sorted_tools:
+            instructions.append(f"- Call `{tool.name}()` to {tool.description.lower()}")
+        # Add workflow guidelines
+        instructions.extend(["", "Guidelines:"])
+        workflow_instructions = self._build_workflow_instructions(sorted_tools)
+        if workflow_instructions:
+            # Extract just the numbered steps without the "IMPORTANT" header
+            lines = workflow_instructions.split("\n")[1:]  # Skip header
+            for line in lines:
+                if line.strip():
+                    # Convert numbered steps to bullet points
+                    if line.strip()[0].isdigit():
+                        instructions.append(f"- {line.strip()[3:]}")  # Remove "X. "
+        # Add general guidelines
+        instructions.extend(
+            [
+                "- Use proper JOIN syntax and avoid cartesian products",
+                "- Include appropriate WHERE clauses to limit results",
+                "- Handle errors gracefully and suggest fixes",
+            ]
+        )
+        return "\n".join(instructions)

sqlsaber 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

Potentially problematic release.

sqlsaber 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl