sqlsaber 0.25.0__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlsaber might be problematic. Click here for more details.

Files changed (38) hide show
  1. sqlsaber/agents/__init__.py +2 -2
  2. sqlsaber/agents/base.py +1 -1
  3. sqlsaber/agents/mcp.py +1 -1
  4. sqlsaber/agents/pydantic_ai_agent.py +207 -135
  5. sqlsaber/application/__init__.py +1 -0
  6. sqlsaber/application/auth_setup.py +164 -0
  7. sqlsaber/application/db_setup.py +223 -0
  8. sqlsaber/application/model_selection.py +98 -0
  9. sqlsaber/application/prompts.py +115 -0
  10. sqlsaber/cli/auth.py +22 -50
  11. sqlsaber/cli/commands.py +22 -28
  12. sqlsaber/cli/completers.py +2 -0
  13. sqlsaber/cli/database.py +25 -86
  14. sqlsaber/cli/display.py +29 -9
  15. sqlsaber/cli/interactive.py +150 -127
  16. sqlsaber/cli/models.py +18 -28
  17. sqlsaber/cli/onboarding.py +325 -0
  18. sqlsaber/cli/streaming.py +15 -17
  19. sqlsaber/cli/threads.py +10 -6
  20. sqlsaber/config/api_keys.py +2 -2
  21. sqlsaber/config/settings.py +25 -2
  22. sqlsaber/database/__init__.py +55 -1
  23. sqlsaber/database/base.py +124 -0
  24. sqlsaber/database/csv.py +133 -0
  25. sqlsaber/database/duckdb.py +313 -0
  26. sqlsaber/database/mysql.py +345 -0
  27. sqlsaber/database/postgresql.py +328 -0
  28. sqlsaber/database/schema.py +66 -963
  29. sqlsaber/database/sqlite.py +258 -0
  30. sqlsaber/mcp/mcp.py +1 -1
  31. sqlsaber/tools/sql_tools.py +1 -1
  32. {sqlsaber-0.25.0.dist-info → sqlsaber-0.27.0.dist-info}/METADATA +43 -9
  33. sqlsaber-0.27.0.dist-info/RECORD +58 -0
  34. sqlsaber/database/connection.py +0 -535
  35. sqlsaber-0.25.0.dist-info/RECORD +0 -47
  36. {sqlsaber-0.25.0.dist-info → sqlsaber-0.27.0.dist-info}/WHEEL +0 -0
  37. {sqlsaber-0.25.0.dist-info → sqlsaber-0.27.0.dist-info}/entry_points.txt +0 -0
  38. {sqlsaber-0.25.0.dist-info → sqlsaber-0.27.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,124 @@
1
+ """Base classes and type definitions for database connections and schema introspection."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, TypedDict
5
+
6
+ # Default query timeout to prevent runaway queries
7
+ DEFAULT_QUERY_TIMEOUT = 30.0 # seconds
8
+
9
+
10
class QueryTimeoutError(RuntimeError):
    """Raised when a database query runs longer than its allotted timeout."""

    def __init__(self, seconds: float):
        super().__init__(f"Query exceeded timeout of {seconds}s")
        # Preserve the configured limit so callers can inspect or report it.
        self.timeout = seconds
16
+
17
+
18
class ColumnInfo(TypedDict):
    """Type definition for column information.

    Fields mirror information_schema.columns output produced by the
    schema introspectors (data_type, is_nullable, column_default,
    character_maximum_length, numeric_precision, numeric_scale).
    """

    # SQL type name as reported by the backend.
    data_type: str
    # True when the column accepts NULL.
    nullable: bool
    # Textual default expression, or None when the column has no default.
    default: str | None
    # character_maximum_length for string types; None otherwise.
    max_length: int | None
    # numeric_precision for numeric types; None otherwise.
    precision: int | None
    # numeric_scale for numeric types; None otherwise.
    scale: int | None
27
+
28
+
29
class ForeignKeyInfo(TypedDict):
    """Type definition for foreign key information."""

    # Referencing column on the local table.
    column: str
    references: dict[str, str]  # {"table": "schema.table", "column": "column_name"}
34
+
35
+
36
class IndexInfo(TypedDict):
    """Type definition for index information."""

    # Index name as reported by the backend.
    name: str
    columns: list[str]  # ordered
    # True for unique indexes (including those backing unique constraints).
    unique: bool
    type: str | None  # btree, gin, FULLTEXT, etc. None if unknown
43
+
44
+
45
class SchemaInfo(TypedDict):
    """Type definition for schema information.

    Aggregated per-table description assembled from the introspector
    queries: columns keyed by column name, plus key and index metadata.
    """

    # Schema (namespace) the table lives in.
    schema: str
    # Table name without the schema qualifier.
    name: str
    # Table type, e.g. the information_schema table_type value.
    type: str
    # Column metadata keyed by column name.
    columns: dict[str, ColumnInfo]
    # Ordered list of primary key column names.
    primary_keys: list[str]
    foreign_keys: list[ForeignKeyInfo]
    indexes: list[IndexInfo]
55
+
56
+
57
+ class BaseDatabaseConnection(ABC):
58
+ """Abstract base class for database connections."""
59
+
60
+ def __init__(self, connection_string: str):
61
+ self.connection_string = connection_string
62
+ self._pool = None
63
+
64
+ @abstractmethod
65
+ async def get_pool(self):
66
+ """Get or create connection pool."""
67
+ pass
68
+
69
+ @abstractmethod
70
+ async def close(self):
71
+ """Close the connection pool."""
72
+ pass
73
+
74
+ @abstractmethod
75
+ async def execute_query(
76
+ self, query: str, *args, timeout: float | None = None
77
+ ) -> list[dict[str, Any]]:
78
+ """Execute a query and return results as list of dicts.
79
+
80
+ All queries run in a transaction that is rolled back at the end,
81
+ ensuring no changes are persisted to the database.
82
+
83
+ Args:
84
+ query: SQL query to execute
85
+ *args: Query parameters
86
+ timeout: Query timeout in seconds (overrides default_timeout)
87
+ """
88
+ pass
89
+
90
+
91
+ class BaseSchemaIntrospector(ABC):
92
+ """Abstract base class for database-specific schema introspection."""
93
+
94
+ @abstractmethod
95
+ async def get_tables_info(
96
+ self, connection, table_pattern: str | None = None
97
+ ) -> dict[str, Any]:
98
+ """Get tables information for the specific database type."""
99
+ pass
100
+
101
+ @abstractmethod
102
+ async def get_columns_info(self, connection, tables: list) -> list:
103
+ """Get columns information for the specific database type."""
104
+ pass
105
+
106
+ @abstractmethod
107
+ async def get_foreign_keys_info(self, connection, tables: list) -> list:
108
+ """Get foreign keys information for the specific database type."""
109
+ pass
110
+
111
+ @abstractmethod
112
+ async def get_primary_keys_info(self, connection, tables: list) -> list:
113
+ """Get primary keys information for the specific database type."""
114
+ pass
115
+
116
+ @abstractmethod
117
+ async def get_indexes_info(self, connection, tables: list) -> list:
118
+ """Get indexes information for the specific database type."""
119
+ pass
120
+
121
+ @abstractmethod
122
+ async def list_tables_info(self, connection) -> list[dict[str, Any]]:
123
+ """Get list of tables with basic information."""
124
+ pass
@@ -0,0 +1,133 @@
1
+ """CSV database connection using DuckDB backend."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import Any
6
+ from urllib.parse import parse_qs, urlparse
7
+
8
+ import duckdb
9
+
10
+ from .base import DEFAULT_QUERY_TIMEOUT, BaseDatabaseConnection, QueryTimeoutError
11
+ from .duckdb import DuckDBSchemaIntrospector
12
+
13
+
14
def _execute_duckdb_transaction(
    conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
) -> list[dict[str, Any]]:
    """Run a DuckDB query inside a transaction and return list of dicts.

    The transaction is always rolled back — on success and on failure —
    so the underlying data is never modified.
    """
    conn.execute("BEGIN TRANSACTION")
    try:
        if args:
            conn.execute(query, args)
        else:
            conn.execute(query)

        description = conn.description
        result: list[dict[str, Any]] = []
        if description is not None:
            names = [entry[0] for entry in description]
            result = [dict(zip(names, values)) for values in conn.fetchall()]

        conn.execute("ROLLBACK")
        return result
    except Exception:
        # Roll back before re-raising so the connection is left clean.
        conn.execute("ROLLBACK")
        raise
37
+
38
+
39
class CSVConnection(BaseDatabaseConnection):
    """CSV file connection using DuckDB per query.

    Each query spins up a fresh in-memory DuckDB instance, exposes the
    CSV file as a view, runs the query, and tears everything down again.
    """

    def __init__(self, connection_string: str):
        super().__init__(connection_string)

        # Strip the scheme, then drop any query string to get the file path.
        without_scheme = connection_string.replace("csv:///", "", 1)
        self.csv_path = without_scheme.split("?", 1)[0]

        # Parsing defaults; URL query parameters below may override them.
        self.delimiter = ","
        self.encoding = "utf-8"
        self.has_header = True

        parsed = urlparse(connection_string)
        if parsed.query:
            options = parse_qs(parsed.query)
            self.delimiter = options.get("delimiter", [self.delimiter])[0]
            self.encoding = options.get("encoding", [self.encoding])[0]
            self.has_header = options.get("header", ["true"])[0].lower() == "true"

        # View name derived from the file name ("data.csv" -> "data").
        self.table_name = Path(self.csv_path).stem or "csv_table"

    async def get_pool(self):
        """CSV connections do not maintain a pool."""
        return None

    async def close(self):
        """No persistent resources to close for CSV connections."""

    def _quote_identifier(self, identifier: str) -> str:
        # Double-quote an identifier, doubling embedded quotes.
        return '"' + identifier.replace('"', '""') + '"'

    def _quote_literal(self, value: str) -> str:
        # Single-quote a string literal, doubling embedded quotes.
        return "'" + value.replace("'", "''") + "'"

    def _normalized_encoding(self) -> str | None:
        """Return the encoding in DuckDB's form, or None for the default."""
        name = (self.encoding or "").strip()
        if not name or name.lower() == "utf-8":
            return None
        return name.replace("-", "").replace("_", "").upper()

    def _create_view(self, conn: duckdb.DuckDBPyConnection) -> None:
        """Expose the CSV file as a named view on *conn*."""
        options = ["HEADER=TRUE" if self.has_header else "HEADER=FALSE"]

        if self.delimiter:
            options.append(f"DELIM={self._quote_literal(self.delimiter)}")

        encoding = self._normalized_encoding()
        if encoding:
            options.append(f"ENCODING={self._quote_literal(encoding)}")

        options_sql = ", " + ", ".join(options) if options else ""

        relation = f"read_csv_auto({self._quote_literal(self.csv_path)}{options_sql})"
        conn.execute(
            f"CREATE VIEW {self._quote_identifier(self.table_name)} AS "
            f"SELECT * FROM {relation}"
        )

    async def execute_query(
        self, query: str, *args, timeout: float | None = None
    ) -> list[dict[str, Any]]:
        """Execute *query* against the CSV view; always rolled back."""
        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
        bound_args = tuple(args) if args else tuple()

        def _query_worker() -> list[dict[str, Any]]:
            # Fresh in-memory database per query keeps CSV access stateless.
            conn = duckdb.connect(":memory:")
            try:
                self._create_view(conn)
                return _execute_duckdb_transaction(conn, query, bound_args)
            finally:
                conn.close()

        try:
            return await asyncio.wait_for(
                asyncio.to_thread(_query_worker), timeout=effective_timeout
            )
        except asyncio.TimeoutError as exc:
            raise QueryTimeoutError(effective_timeout or 0) from exc
128
+
129
+
130
class CSVSchemaIntrospector(DuckDBSchemaIntrospector):
    """CSV-specific schema introspection using DuckDB backend."""
@@ -0,0 +1,313 @@
1
+ """DuckDB database connection and schema introspection."""
2
+
3
+ import asyncio
4
+ from typing import Any
5
+
6
+ import duckdb
7
+
8
+ from .base import (
9
+ DEFAULT_QUERY_TIMEOUT,
10
+ BaseDatabaseConnection,
11
+ BaseSchemaIntrospector,
12
+ QueryTimeoutError,
13
+ )
14
+
15
+
16
def _execute_duckdb_transaction(
    conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
) -> list[dict[str, Any]]:
    """Run a DuckDB query inside a transaction and return list of dicts.

    The transaction is rolled back on both success and failure, so no
    statement executed here can persist changes to the database.
    """
    conn.execute("BEGIN TRANSACTION")
    try:
        conn.execute(query, args) if args else None
        if not args:
            conn.execute(query)

        rows: list[dict[str, Any]] = []
        if conn.description is not None:
            column_names = [entry[0] for entry in conn.description]
            rows = [dict(zip(column_names, record)) for record in conn.fetchall()]

        conn.execute("ROLLBACK")
        return rows
    except Exception:
        # Leave the connection clean before propagating the error.
        conn.execute("ROLLBACK")
        raise
39
+
40
+
41
class DuckDBConnection(BaseDatabaseConnection):
    """DuckDB database connection using duckdb Python API.

    No pool is kept: a fresh connection is opened per query and closed
    immediately afterwards.
    """

    def __init__(self, connection_string: str):
        super().__init__(connection_string)
        # Accept "duckdb:///path", "duckdb://path", or a bare filesystem path.
        # The three-slash form must be tested first since it contains the
        # two-slash prefix.
        for prefix in ("duckdb:///", "duckdb://"):
            if connection_string.startswith(prefix):
                path = connection_string.replace(prefix, "", 1)
                break
        else:
            path = connection_string

        # An empty path means an in-memory database.
        self.database_path = path or ":memory:"

    async def get_pool(self):
        """DuckDB creates connections per query; return the database path."""
        return self.database_path

    async def close(self):
        """Connections are per-query, so there is no persistent pool to close."""

    async def execute_query(
        self, query: str, *args, timeout: float | None = None
    ) -> list[dict[str, Any]]:
        """Execute a query and return results as list of dicts.

        All queries run in a transaction that is rolled back at the end,
        ensuring no changes are persisted to the database.
        """
        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
        bound_args = tuple(args) if args else tuple()

        def _query_worker() -> list[dict[str, Any]]:
            conn = duckdb.connect(self.database_path)
            try:
                return _execute_duckdb_transaction(conn, query, bound_args)
            finally:
                conn.close()

        try:
            # Run the blocking duckdb call off the event loop, bounded by
            # the effective timeout.
            return await asyncio.wait_for(
                asyncio.to_thread(_query_worker), timeout=effective_timeout
            )
        except asyncio.TimeoutError as exc:
            raise QueryTimeoutError(effective_timeout or 0) from exc
88
+
89
+
90
class DuckDBSchemaIntrospector(BaseSchemaIntrospector):
    """DuckDB-specific schema introspection.

    Works against both DuckDBConnection (opens a connection per query on
    a worker thread) and CSVConnection (delegates to its execute_query).
    """

    async def _execute_query(
        self,
        connection,
        query: str,
        params: tuple[Any, ...] = (),
    ) -> list[dict[str, Any]]:
        """Run a DuckDB query on a thread and return list of dictionaries."""

        params_tuple = tuple(params)

        def fetch_rows(conn: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
            cursor = conn.execute(query, params_tuple)
            # No description means the statement produced no result set.
            if cursor.description is None:
                return []

            columns = [col[0] for col in cursor.description]
            rows = conn.fetchall()
            return [dict(zip(columns, row)) for row in rows]

        # Handle CSV connections differently: they are detected by duck
        # typing (presence of execute_query and csv_path) and already run
        # their queries on a thread with their own DuckDB instance.
        if hasattr(connection, "execute_query") and hasattr(connection, "csv_path"):
            return await connection.execute_query(query, *params_tuple)

        def run_query() -> list[dict[str, Any]]:
            # Fresh connection per query; always closed, even on error.
            conn = duckdb.connect(connection.database_path)
            try:
                return fetch_rows(conn)
            finally:
                conn.close()

        return await asyncio.to_thread(run_query)

    async def get_tables_info(
        self, connection, table_pattern: str | None = None
    ) -> list[dict[str, Any]]:
        """Get tables information for DuckDB.

        table_pattern supports LIKE wildcards; "schema.table" patterns
        match schema and table separately.
        """
        # Exclude DuckDB's system catalogs from the listing.
        where_conditions = [
            "table_schema NOT IN ('information_schema', 'pg_catalog', 'duckdb_catalog')"
        ]
        params: list[Any] = []

        if table_pattern:
            if "." in table_pattern:
                # Qualified pattern: split once so the table part may still
                # contain dots.
                schema_pattern, table_name_pattern = table_pattern.split(".", 1)
                where_conditions.append("(table_schema LIKE ? AND table_name LIKE ?)")
                params.extend([schema_pattern, table_name_pattern])
            else:
                # Unqualified pattern: match the bare name or the
                # schema-qualified name.
                where_conditions.append(
                    "(table_name LIKE ? OR table_schema || '.' || table_name LIKE ?)"
                )
                params.extend([table_pattern, table_pattern])

        query = f"""
            SELECT
                table_schema,
                table_name,
                table_type
            FROM information_schema.tables
            WHERE {" AND ".join(where_conditions)}
            ORDER BY table_schema, table_name;
        """

        return await self._execute_query(connection, query, tuple(params))

    async def get_columns_info(self, connection, tables: list) -> list[dict[str, Any]]:
        """Get columns information for DuckDB.

        *tables* is a list of dicts with table_schema/table_name keys, as
        returned by get_tables_info.
        """
        if not tables:
            return []

        # One (schema, name) equality filter per requested table.
        table_filters = []
        for table in tables:
            table_filters.append("(table_schema = ? AND table_name = ?)")

        params: list[Any] = []
        for table in tables:
            params.extend([table["table_schema"], table["table_name"]])

        query = f"""
            SELECT
                table_schema,
                table_name,
                column_name,
                data_type,
                is_nullable,
                column_default,
                character_maximum_length,
                numeric_precision,
                numeric_scale
            FROM information_schema.columns
            WHERE {" OR ".join(table_filters)}
            ORDER BY table_schema, table_name, ordinal_position;
        """

        return await self._execute_query(connection, query, tuple(params))

    async def get_foreign_keys_info(
        self, connection, tables: list
    ) -> list[dict[str, Any]]:
        """Get foreign keys information for DuckDB."""
        if not tables:
            return []

        table_filters = []
        params: list[Any] = []
        for table in tables:
            table_filters.append("(kcu.table_schema = ? AND kcu.table_name = ?)")
            params.extend([table["table_schema"], table["table_name"]])

        # Join referential_constraints to key_column_usage twice: once for
        # the referencing columns (kcu) and once for the referenced unique
        # constraint's columns (ccu), pairing them by position.
        query = f"""
            SELECT
                kcu.table_schema,
                kcu.table_name,
                kcu.column_name,
                ccu.table_schema AS foreign_table_schema,
                ccu.table_name AS foreign_table_name,
                ccu.column_name AS foreign_column_name
            FROM information_schema.referential_constraints AS rc
            JOIN information_schema.key_column_usage AS kcu
                ON rc.constraint_schema = kcu.constraint_schema
                AND rc.constraint_name = kcu.constraint_name
            JOIN information_schema.key_column_usage AS ccu
                ON rc.unique_constraint_schema = ccu.constraint_schema
                AND rc.unique_constraint_name = ccu.constraint_name
                AND ccu.ordinal_position = kcu.position_in_unique_constraint
            WHERE {" OR ".join(table_filters)}
            ORDER BY kcu.table_schema, kcu.table_name, kcu.ordinal_position;
        """

        return await self._execute_query(connection, query, tuple(params))

    async def get_primary_keys_info(
        self, connection, tables: list
    ) -> list[dict[str, Any]]:
        """Get primary keys information for DuckDB."""
        if not tables:
            return []

        table_filters = []
        params: list[Any] = []
        for table in tables:
            table_filters.append("(tc.table_schema = ? AND tc.table_name = ?)")
            params.extend([table["table_schema"], table["table_name"]])

        # Ordered by ordinal_position so composite keys come back in
        # declaration order.
        query = f"""
            SELECT
                tc.table_schema,
                tc.table_name,
                kcu.column_name
            FROM information_schema.table_constraints AS tc
            JOIN information_schema.key_column_usage AS kcu
                ON tc.constraint_name = kcu.constraint_name
                AND tc.constraint_schema = kcu.constraint_schema
            WHERE tc.constraint_type = 'PRIMARY KEY'
                AND ({" OR ".join(table_filters)})
            ORDER BY tc.table_schema, tc.table_name, kcu.ordinal_position;
        """

        return await self._execute_query(connection, query, tuple(params))

    async def get_indexes_info(self, connection, tables: list) -> list[dict[str, Any]]:
        """Get indexes information for DuckDB.

        information_schema carries no index metadata in DuckDB, so this
        queries duckdb_indexes() and parses each index's CREATE ... SQL
        text for uniqueness and column names.
        """
        if not tables:
            return []

        indexes: list[dict[str, Any]] = []
        for table in tables:
            schema = table["table_schema"]
            table_name = table["table_name"]
            query = """
                SELECT
                    schema_name,
                    table_name,
                    index_name,
                    sql
                FROM duckdb_indexes()
                WHERE schema_name = ? AND table_name = ?;
            """
            rows = await self._execute_query(connection, query, (schema, table_name))

            for row in rows:
                sql_text = (row.get("sql") or "").strip()
                upper_sql = sql_text.upper()
                # UNIQUE appears before the column list in
                # "CREATE UNIQUE INDEX ... (cols)", so only inspect the
                # text before the first parenthesis.
                unique = "UNIQUE" in upper_sql.split("(")[0]

                # Extract the column list between the outermost parens.
                # NOTE(review): this naive comma split would mis-parse
                # expression indexes whose expressions contain commas —
                # confirm that is acceptable here.
                columns: list[str] = []
                if "(" in sql_text and ")" in sql_text:
                    column_section = sql_text[
                        sql_text.find("(") + 1 : sql_text.rfind(")")
                    ]
                    columns = [
                        col.strip().strip('"')
                        for col in column_section.split(",")
                        if col.strip()
                    ]

                indexes.append(
                    {
                        # Fall back to the requested identifiers (or "main")
                        # when duckdb_indexes() returns NULLs.
                        "table_schema": row.get("schema_name") or schema or "main",
                        "table_name": row.get("table_name") or table_name,
                        "index_name": row.get("index_name"),
                        "is_unique": unique,
                        # duckdb_indexes() does not report an index type.
                        "index_type": None,
                        "column_names": columns,
                    }
                )

        return indexes

    async def list_tables_info(self, connection) -> list[dict[str, Any]]:
        """Get list of tables with basic information for DuckDB."""
        query = """
            SELECT
                table_schema,
                table_name,
                table_type
            FROM information_schema.tables
            WHERE table_schema NOT IN ('information_schema', 'pg_catalog', 'duckdb_catalog')
            ORDER BY table_schema, table_name;
        """

        return await self._execute_query(connection, query)