sqlsaber 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to the supported public registries. It is provided for informational purposes only and reflects the changes between the two published versions.

Potentially problematic release: this version of sqlsaber might be problematic.

@@ -10,6 +10,7 @@ from urllib.parse import parse_qs, urlparse
 import aiomysql
 import aiosqlite
 import asyncpg
+import duckdb
 
 # Default query timeout to prevent runaway queries
 DEFAULT_QUERY_TIMEOUT = 30.0  # seconds
@@ -351,115 +352,143 @@ class SQLiteConnection(BaseDatabaseConnection):
             await conn.rollback()
 
 
+def _execute_duckdb_transaction(
+    conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
+) -> list[dict[str, Any]]:
+    """Run a DuckDB query inside a transaction and return list of dicts."""
+    conn.execute("BEGIN TRANSACTION")
+    try:
+        if args:
+            conn.execute(query, args)
+        else:
+            conn.execute(query)
+
+        if conn.description is None:
+            rows: list[dict[str, Any]] = []
+        else:
+            columns = [col[0] for col in conn.description]
+            data = conn.fetchall()
+            rows = [dict(zip(columns, row)) for row in data]
+
+        conn.execute("ROLLBACK")
+        return rows
+    except Exception:
+        conn.execute("ROLLBACK")
+        raise
+
+
 class CSVConnection(BaseDatabaseConnection):
-    """CSV file connection using in-memory SQLite database."""
+    """CSV file connection using DuckDB per query."""
 
     def __init__(self, connection_string: str):
         super().__init__(connection_string)
 
-        # Parse CSV file path from connection string
-        self.csv_path = connection_string.replace("csv:///", "")
+        raw_path = connection_string.replace("csv:///", "", 1)
+        self.csv_path = raw_path.split("?", 1)[0]
 
-        # CSV parsing options
         self.delimiter = ","
         self.encoding = "utf-8"
         self.has_header = True
 
-        # Parse additional options from connection string
         parsed = urlparse(connection_string)
         if parsed.query:
             params = parse_qs(parsed.query)
-            self.delimiter = params.get("delimiter", [","])[0]
-            self.encoding = params.get("encoding", ["utf-8"])[0]
+            self.delimiter = params.get("delimiter", [self.delimiter])[0]
+            self.encoding = params.get("encoding", [self.encoding])[0]
             self.has_header = params.get("header", ["true"])[0].lower() == "true"
 
-        # Table name derived from filename
-        self.table_name = Path(self.csv_path).stem
-
-        # Initialize connection and flag to track if CSV is loaded
-        self._conn = None
-        self._csv_loaded = False
+        self.table_name = Path(self.csv_path).stem or "csv_table"
 
     async def get_pool(self):
-        """Get or create the in-memory database connection."""
-        if self._conn is None:
-            self._conn = await aiosqlite.connect(":memory:")
-            self._conn.row_factory = aiosqlite.Row
-            await self._load_csv_data()
-        return self._conn
+        """CSV connections do not maintain a pool."""
+        return None
 
     async def close(self):
-        """Close the database connection."""
-        if self._conn:
-            await self._conn.close()
-            self._conn = None
-            self._csv_loaded = False
+        """No persistent resources to close for CSV connections."""
+        pass
+
+    def _quote_identifier(self, identifier: str) -> str:
+        escaped = identifier.replace('"', '""')
+        return f'"{escaped}"'
+
+    def _quote_literal(self, value: str) -> str:
+        escaped = value.replace("'", "''")
+        return f"'{escaped}'"
+
+    def _normalized_encoding(self) -> str | None:
+        encoding = (self.encoding or "").strip()
+        if not encoding or encoding.lower() == "utf-8":
+            return None
+        return encoding.replace("-", "").replace("_", "").upper()
+
+    def _create_view(self, conn: duckdb.DuckDBPyConnection) -> None:
+        header_literal = "TRUE" if self.has_header else "FALSE"
+        option_parts = [f"HEADER={header_literal}"]
+
+        if self.delimiter:
+            option_parts.append(f"DELIM={self._quote_literal(self.delimiter)}")
+
+        encoding = self._normalized_encoding()
+        if encoding:
+            option_parts.append(f"ENCODING={self._quote_literal(encoding)}")
+
+        options_sql = ""
+        if option_parts:
+            options_sql = ", " + ", ".join(option_parts)
+
+        base_relation_sql = (
+            f"read_csv_auto({self._quote_literal(self.csv_path)}{options_sql})"
+        )
+
+        create_view_sql = (
+            f"CREATE VIEW {self._quote_identifier(self.table_name)} AS "
+            f"SELECT * FROM {base_relation_sql}"
+        )
+        conn.execute(create_view_sql)
+
+    async def execute_query(
+        self, query: str, *args, timeout: float | None = None
+    ) -> list[dict[str, Any]]:
+        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
+        args_tuple = tuple(args) if args else tuple()
 
-    async def _load_csv_data(self):
-        """Load CSV data into the in-memory SQLite database."""
-        if self._csv_loaded or not self._conn:
-            return
+        def _run_query() -> list[dict[str, Any]]:
+            conn = duckdb.connect(":memory:")
+            try:
+                self._create_view(conn)
+                return _execute_duckdb_transaction(conn, query, args_tuple)
+            finally:
+                conn.close()
 
         try:
-            # Import pandas only when needed for CSV operations
-            # This improves CLI load times
-            import pandas as pd
-
-            # Read CSV file using pandas
-            df = pd.read_csv(
-                self.csv_path,
-                delimiter=self.delimiter,
-                encoding=self.encoding,
-                header=0 if self.has_header else None,
+            return await asyncio.wait_for(
+                asyncio.to_thread(_run_query), timeout=effective_timeout
             )
+        except asyncio.TimeoutError as exc:
+            raise QueryTimeoutError(effective_timeout or 0) from exc
 
-            # If no header, create column names
-            if not self.has_header:
-                df.columns = [f"column_{i}" for i in range(len(df.columns))]
-
-            # Create table with proper column types
-            columns_sql = []
-            for col in df.columns:
-                # Infer SQLite type from pandas dtype
-                dtype = df[col].dtype
-                if pd.api.types.is_integer_dtype(dtype):
-                    sql_type = "INTEGER"
-                elif pd.api.types.is_float_dtype(dtype):
-                    sql_type = "REAL"
-                elif pd.api.types.is_bool_dtype(dtype):
-                    sql_type = "INTEGER"  # SQLite doesn't have BOOLEAN
-                else:
-                    sql_type = "TEXT"
 
-                columns_sql.append(f'"{col}" {sql_type}')
+class DuckDBConnection(BaseDatabaseConnection):
+    """DuckDB database connection using duckdb Python API."""
 
-            create_table_sql = (
-                f'CREATE TABLE "{self.table_name}" ({", ".join(columns_sql)})'
-            )
-            await self._conn.execute(create_table_sql)
-
-            # Insert data row by row
-            placeholders = ", ".join(["?" for _ in df.columns])
-            insert_sql = f'INSERT INTO "{self.table_name}" VALUES ({placeholders})'
-
-            for _, row in df.iterrows():
-                # Convert pandas values to Python native types
-                values = []
-                for val in row:
-                    if pd.isna(val):
-                        values.append(None)
-                    elif isinstance(val, (pd.Timestamp, pd.Timedelta)):
-                        values.append(str(val))
-                    else:
-                        values.append(val)
+    def __init__(self, connection_string: str):
+        super().__init__(connection_string)
+        if connection_string.startswith("duckdb:///"):
+            db_path = connection_string.replace("duckdb:///", "", 1)
+        elif connection_string.startswith("duckdb://"):
+            db_path = connection_string.replace("duckdb://", "", 1)
+        else:
+            db_path = connection_string
 
-                await self._conn.execute(insert_sql, values)
+        self.database_path = db_path or ":memory:"
 
-            await self._conn.commit()
-            self._csv_loaded = True
+    async def get_pool(self):
+        """DuckDB creates connections per query, return database path."""
+        return self.database_path
 
-        except Exception as e:
-            raise ValueError(f"Error loading CSV file '{self.csv_path}': {str(e)}")
+    async def close(self):
+        """DuckDB connections are created per query, no persistent pool to close."""
+        pass
 
     async def execute_query(
         self, query: str, *args, timeout: float | None = None
@@ -470,29 +499,22 @@ class CSVConnection(BaseDatabaseConnection):
         ensuring no changes are persisted to the database.
         """
        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
-        conn = await self.get_pool()
 
-        # Start transaction
-        await conn.execute("BEGIN")
-        try:
-            # Execute query with client-side timeout (CSV uses in-memory SQLite)
-            if effective_timeout:
-                cursor = await asyncio.wait_for(
-                    conn.execute(query, args if args else ()), timeout=effective_timeout
-                )
-                rows = await asyncio.wait_for(
-                    cursor.fetchall(), timeout=effective_timeout
-                )
-            else:
-                cursor = await conn.execute(query, args if args else ())
-                rows = await cursor.fetchall()
+        args_tuple = tuple(args) if args else tuple()
 
-            return [dict(row) for row in rows]
+        def _run_query() -> list[dict[str, Any]]:
+            conn = duckdb.connect(self.database_path)
+            try:
+                return _execute_duckdb_transaction(conn, query, args_tuple)
+            finally:
+                conn.close()
+
+        try:
+            return await asyncio.wait_for(
+                asyncio.to_thread(_run_query), timeout=effective_timeout
+            )
         except asyncio.TimeoutError as exc:
             raise QueryTimeoutError(effective_timeout or 0) from exc
-        finally:
-            # Always rollback to ensure no changes are committed
-            await conn.rollback()
 
 
 def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
@@ -503,6 +525,8 @@ def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
         return MySQLConnection(connection_string)
     elif connection_string.startswith("sqlite:///"):
         return SQLiteConnection(connection_string)
+    elif connection_string.startswith("duckdb://"):
+        return DuckDBConnection(connection_string)
     elif connection_string.startswith("csv:///"):
         return CSVConnection(connection_string)
     else:
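
A minimal usage sketch, not part of the diff, showing how the factory above dispatches the new duckdb:// scheme. DatabaseConnection, execute_query, and close come from the code shown; the file name analytics.duckdb and the query are hypothetical.

import asyncio

async def main() -> None:
    # Dispatches to DuckDBConnection via the new "duckdb://" branch above.
    conn = DatabaseConnection("duckdb:///analytics.duckdb")  # hypothetical file
    try:
        # Runs in a worker thread; the transaction helper wraps the query in
        # BEGIN ... ROLLBACK, so results come back as dicts and nothing persists.
        rows = await conn.execute_query("SELECT 42 AS answer")
        print(rows)  # [{'answer': 42}]
    finally:
        await conn.close()

asyncio.run(main())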
@@ -23,7 +23,7 @@ class ResolvedDatabase:
     connection_string: str  # Canonical connection string for DatabaseConnection factory
 
 
-SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "csv"}
+SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "duckdb", "csv"}
 
 
 def _is_connection_string(s: str) -> bool:
@@ -67,8 +67,8 @@ def resolve_database(
         scheme = urlparse(spec).scheme
         if scheme in {"postgresql", "mysql"}:
             db_name = urlparse(spec).path.lstrip("/") or "database"
-        elif scheme in {"sqlite", "csv"}:
-            db_name = Path(urlparse(spec).path).stem
+        elif scheme in {"sqlite", "duckdb", "csv"}:
+            db_name = Path(urlparse(spec).path).stem or "database"
         else:  # should not happen because of SUPPORTED_SCHEMES
             db_name = "database"
         return ResolvedDatabase(name=db_name, connection_string=spec)
@@ -83,6 +83,10 @@ def resolve_database(
         if not path.exists():
             raise DatabaseResolutionError(f"SQLite file '{spec}' not found.")
         return ResolvedDatabase(name=path.stem, connection_string=f"sqlite:///{path}")
+    if path.suffix.lower() in {".duckdb", ".ddb"}:
+        if not path.exists():
+            raise DatabaseResolutionError(f"DuckDB file '{spec}' not found.")
+        return ResolvedDatabase(name=path.stem, connection_string=f"duckdb:///{path}")
 
     # 3. Must be a configured name
     db_cfg: DatabaseConfig | None = config_mgr.get_database(spec)
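
A hedged sketch of the resolver behavior added in the last hunk. The exact resolve_database signature is not shown in the diff; this assumes it takes the spec string plus the config manager referenced above, and that a file named warehouse.duckdb exists (both the call shape and the file name are assumptions).

# Hypothetical call; signature assumed from the config_mgr reference above.
resolved = resolve_database("warehouse.duckdb", config_mgr)
# The new ".duckdb"/".ddb" branch yields:
#   resolved.name == "warehouse"
#   resolved.connection_string == "duckdb:///warehouse.duckdb"
# A missing file instead raises DatabaseResolutionError("DuckDB file ... not found.")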