sqlsaber-0.24.0-py3-none-any.whl → sqlsaber-0.25.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlsaber might be problematic. More details are available via the link on the original diff page.

sqlsaber/agents/base.py CHANGED
@@ -8,6 +8,7 @@ from typing import Any, AsyncIterator
8
8
  from sqlsaber.database.connection import (
9
9
  BaseDatabaseConnection,
10
10
  CSVConnection,
11
+ DuckDBConnection,
11
12
  MySQLConnection,
12
13
  PostgreSQLConnection,
13
14
  SQLiteConnection,
@@ -51,7 +52,9 @@ class BaseSQLAgent(ABC):
51
52
  elif isinstance(self.db, SQLiteConnection):
52
53
  return "SQLite"
53
54
  elif isinstance(self.db, CSVConnection):
54
- return "SQLite" # we convert csv to in-memory sqlite
55
+ return "DuckDB"
56
+ elif isinstance(self.db, DuckDBConnection):
57
+ return "DuckDB"
55
58
  else:
56
59
  return "database" # Fallback
57
60
 
sqlsaber/agents/pydantic_ai_agent.py CHANGED
@@ -17,6 +17,7 @@ from sqlsaber.config.settings import Config
17
17
  from sqlsaber.database.connection import (
18
18
  BaseDatabaseConnection,
19
19
  CSVConnection,
20
+ DuckDBConnection,
20
21
  MySQLConnection,
21
22
  PostgreSQLConnection,
22
23
  SQLiteConnection,
@@ -169,7 +170,9 @@ def _get_database_type_name(db: BaseDatabaseConnection) -> str:
169
170
  return "MySQL"
170
171
  elif isinstance(db, SQLiteConnection):
171
172
  return "SQLite"
173
+ elif isinstance(db, DuckDBConnection):
174
+ return "DuckDB"
172
175
  elif isinstance(db, CSVConnection):
173
- return "SQLite"
176
+ return "DuckDB"
174
177
  else:
175
178
  return "database"
sqlsaber/cli/commands.py CHANGED
@@ -46,7 +46,7 @@ def meta_handler(
46
46
  str | None,
47
47
  cyclopts.Parameter(
48
48
  ["--database", "-d"],
49
- help="Database connection name, file path (CSV/SQLite), or connection string (postgresql://, mysql://) (uses default if not specified)",
49
+ help="Database connection name, file path (CSV/SQLite/DuckDB), or connection string (postgresql://, mysql://, duckdb://) (uses default if not specified)",
50
50
  ),
51
51
  ] = None,
52
52
  ):
@@ -59,8 +59,10 @@ def meta_handler(
59
59
  saber -d mydb "show me users" # Run a query with specific database
60
60
  saber -d data.csv "show me users" # Run a query with ad-hoc CSV file
61
61
  saber -d data.db "show me users" # Run a query with ad-hoc SQLite file
62
+ saber -d data.duckdb "show me users" # Run a query with ad-hoc DuckDB file
62
63
  saber -d "postgresql://user:pass@host:5432/db" "show users" # PostgreSQL connection string
63
64
  saber -d "mysql://user:pass@host:3306/db" "show users" # MySQL connection string
65
+ saber -d "duckdb:///data.duckdb" "show users" # DuckDB connection string
64
66
  echo "show me all users" | saber # Read query from stdin
65
67
  cat query.txt | saber # Read query from file via stdin
66
68
  """
@@ -80,7 +82,7 @@ def query(
80
82
  str | None,
81
83
  cyclopts.Parameter(
82
84
  ["--database", "-d"],
83
- help="Database connection name, file path (CSV/SQLite), or connection string (postgresql://, mysql://) (uses default if not specified)",
85
+ help="Database connection name, file path (CSV/SQLite/DuckDB), or connection string (postgresql://, mysql://, duckdb://) (uses default if not specified)",
84
86
  ),
85
87
  ] = None,
86
88
  ):
@@ -97,8 +99,10 @@ def query(
97
99
  saber "show me all users" # Run a single query
98
100
  saber -d data.csv "show users" # Run a query with ad-hoc CSV file
99
101
  saber -d data.db "show users" # Run a query with ad-hoc SQLite file
102
+ saber -d data.duckdb "show users" # Run a query with ad-hoc DuckDB file
100
103
  saber -d "postgresql://user:pass@host:5432/db" "show users" # PostgreSQL connection string
101
104
  saber -d "mysql://user:pass@host:3306/db" "show users" # MySQL connection string
105
+ saber -d "duckdb:///data.duckdb" "show users" # DuckDB connection string
102
106
  echo "show me all users" | saber # Read query from stdin
103
107
  """
104
108
 
@@ -111,6 +115,7 @@ def query(
111
115
  from sqlsaber.database.connection import (
112
116
  CSVConnection,
113
117
  DatabaseConnection,
118
+ DuckDBConnection,
114
119
  MySQLConnection,
115
120
  PostgreSQLConnection,
116
121
  SQLiteConnection,
@@ -149,15 +154,18 @@ def query(
149
154
  # Single query mode with streaming
150
155
  streaming_handler = StreamingQueryHandler(console)
151
156
  # Compute DB type for the greeting line
152
- db_type = (
153
- "PostgreSQL"
154
- if isinstance(db_conn, PostgreSQLConnection)
155
- else "MySQL"
156
- if isinstance(db_conn, MySQLConnection)
157
- else "SQLite"
158
- if isinstance(db_conn, (SQLiteConnection, CSVConnection))
159
- else "database"
160
- )
157
+ if isinstance(db_conn, PostgreSQLConnection):
158
+ db_type = "PostgreSQL"
159
+ elif isinstance(db_conn, MySQLConnection):
160
+ db_type = "MySQL"
161
+ elif isinstance(db_conn, DuckDBConnection):
162
+ db_type = "DuckDB"
163
+ elif isinstance(db_conn, SQLiteConnection):
164
+ db_type = "SQLite"
165
+ elif isinstance(db_conn, CSVConnection):
166
+ db_type = "DuckDB"
167
+ else:
168
+ db_type = "database"
161
169
  console.print(
162
170
  f"[bold blue]Connected to:[/bold blue] {db_name} ({db_type})\n"
163
171
  )
sqlsaber/cli/database.py CHANGED
@@ -31,7 +31,7 @@ def add(
31
31
  str,
32
32
  cyclopts.Parameter(
33
33
  ["--type", "-t"],
34
- help="Database type (postgresql, mysql, sqlite)",
34
+ help="Database type (postgresql, mysql, sqlite, duckdb)",
35
35
  ),
36
36
  ] = "postgresql",
37
37
  host: Annotated[
@@ -87,17 +87,17 @@ def add(
87
87
  if not type or type == "postgresql":
88
88
  type = questionary.select(
89
89
  "Database type:",
90
- choices=["postgresql", "mysql", "sqlite"],
90
+ choices=["postgresql", "mysql", "sqlite", "duckdb"],
91
91
  default="postgresql",
92
92
  ).ask()
93
93
 
94
- if type == "sqlite":
95
- # SQLite only needs database path
94
+ if type in {"sqlite", "duckdb"}:
95
+ # SQLite/DuckDB only need database file path
96
96
  database = database or questionary.path("Database file path:").ask()
97
97
  database = str(Path(database).expanduser().resolve())
98
98
  host = "localhost"
99
99
  port = 0
100
- username = "sqlite"
100
+ username = type
101
101
  password = ""
102
102
  else:
103
103
  # PostgreSQL/MySQL need connection details
@@ -182,6 +182,17 @@ def add(
182
182
  port = 0
183
183
  username = "sqlite"
184
184
  password = ""
185
+ elif type == "duckdb":
186
+ if not database:
187
+ console.print(
188
+ "[bold red]Error:[/bold red] Database file path is required for DuckDB"
189
+ )
190
+ sys.exit(1)
191
+ database = str(Path(database).expanduser().resolve())
192
+ host = "localhost"
193
+ port = 0
194
+ username = "duckdb"
195
+ password = ""
185
196
  else:
186
197
  if not all([host, database, username]):
187
198
  console.print(
@@ -264,7 +275,7 @@ def list():
264
275
  if db.ssl_ca or db.ssl_cert:
265
276
  ssl_status += " (certs)"
266
277
  else:
267
- ssl_status = "disabled" if db.type != "sqlite" else "N/A"
278
+ ssl_status = "disabled" if db.type not in {"sqlite", "duckdb"} else "N/A"
268
279
 
269
280
  table.add_row(
270
281
  db.name,
sqlsaber/cli/interactive.py CHANGED
@@ -23,6 +23,7 @@ from sqlsaber.cli.display import DisplayManager
23
23
  from sqlsaber.cli.streaming import StreamingQueryHandler
24
24
  from sqlsaber.database.connection import (
25
25
  CSVConnection,
26
+ DuckDBConnection,
26
27
  MySQLConnection,
27
28
  PostgreSQLConnection,
28
29
  SQLiteConnection,
@@ -85,8 +86,12 @@ class InteractiveSession:
85
86
  if isinstance(self.db_conn, PostgreSQLConnection)
86
87
  else "MySQL"
87
88
  if isinstance(self.db_conn, MySQLConnection)
89
+ else "DuckDB"
90
+ if isinstance(self.db_conn, DuckDBConnection)
91
+ else "DuckDB"
92
+ if isinstance(self.db_conn, CSVConnection)
88
93
  else "SQLite"
89
- if isinstance(self.db_conn, (SQLiteConnection, CSVConnection))
94
+ if isinstance(self.db_conn, SQLiteConnection)
90
95
  else "database"
91
96
  )
92
97
 
sqlsaber/config/database.py CHANGED
@@ -18,7 +18,7 @@ class DatabaseConfig:
18
18
  """Database connection configuration."""
19
19
 
20
20
  name: str
21
- type: str # postgresql, mysql, sqlite, csv
21
+ type: str # postgresql, mysql, sqlite, duckdb, csv
22
22
  host: str | None
23
23
  port: int | None
24
24
  database: str
@@ -90,6 +90,8 @@ class DatabaseConfig:
90
90
 
91
91
  elif self.type == "sqlite":
92
92
  return f"sqlite:///{self.database}"
93
+ elif self.type == "duckdb":
94
+ return f"duckdb:///{self.database}"
93
95
  elif self.type == "csv":
94
96
  # For CSV files, database field contains the file path
95
97
  base_url = f"csv:///{self.database}"
sqlsaber/database/connection.py CHANGED
@@ -10,6 +10,7 @@ from urllib.parse import parse_qs, urlparse
10
10
  import aiomysql
11
11
  import aiosqlite
12
12
  import asyncpg
13
+ import duckdb
13
14
 
14
15
  # Default query timeout to prevent runaway queries
15
16
  DEFAULT_QUERY_TIMEOUT = 30.0 # seconds
@@ -351,115 +352,143 @@ class SQLiteConnection(BaseDatabaseConnection):
351
352
  await conn.rollback()
352
353
 
353
354
 
355
+ def _execute_duckdb_transaction(
356
+ conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
357
+ ) -> list[dict[str, Any]]:
358
+ """Run a DuckDB query inside a transaction and return list of dicts."""
359
+ conn.execute("BEGIN TRANSACTION")
360
+ try:
361
+ if args:
362
+ conn.execute(query, args)
363
+ else:
364
+ conn.execute(query)
365
+
366
+ if conn.description is None:
367
+ rows: list[dict[str, Any]] = []
368
+ else:
369
+ columns = [col[0] for col in conn.description]
370
+ data = conn.fetchall()
371
+ rows = [dict(zip(columns, row)) for row in data]
372
+
373
+ conn.execute("ROLLBACK")
374
+ return rows
375
+ except Exception:
376
+ conn.execute("ROLLBACK")
377
+ raise
378
+
379
+
354
380
  class CSVConnection(BaseDatabaseConnection):
355
- """CSV file connection using in-memory SQLite database."""
381
+ """CSV file connection using DuckDB per query."""
356
382
 
357
383
  def __init__(self, connection_string: str):
358
384
  super().__init__(connection_string)
359
385
 
360
- # Parse CSV file path from connection string
361
- self.csv_path = connection_string.replace("csv:///", "")
386
+ raw_path = connection_string.replace("csv:///", "", 1)
387
+ self.csv_path = raw_path.split("?", 1)[0]
362
388
 
363
- # CSV parsing options
364
389
  self.delimiter = ","
365
390
  self.encoding = "utf-8"
366
391
  self.has_header = True
367
392
 
368
- # Parse additional options from connection string
369
393
  parsed = urlparse(connection_string)
370
394
  if parsed.query:
371
395
  params = parse_qs(parsed.query)
372
- self.delimiter = params.get("delimiter", [","])[0]
373
- self.encoding = params.get("encoding", ["utf-8"])[0]
396
+ self.delimiter = params.get("delimiter", [self.delimiter])[0]
397
+ self.encoding = params.get("encoding", [self.encoding])[0]
374
398
  self.has_header = params.get("header", ["true"])[0].lower() == "true"
375
399
 
376
- # Table name derived from filename
377
- self.table_name = Path(self.csv_path).stem
378
-
379
- # Initialize connection and flag to track if CSV is loaded
380
- self._conn = None
381
- self._csv_loaded = False
400
+ self.table_name = Path(self.csv_path).stem or "csv_table"
382
401
 
383
402
  async def get_pool(self):
384
- """Get or create the in-memory database connection."""
385
- if self._conn is None:
386
- self._conn = await aiosqlite.connect(":memory:")
387
- self._conn.row_factory = aiosqlite.Row
388
- await self._load_csv_data()
389
- return self._conn
403
+ """CSV connections do not maintain a pool."""
404
+ return None
390
405
 
391
406
  async def close(self):
392
- """Close the database connection."""
393
- if self._conn:
394
- await self._conn.close()
395
- self._conn = None
396
- self._csv_loaded = False
407
+ """No persistent resources to close for CSV connections."""
408
+ pass
409
+
410
+ def _quote_identifier(self, identifier: str) -> str:
411
+ escaped = identifier.replace('"', '""')
412
+ return f'"{escaped}"'
413
+
414
+ def _quote_literal(self, value: str) -> str:
415
+ escaped = value.replace("'", "''")
416
+ return f"'{escaped}'"
417
+
418
+ def _normalized_encoding(self) -> str | None:
419
+ encoding = (self.encoding or "").strip()
420
+ if not encoding or encoding.lower() == "utf-8":
421
+ return None
422
+ return encoding.replace("-", "").replace("_", "").upper()
423
+
424
+ def _create_view(self, conn: duckdb.DuckDBPyConnection) -> None:
425
+ header_literal = "TRUE" if self.has_header else "FALSE"
426
+ option_parts = [f"HEADER={header_literal}"]
427
+
428
+ if self.delimiter:
429
+ option_parts.append(f"DELIM={self._quote_literal(self.delimiter)}")
430
+
431
+ encoding = self._normalized_encoding()
432
+ if encoding:
433
+ option_parts.append(f"ENCODING={self._quote_literal(encoding)}")
434
+
435
+ options_sql = ""
436
+ if option_parts:
437
+ options_sql = ", " + ", ".join(option_parts)
438
+
439
+ base_relation_sql = (
440
+ f"read_csv_auto({self._quote_literal(self.csv_path)}{options_sql})"
441
+ )
442
+
443
+ create_view_sql = (
444
+ f"CREATE VIEW {self._quote_identifier(self.table_name)} AS "
445
+ f"SELECT * FROM {base_relation_sql}"
446
+ )
447
+ conn.execute(create_view_sql)
448
+
449
+ async def execute_query(
450
+ self, query: str, *args, timeout: float | None = None
451
+ ) -> list[dict[str, Any]]:
452
+ effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
453
+ args_tuple = tuple(args) if args else tuple()
397
454
 
398
- async def _load_csv_data(self):
399
- """Load CSV data into the in-memory SQLite database."""
400
- if self._csv_loaded or not self._conn:
401
- return
455
+ def _run_query() -> list[dict[str, Any]]:
456
+ conn = duckdb.connect(":memory:")
457
+ try:
458
+ self._create_view(conn)
459
+ return _execute_duckdb_transaction(conn, query, args_tuple)
460
+ finally:
461
+ conn.close()
402
462
 
403
463
  try:
404
- # Import pandas only when needed for CSV operations
405
- # This improves CLI load times
406
- import pandas as pd
407
-
408
- # Read CSV file using pandas
409
- df = pd.read_csv(
410
- self.csv_path,
411
- delimiter=self.delimiter,
412
- encoding=self.encoding,
413
- header=0 if self.has_header else None,
464
+ return await asyncio.wait_for(
465
+ asyncio.to_thread(_run_query), timeout=effective_timeout
414
466
  )
467
+ except asyncio.TimeoutError as exc:
468
+ raise QueryTimeoutError(effective_timeout or 0) from exc
415
469
 
416
- # If no header, create column names
417
- if not self.has_header:
418
- df.columns = [f"column_{i}" for i in range(len(df.columns))]
419
-
420
- # Create table with proper column types
421
- columns_sql = []
422
- for col in df.columns:
423
- # Infer SQLite type from pandas dtype
424
- dtype = df[col].dtype
425
- if pd.api.types.is_integer_dtype(dtype):
426
- sql_type = "INTEGER"
427
- elif pd.api.types.is_float_dtype(dtype):
428
- sql_type = "REAL"
429
- elif pd.api.types.is_bool_dtype(dtype):
430
- sql_type = "INTEGER" # SQLite doesn't have BOOLEAN
431
- else:
432
- sql_type = "TEXT"
433
470
 
434
- columns_sql.append(f'"{col}" {sql_type}')
471
+ class DuckDBConnection(BaseDatabaseConnection):
472
+ """DuckDB database connection using duckdb Python API."""
435
473
 
436
- create_table_sql = (
437
- f'CREATE TABLE "{self.table_name}" ({", ".join(columns_sql)})'
438
- )
439
- await self._conn.execute(create_table_sql)
440
-
441
- # Insert data row by row
442
- placeholders = ", ".join(["?" for _ in df.columns])
443
- insert_sql = f'INSERT INTO "{self.table_name}" VALUES ({placeholders})'
444
-
445
- for _, row in df.iterrows():
446
- # Convert pandas values to Python native types
447
- values = []
448
- for val in row:
449
- if pd.isna(val):
450
- values.append(None)
451
- elif isinstance(val, (pd.Timestamp, pd.Timedelta)):
452
- values.append(str(val))
453
- else:
454
- values.append(val)
474
+ def __init__(self, connection_string: str):
475
+ super().__init__(connection_string)
476
+ if connection_string.startswith("duckdb:///"):
477
+ db_path = connection_string.replace("duckdb:///", "", 1)
478
+ elif connection_string.startswith("duckdb://"):
479
+ db_path = connection_string.replace("duckdb://", "", 1)
480
+ else:
481
+ db_path = connection_string
455
482
 
456
- await self._conn.execute(insert_sql, values)
483
+ self.database_path = db_path or ":memory:"
457
484
 
458
- await self._conn.commit()
459
- self._csv_loaded = True
485
+ async def get_pool(self):
486
+ """DuckDB creates connections per query, return database path."""
487
+ return self.database_path
460
488
 
461
- except Exception as e:
462
- raise ValueError(f"Error loading CSV file '{self.csv_path}': {str(e)}")
489
+ async def close(self):
490
+ """DuckDB connections are created per query, no persistent pool to close."""
491
+ pass
463
492
 
464
493
  async def execute_query(
465
494
  self, query: str, *args, timeout: float | None = None
@@ -470,29 +499,22 @@ class CSVConnection(BaseDatabaseConnection):
470
499
  ensuring no changes are persisted to the database.
471
500
  """
472
501
  effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
473
- conn = await self.get_pool()
474
502
 
475
- # Start transaction
476
- await conn.execute("BEGIN")
477
- try:
478
- # Execute query with client-side timeout (CSV uses in-memory SQLite)
479
- if effective_timeout:
480
- cursor = await asyncio.wait_for(
481
- conn.execute(query, args if args else ()), timeout=effective_timeout
482
- )
483
- rows = await asyncio.wait_for(
484
- cursor.fetchall(), timeout=effective_timeout
485
- )
486
- else:
487
- cursor = await conn.execute(query, args if args else ())
488
- rows = await cursor.fetchall()
503
+ args_tuple = tuple(args) if args else tuple()
489
504
 
490
- return [dict(row) for row in rows]
505
+ def _run_query() -> list[dict[str, Any]]:
506
+ conn = duckdb.connect(self.database_path)
507
+ try:
508
+ return _execute_duckdb_transaction(conn, query, args_tuple)
509
+ finally:
510
+ conn.close()
511
+
512
+ try:
513
+ return await asyncio.wait_for(
514
+ asyncio.to_thread(_run_query), timeout=effective_timeout
515
+ )
491
516
  except asyncio.TimeoutError as exc:
492
517
  raise QueryTimeoutError(effective_timeout or 0) from exc
493
- finally:
494
- # Always rollback to ensure no changes are committed
495
- await conn.rollback()
496
518
 
497
519
 
498
520
  def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
@@ -503,6 +525,8 @@ def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
503
525
  return MySQLConnection(connection_string)
504
526
  elif connection_string.startswith("sqlite:///"):
505
527
  return SQLiteConnection(connection_string)
528
+ elif connection_string.startswith("duckdb://"):
529
+ return DuckDBConnection(connection_string)
506
530
  elif connection_string.startswith("csv:///"):
507
531
  return CSVConnection(connection_string)
508
532
  else:
sqlsaber/database/resolver.py CHANGED
@@ -23,7 +23,7 @@ class ResolvedDatabase:
23
23
  connection_string: str # Canonical connection string for DatabaseConnection factory
24
24
 
25
25
 
26
- SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "csv"}
26
+ SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "duckdb", "csv"}
27
27
 
28
28
 
29
29
  def _is_connection_string(s: str) -> bool:
@@ -67,8 +67,8 @@ def resolve_database(
67
67
  scheme = urlparse(spec).scheme
68
68
  if scheme in {"postgresql", "mysql"}:
69
69
  db_name = urlparse(spec).path.lstrip("/") or "database"
70
- elif scheme in {"sqlite", "csv"}:
71
- db_name = Path(urlparse(spec).path).stem
70
+ elif scheme in {"sqlite", "duckdb", "csv"}:
71
+ db_name = Path(urlparse(spec).path).stem or "database"
72
72
  else: # should not happen because of SUPPORTED_SCHEMES
73
73
  db_name = "database"
74
74
  return ResolvedDatabase(name=db_name, connection_string=spec)
@@ -83,6 +83,10 @@ def resolve_database(
83
83
  if not path.exists():
84
84
  raise DatabaseResolutionError(f"SQLite file '{spec}' not found.")
85
85
  return ResolvedDatabase(name=path.stem, connection_string=f"sqlite:///{path}")
86
+ if path.suffix.lower() in {".duckdb", ".ddb"}:
87
+ if not path.exists():
88
+ raise DatabaseResolutionError(f"DuckDB file '{spec}' not found.")
89
+ return ResolvedDatabase(name=path.stem, connection_string=f"duckdb:///{path}")
86
90
 
87
91
  # 3. Must be a configured name
88
92
  db_cfg: DatabaseConfig | None = config_mgr.get_database(spec)
sqlsaber/database/schema.py CHANGED
@@ -1,13 +1,16 @@
1
1
  """Database schema introspection utilities."""
2
2
 
3
+ import asyncio
3
4
  from abc import ABC, abstractmethod
4
5
  from typing import Any, TypedDict
5
6
 
6
7
  import aiosqlite
8
+ import duckdb
7
9
 
8
10
  from sqlsaber.database.connection import (
9
11
  BaseDatabaseConnection,
10
12
  CSVConnection,
13
+ DuckDBConnection,
11
14
  MySQLConnection,
12
15
  PostgreSQLConnection,
13
16
  SQLiteConnection,
@@ -682,6 +685,225 @@ class SQLiteSchemaIntrospector(BaseSchemaIntrospector):
682
685
  ]
683
686
 
684
687
 
688
+ class DuckDBSchemaIntrospector(BaseSchemaIntrospector):
689
+ """DuckDB-specific schema introspection."""
690
+
691
+ async def _execute_query(
692
+ self,
693
+ connection: DuckDBConnection | CSVConnection,
694
+ query: str,
695
+ params: tuple[Any, ...] = (),
696
+ ) -> list[dict[str, Any]]:
697
+ """Run a DuckDB query on a thread and return list of dictionaries."""
698
+
699
+ params_tuple = tuple(params)
700
+
701
+ def fetch_rows(conn: duckdb.DuckDBPyConnection) -> list[dict[str, Any]]:
702
+ cursor = conn.execute(query, params_tuple)
703
+ if cursor.description is None:
704
+ return []
705
+
706
+ columns = [col[0] for col in cursor.description]
707
+ rows = conn.fetchall()
708
+ return [dict(zip(columns, row)) for row in rows]
709
+
710
+ if isinstance(connection, CSVConnection):
711
+ return await connection.execute_query(query, *params_tuple)
712
+
713
+ def run_query() -> list[dict[str, Any]]:
714
+ conn = duckdb.connect(connection.database_path)
715
+ try:
716
+ return fetch_rows(conn)
717
+ finally:
718
+ conn.close()
719
+
720
+ return await asyncio.to_thread(run_query)
721
+
722
+ async def get_tables_info(
723
+ self, connection, table_pattern: str | None = None
724
+ ) -> list[dict[str, Any]]:
725
+ """Get tables information for DuckDB."""
726
+ where_conditions = [
727
+ "table_schema NOT IN ('information_schema', 'pg_catalog', 'duckdb_catalog')"
728
+ ]
729
+ params: list[Any] = []
730
+
731
+ if table_pattern:
732
+ if "." in table_pattern:
733
+ schema_pattern, table_name_pattern = table_pattern.split(".", 1)
734
+ where_conditions.append(
735
+ "(table_schema LIKE ? AND table_name LIKE ?)"
736
+ )
737
+ params.extend([schema_pattern, table_name_pattern])
738
+ else:
739
+ where_conditions.append(
740
+ "(table_name LIKE ? OR table_schema || '.' || table_name LIKE ?)"
741
+ )
742
+ params.extend([table_pattern, table_pattern])
743
+
744
+ query = f"""
745
+ SELECT
746
+ table_schema,
747
+ table_name,
748
+ table_type
749
+ FROM information_schema.tables
750
+ WHERE {" AND ".join(where_conditions)}
751
+ ORDER BY table_schema, table_name;
752
+ """
753
+
754
+ return await self._execute_query(connection, query, tuple(params))
755
+
756
+ async def get_columns_info(self, connection, tables: list) -> list[dict[str, Any]]:
757
+ """Get columns information for DuckDB."""
758
+ if not tables:
759
+ return []
760
+
761
+ table_filters = []
762
+ for table in tables:
763
+ table_filters.append(
764
+ "(table_schema = ? AND table_name = ?)"
765
+ )
766
+
767
+ params: list[Any] = []
768
+ for table in tables:
769
+ params.extend([table["table_schema"], table["table_name"]])
770
+
771
+ query = f"""
772
+ SELECT
773
+ table_schema,
774
+ table_name,
775
+ column_name,
776
+ data_type,
777
+ is_nullable,
778
+ column_default,
779
+ character_maximum_length,
780
+ numeric_precision,
781
+ numeric_scale
782
+ FROM information_schema.columns
783
+ WHERE {" OR ".join(table_filters)}
784
+ ORDER BY table_schema, table_name, ordinal_position;
785
+ """
786
+
787
+ return await self._execute_query(connection, query, tuple(params))
788
+
789
+ async def get_foreign_keys_info(self, connection, tables: list) -> list[dict[str, Any]]:
790
+ """Get foreign keys information for DuckDB."""
791
+ if not tables:
792
+ return []
793
+
794
+ table_filters = []
795
+ params: list[Any] = []
796
+ for table in tables:
797
+ table_filters.append("(kcu.table_schema = ? AND kcu.table_name = ?)")
798
+ params.extend([table["table_schema"], table["table_name"]])
799
+
800
+ query = f"""
801
+ SELECT
802
+ kcu.table_schema,
803
+ kcu.table_name,
804
+ kcu.column_name,
805
+ ccu.table_schema AS foreign_table_schema,
806
+ ccu.table_name AS foreign_table_name,
807
+ ccu.column_name AS foreign_column_name
808
+ FROM information_schema.referential_constraints AS rc
809
+ JOIN information_schema.key_column_usage AS kcu
810
+ ON rc.constraint_schema = kcu.constraint_schema
811
+ AND rc.constraint_name = kcu.constraint_name
812
+ JOIN information_schema.key_column_usage AS ccu
813
+ ON rc.unique_constraint_schema = ccu.constraint_schema
814
+ AND rc.unique_constraint_name = ccu.constraint_name
815
+ AND ccu.ordinal_position = kcu.position_in_unique_constraint
816
+ WHERE {" OR ".join(table_filters)}
817
+ ORDER BY kcu.table_schema, kcu.table_name, kcu.ordinal_position;
818
+ """
819
+
820
+ return await self._execute_query(connection, query, tuple(params))
821
+
822
+ async def get_primary_keys_info(self, connection, tables: list) -> list[dict[str, Any]]:
823
+ """Get primary keys information for DuckDB."""
824
+ if not tables:
825
+ return []
826
+
827
+ table_filters = []
828
+ params: list[Any] = []
829
+ for table in tables:
830
+ table_filters.append("(tc.table_schema = ? AND tc.table_name = ?)")
831
+ params.extend([table["table_schema"], table["table_name"]])
832
+
833
+ query = f"""
834
+ SELECT
835
+ tc.table_schema,
836
+ tc.table_name,
837
+ kcu.column_name
838
+ FROM information_schema.table_constraints AS tc
839
+ JOIN information_schema.key_column_usage AS kcu
840
+ ON tc.constraint_name = kcu.constraint_name
841
+ AND tc.constraint_schema = kcu.constraint_schema
842
+ WHERE tc.constraint_type = 'PRIMARY KEY'
843
+ AND ({" OR ".join(table_filters)})
844
+ ORDER BY tc.table_schema, tc.table_name, kcu.ordinal_position;
845
+ """
846
+
847
+ return await self._execute_query(connection, query, tuple(params))
848
+
849
+ async def get_indexes_info(self, connection, tables: list) -> list[dict[str, Any]]:
850
+ """Get indexes information for DuckDB."""
851
+ if not tables:
852
+ return []
853
+
854
+ indexes: list[dict[str, Any]] = []
855
+ for table in tables:
856
+ schema = table["table_schema"]
857
+ table_name = table["table_name"]
858
+ query = """
859
+ SELECT
860
+ schema_name,
861
+ table_name,
862
+ index_name,
863
+ sql
864
+ FROM duckdb_indexes()
865
+ WHERE schema_name = ? AND table_name = ?;
866
+ """
867
+ rows = await self._execute_query(connection, query, (schema, table_name))
868
+
869
+ for row in rows:
870
+ sql_text = (row.get("sql") or "").strip()
871
+ upper_sql = sql_text.upper()
872
+ unique = "UNIQUE" in upper_sql.split("(")[0]
873
+
874
+ columns: list[str] = []
875
+ if "(" in sql_text and ")" in sql_text:
876
+ column_section = sql_text[sql_text.find("(") + 1 : sql_text.rfind(")")]
877
+ columns = [col.strip().strip('"') for col in column_section.split(",") if col.strip()]
878
+
879
+ indexes.append(
880
+ {
881
+ "table_schema": row.get("schema_name") or schema or "main",
882
+ "table_name": row.get("table_name") or table_name,
883
+ "index_name": row.get("index_name"),
884
+ "is_unique": unique,
885
+ "index_type": None,
886
+ "column_names": columns,
887
+ }
888
+ )
889
+
890
+ return indexes
891
+
892
+ async def list_tables_info(self, connection) -> list[dict[str, Any]]:
893
+ """Get list of tables with basic information for DuckDB."""
894
+ query = """
895
+ SELECT
896
+ table_schema,
897
+ table_name,
898
+ table_type
899
+ FROM information_schema.tables
900
+ WHERE table_schema NOT IN ('information_schema', 'pg_catalog', 'duckdb_catalog')
901
+ ORDER BY table_schema, table_name;
902
+ """
903
+
904
+ return await self._execute_query(connection, query)
905
+
906
+
685
907
  class SchemaManager:
686
908
  """Manages database schema introspection."""
687
909
 
@@ -693,8 +915,10 @@ class SchemaManager:
693
915
  self.introspector = PostgreSQLSchemaIntrospector()
694
916
  elif isinstance(db_connection, MySQLConnection):
695
917
  self.introspector = MySQLSchemaIntrospector()
696
- elif isinstance(db_connection, (SQLiteConnection, CSVConnection)):
918
+ elif isinstance(db_connection, SQLiteConnection):
697
919
  self.introspector = SQLiteSchemaIntrospector()
920
+ elif isinstance(db_connection, (DuckDBConnection, CSVConnection)):
921
+ self.introspector = DuckDBSchemaIntrospector()
698
922
  else:
699
923
  raise ValueError(
700
924
  f"Unsupported database connection type: {type(db_connection)}"
{sqlsaber-0.24.0.dist-info → sqlsaber-0.25.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlsaber
3
- Version: 0.24.0
3
+ Version: 0.25.0
4
4
  Summary: SQLsaber - Open-source agentic SQL assistant
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -8,10 +8,10 @@ Requires-Dist: aiomysql>=0.2.0
8
8
  Requires-Dist: aiosqlite>=0.21.0
9
9
  Requires-Dist: asyncpg>=0.30.0
10
10
  Requires-Dist: cyclopts>=3.22.1
11
+ Requires-Dist: duckdb>=0.9.2
11
12
  Requires-Dist: fastmcp>=2.9.0
12
13
  Requires-Dist: httpx>=0.28.1
13
14
  Requires-Dist: keyring>=25.6.0
14
- Requires-Dist: pandas>=2.0.0
15
15
  Requires-Dist: platformdirs>=4.0.0
16
16
  Requires-Dist: prompt-toolkit>3.0.51
17
17
  Requires-Dist: pydantic-ai
@@ -58,7 +58,7 @@ Ask your questions in natural language and `sqlsaber` will gather the right cont
58
58
  - 🧠 Memory management
59
59
  - 💬 Interactive REPL mode
60
60
  - 🧵 Conversation threads (store, display, and resume conversations)
61
- - 🗄️ Support for PostgreSQL, SQLite, and MySQL
61
+ - 🗄️ Support for PostgreSQL, MySQL, SQLite, and DuckDB
62
62
  - 🔌 MCP (Model Context Protocol) server support
63
63
  - 🎨 Beautiful formatted output
64
64
 
@@ -170,6 +170,7 @@ saber -d mydb "count all orders"
170
170
 
171
171
  # You can also pass a connection string
172
172
  saber -d "postgresql://user:password@localhost:5432/mydb" "count all orders"
173
+ saber -d "duckdb:///path/to/data.duckdb" "top customers"
173
174
  ```
174
175
 
175
176
  ## Examples
{sqlsaber-0.24.0.dist-info → sqlsaber-0.25.0.dist-info}/RECORD CHANGED
@@ -1,16 +1,16 @@
1
1
  sqlsaber/__init__.py,sha256=HjS8ULtP4MGpnTL7njVY45NKV9Fi4e_yeYuY-hyXWQc,73
2
2
  sqlsaber/__main__.py,sha256=RIHxWeWh2QvLfah-2OkhI5IJxojWfy4fXpMnVEJYvxw,78
3
3
  sqlsaber/agents/__init__.py,sha256=i_MI2eWMQaVzGikKU71FPCmSQxNDKq36Imq1PrYoIPU,130
4
- sqlsaber/agents/base.py,sha256=7zOZTHKxUuU0uMc-NTaCkkBfDnU3jtwbT8_eP1ZtJ2k,2615
4
+ sqlsaber/agents/base.py,sha256=EAuoj3vpWNqksudMd2lL1Fmx68Y91qNX6NyK1RjQ4-g,2679
5
5
  sqlsaber/agents/mcp.py,sha256=GcJTx7YDYH6aaxIADEIxSgcWAdWakUx395JIzVnf17U,768
6
- sqlsaber/agents/pydantic_ai_agent.py,sha256=6RvG2O7G8P6NN9QaRXUodg5Q26QJ4ShGWoTGYbVQ5K4,7065
6
+ sqlsaber/agents/pydantic_ai_agent.py,sha256=qn-DnTGcdUzSEn9xBWwGhgtifYxZ_NEo8XPePnl1StE,7154
7
7
  sqlsaber/cli/__init__.py,sha256=qVSLVJLLJYzoC6aj6y9MFrzZvAwc4_OgxU9DlkQnZ4M,86
8
8
  sqlsaber/cli/auth.py,sha256=jTsRgbmlGPlASSuIKmdjjwfqtKvjfKd_cTYxX0-QqaQ,7400
9
- sqlsaber/cli/commands.py,sha256=mjLG9i1bXf0TEroxkIxq5O7Hhjufz3Ad72cyJz7vE1k,8128
9
+ sqlsaber/cli/commands.py,sha256=NyBDr5qEnCOZrHEMGcEpHLXEWdlzEQW3D61NIrPi2fQ,8727
10
10
  sqlsaber/cli/completers.py,sha256=HsUPjaZweLSeYCWkAcgMl8FylQ1xjWBWYTEL_9F6xfU,6430
11
- sqlsaber/cli/database.py,sha256=JKtHSN-BFzBa14REf0phFVQB7d67m1M5FFaD8N6DdrY,12966
11
+ sqlsaber/cli/database.py,sha256=93etjqiYAfH08jBe_OJpLMNKiu3H81G8O7CMB31MIIc,13424
12
12
  sqlsaber/cli/display.py,sha256=XuKiTWUw5k0U0P_f1K7zhDWX5KTO2DQVG0Q0XU9VEhs,16334
13
- sqlsaber/cli/interactive.py,sha256=7uM4LoXbhPJr8o5yNjICSzL0uxZkp1psWrVq4G9V0OI,13118
13
+ sqlsaber/cli/interactive.py,sha256=lVOtONBeAmZxWdfkvdoVoX4POs_-C1YVs0jPxY9MoZs,13288
14
14
  sqlsaber/cli/memory.py,sha256=OufHFJFwV0_GGn7LvKRTJikkWhV1IwNIUDOxFPHXOaQ,7794
15
15
  sqlsaber/cli/models.py,sha256=ZewtwGQwhd9b-yxBAPKePolvI1qQG-EkmeWAGMqtWNQ,8986
16
16
  sqlsaber/cli/streaming.py,sha256=Eo5CNUgDGY1WYP90jwDA2aY7RefN-TfcStA6NyjUQTY,7076
@@ -18,15 +18,15 @@ sqlsaber/cli/threads.py,sha256=ufDABlqndVJKd5COgSokcFRIKTgsGqXdHV84DVVm7MA,12743
18
18
  sqlsaber/config/__init__.py,sha256=olwC45k8Nc61yK0WmPUk7XHdbsZH9HuUAbwnmKe3IgA,100
19
19
  sqlsaber/config/api_keys.py,sha256=RqWQCko1tY7sES7YOlexgBH5Hd5ne_kGXHdBDNqcV2U,3649
20
20
  sqlsaber/config/auth.py,sha256=b5qB2h1doXyO9Bn8z0CcL8LAR2jF431gGXBGKLgTmtQ,2756
21
- sqlsaber/config/database.py,sha256=c6q3l4EvoBch1ckYHA70hf6L7fSOY-sItnLCpvJiPrA,11357
21
+ sqlsaber/config/database.py,sha256=Yec6_0wdzq-ADblMNnbgvouYCimYOY_DWHT9oweaISc,11449
22
22
  sqlsaber/config/oauth_flow.py,sha256=A3bSXaBLzuAfXV2ZPA94m9NV33c2MyL6M4ii9oEkswQ,10291
23
23
  sqlsaber/config/oauth_tokens.py,sha256=C9z35hyx-PvSAYdC1LNf3rg9_wsEIY56hkEczelbad0,6015
24
24
  sqlsaber/config/providers.py,sha256=JFjeJv1K5Q93zWSlWq3hAvgch1TlgoF0qFa0KJROkKY,2957
25
25
  sqlsaber/config/settings.py,sha256=vgb_RXaM-7DgbxYDmWNw1cSyMqwys4j3qNCvM4bljwI,5586
26
26
  sqlsaber/database/__init__.py,sha256=a_gtKRJnZVO8-fEZI7g3Z8YnGa6Nio-5Y50PgVp07ss,176
27
- sqlsaber/database/connection.py,sha256=1bDPEa6cmdh87gPfhNeBLpOdI0E2_2KlE74q_-4l_jI,18913
28
- sqlsaber/database/resolver.py,sha256=RPXF5EoKzvQDDLmPGNHYd2uG_oNICH8qvUjBp6iXmNY,3348
29
- sqlsaber/database/schema.py,sha256=Le5DXSgpsWyhMDuY6qpc_dsP4jjMXgJTRtAKq9S5Oog,32868
27
+ sqlsaber/database/connection.py,sha256=J3U08Qu7NQrmem0jPM5XKIHPmPJE927IiLhN8zA6oLo,19392
28
+ sqlsaber/database/resolver.py,sha256=wSCcn__aCqwIfpt_LCjtW2Zgb8RpG5PlmwwZHli1q_U,3628
29
+ sqlsaber/database/schema.py,sha256=9HXTb5O_nlS2aNDeyv7EXhX7_kN2hs6rbPnJ8fnLyWk,41260
30
30
  sqlsaber/mcp/__init__.py,sha256=COdWq7wauPBp5Ew8tfZItFzbcLDSEkHBJSMhxzy8C9c,112
31
31
  sqlsaber/mcp/mcp.py,sha256=X12oCMZYAtgJ7MNuh5cqz8y3lALrOzkXWcfpuY0Ijxk,3950
32
32
  sqlsaber/memory/__init__.py,sha256=GiWkU6f6YYVV0EvvXDmFWe_CxarmDCql05t70MkTEWs,63
@@ -40,8 +40,8 @@ sqlsaber/tools/enums.py,sha256=CH32mL-0k9ZA18911xLpNtsgpV6tB85TktMj6uqGz54,411
40
40
  sqlsaber/tools/instructions.py,sha256=X-x8maVkkyi16b6Tl0hcAFgjiYceZaSwyWTfmrvx8U8,9024
41
41
  sqlsaber/tools/registry.py,sha256=HWOQMsNIdL4XZS6TeNUyrL-5KoSDH6PHsWd3X66o-18,3211
42
42
  sqlsaber/tools/sql_tools.py,sha256=j4yRqfKokPFnZ_tEZPrWU5WStDc3Mexo1fWZ8KsmUjQ,9965
43
- sqlsaber-0.24.0.dist-info/METADATA,sha256=cPXj4eFPU-I6AWgHVVboKwu3zMmYKvs46LtrmZCBlhU,6178
44
- sqlsaber-0.24.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
45
- sqlsaber-0.24.0.dist-info/entry_points.txt,sha256=qEbOB7OffXPFgyJc7qEIJlMEX5RN9xdzLmWZa91zCQQ,162
46
- sqlsaber-0.24.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
- sqlsaber-0.24.0.dist-info/RECORD,,
43
+ sqlsaber-0.25.0.dist-info/METADATA,sha256=9Q2AsBv4I78FLo8Uezmnv_fCch3jIKgv1gzBBm1cVB4,6243
44
+ sqlsaber-0.25.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
45
+ sqlsaber-0.25.0.dist-info/entry_points.txt,sha256=qEbOB7OffXPFgyJc7qEIJlMEX5RN9xdzLmWZa91zCQQ,162
46
+ sqlsaber-0.25.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
47
+ sqlsaber-0.25.0.dist-info/RECORD,,