sqlsaber 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlsaber might be problematic. Click here for more details.
- sqlsaber/agents/base.py +4 -1
- sqlsaber/agents/pydantic_ai_agent.py +4 -1
- sqlsaber/cli/commands.py +19 -11
- sqlsaber/cli/database.py +17 -6
- sqlsaber/cli/display.py +49 -19
- sqlsaber/cli/interactive.py +6 -1
- sqlsaber/cli/threads.py +41 -18
- sqlsaber/config/database.py +3 -1
- sqlsaber/database/connection.py +123 -99
- sqlsaber/database/resolver.py +7 -3
- sqlsaber/database/schema.py +377 -1
- sqlsaber/tools/sql_tools.py +6 -0
- {sqlsaber-0.23.0.dist-info → sqlsaber-0.25.0.dist-info}/METADATA +4 -3
- {sqlsaber-0.23.0.dist-info → sqlsaber-0.25.0.dist-info}/RECORD +17 -17
- {sqlsaber-0.23.0.dist-info → sqlsaber-0.25.0.dist-info}/WHEEL +0 -0
- {sqlsaber-0.23.0.dist-info → sqlsaber-0.25.0.dist-info}/entry_points.txt +0 -0
- {sqlsaber-0.23.0.dist-info → sqlsaber-0.25.0.dist-info}/licenses/LICENSE +0 -0
sqlsaber/database/connection.py
CHANGED
|
@@ -10,6 +10,7 @@ from urllib.parse import parse_qs, urlparse
|
|
|
10
10
|
import aiomysql
|
|
11
11
|
import aiosqlite
|
|
12
12
|
import asyncpg
|
|
13
|
+
import duckdb
|
|
13
14
|
|
|
14
15
|
# Default query timeout to prevent runaway queries
|
|
15
16
|
DEFAULT_QUERY_TIMEOUT = 30.0 # seconds
|
|
@@ -351,115 +352,143 @@ class SQLiteConnection(BaseDatabaseConnection):
|
|
|
351
352
|
await conn.rollback()
|
|
352
353
|
|
|
353
354
|
|
|
355
|
+
def _execute_duckdb_transaction(
|
|
356
|
+
conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
|
|
357
|
+
) -> list[dict[str, Any]]:
|
|
358
|
+
"""Run a DuckDB query inside a transaction and return list of dicts."""
|
|
359
|
+
conn.execute("BEGIN TRANSACTION")
|
|
360
|
+
try:
|
|
361
|
+
if args:
|
|
362
|
+
conn.execute(query, args)
|
|
363
|
+
else:
|
|
364
|
+
conn.execute(query)
|
|
365
|
+
|
|
366
|
+
if conn.description is None:
|
|
367
|
+
rows: list[dict[str, Any]] = []
|
|
368
|
+
else:
|
|
369
|
+
columns = [col[0] for col in conn.description]
|
|
370
|
+
data = conn.fetchall()
|
|
371
|
+
rows = [dict(zip(columns, row)) for row in data]
|
|
372
|
+
|
|
373
|
+
conn.execute("ROLLBACK")
|
|
374
|
+
return rows
|
|
375
|
+
except Exception:
|
|
376
|
+
conn.execute("ROLLBACK")
|
|
377
|
+
raise
|
|
378
|
+
|
|
379
|
+
|
|
354
380
|
class CSVConnection(BaseDatabaseConnection):
|
|
355
|
-
"""CSV file connection using
|
|
381
|
+
"""CSV file connection using DuckDB per query."""
|
|
356
382
|
|
|
357
383
|
def __init__(self, connection_string: str):
|
|
358
384
|
super().__init__(connection_string)
|
|
359
385
|
|
|
360
|
-
|
|
361
|
-
self.csv_path =
|
|
386
|
+
raw_path = connection_string.replace("csv:///", "", 1)
|
|
387
|
+
self.csv_path = raw_path.split("?", 1)[0]
|
|
362
388
|
|
|
363
|
-
# CSV parsing options
|
|
364
389
|
self.delimiter = ","
|
|
365
390
|
self.encoding = "utf-8"
|
|
366
391
|
self.has_header = True
|
|
367
392
|
|
|
368
|
-
# Parse additional options from connection string
|
|
369
393
|
parsed = urlparse(connection_string)
|
|
370
394
|
if parsed.query:
|
|
371
395
|
params = parse_qs(parsed.query)
|
|
372
|
-
self.delimiter = params.get("delimiter", [
|
|
373
|
-
self.encoding = params.get("encoding", [
|
|
396
|
+
self.delimiter = params.get("delimiter", [self.delimiter])[0]
|
|
397
|
+
self.encoding = params.get("encoding", [self.encoding])[0]
|
|
374
398
|
self.has_header = params.get("header", ["true"])[0].lower() == "true"
|
|
375
399
|
|
|
376
|
-
|
|
377
|
-
self.table_name = Path(self.csv_path).stem
|
|
378
|
-
|
|
379
|
-
# Initialize connection and flag to track if CSV is loaded
|
|
380
|
-
self._conn = None
|
|
381
|
-
self._csv_loaded = False
|
|
400
|
+
self.table_name = Path(self.csv_path).stem or "csv_table"
|
|
382
401
|
|
|
383
402
|
async def get_pool(self):
|
|
384
|
-
"""
|
|
385
|
-
|
|
386
|
-
self._conn = await aiosqlite.connect(":memory:")
|
|
387
|
-
self._conn.row_factory = aiosqlite.Row
|
|
388
|
-
await self._load_csv_data()
|
|
389
|
-
return self._conn
|
|
403
|
+
"""CSV connections do not maintain a pool."""
|
|
404
|
+
return None
|
|
390
405
|
|
|
391
406
|
async def close(self):
|
|
392
|
-
"""
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
407
|
+
"""No persistent resources to close for CSV connections."""
|
|
408
|
+
pass
|
|
409
|
+
|
|
410
|
+
def _quote_identifier(self, identifier: str) -> str:
|
|
411
|
+
escaped = identifier.replace('"', '""')
|
|
412
|
+
return f'"{escaped}"'
|
|
413
|
+
|
|
414
|
+
def _quote_literal(self, value: str) -> str:
|
|
415
|
+
escaped = value.replace("'", "''")
|
|
416
|
+
return f"'{escaped}'"
|
|
417
|
+
|
|
418
|
+
def _normalized_encoding(self) -> str | None:
|
|
419
|
+
encoding = (self.encoding or "").strip()
|
|
420
|
+
if not encoding or encoding.lower() == "utf-8":
|
|
421
|
+
return None
|
|
422
|
+
return encoding.replace("-", "").replace("_", "").upper()
|
|
423
|
+
|
|
424
|
+
def _create_view(self, conn: duckdb.DuckDBPyConnection) -> None:
|
|
425
|
+
header_literal = "TRUE" if self.has_header else "FALSE"
|
|
426
|
+
option_parts = [f"HEADER={header_literal}"]
|
|
427
|
+
|
|
428
|
+
if self.delimiter:
|
|
429
|
+
option_parts.append(f"DELIM={self._quote_literal(self.delimiter)}")
|
|
430
|
+
|
|
431
|
+
encoding = self._normalized_encoding()
|
|
432
|
+
if encoding:
|
|
433
|
+
option_parts.append(f"ENCODING={self._quote_literal(encoding)}")
|
|
434
|
+
|
|
435
|
+
options_sql = ""
|
|
436
|
+
if option_parts:
|
|
437
|
+
options_sql = ", " + ", ".join(option_parts)
|
|
438
|
+
|
|
439
|
+
base_relation_sql = (
|
|
440
|
+
f"read_csv_auto({self._quote_literal(self.csv_path)}{options_sql})"
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
create_view_sql = (
|
|
444
|
+
f"CREATE VIEW {self._quote_identifier(self.table_name)} AS "
|
|
445
|
+
f"SELECT * FROM {base_relation_sql}"
|
|
446
|
+
)
|
|
447
|
+
conn.execute(create_view_sql)
|
|
448
|
+
|
|
449
|
+
async def execute_query(
|
|
450
|
+
self, query: str, *args, timeout: float | None = None
|
|
451
|
+
) -> list[dict[str, Any]]:
|
|
452
|
+
effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
|
|
453
|
+
args_tuple = tuple(args) if args else tuple()
|
|
397
454
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
455
|
+
def _run_query() -> list[dict[str, Any]]:
|
|
456
|
+
conn = duckdb.connect(":memory:")
|
|
457
|
+
try:
|
|
458
|
+
self._create_view(conn)
|
|
459
|
+
return _execute_duckdb_transaction(conn, query, args_tuple)
|
|
460
|
+
finally:
|
|
461
|
+
conn.close()
|
|
402
462
|
|
|
403
463
|
try:
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
import pandas as pd
|
|
407
|
-
|
|
408
|
-
# Read CSV file using pandas
|
|
409
|
-
df = pd.read_csv(
|
|
410
|
-
self.csv_path,
|
|
411
|
-
delimiter=self.delimiter,
|
|
412
|
-
encoding=self.encoding,
|
|
413
|
-
header=0 if self.has_header else None,
|
|
464
|
+
return await asyncio.wait_for(
|
|
465
|
+
asyncio.to_thread(_run_query), timeout=effective_timeout
|
|
414
466
|
)
|
|
467
|
+
except asyncio.TimeoutError as exc:
|
|
468
|
+
raise QueryTimeoutError(effective_timeout or 0) from exc
|
|
415
469
|
|
|
416
|
-
# If no header, create column names
|
|
417
|
-
if not self.has_header:
|
|
418
|
-
df.columns = [f"column_{i}" for i in range(len(df.columns))]
|
|
419
|
-
|
|
420
|
-
# Create table with proper column types
|
|
421
|
-
columns_sql = []
|
|
422
|
-
for col in df.columns:
|
|
423
|
-
# Infer SQLite type from pandas dtype
|
|
424
|
-
dtype = df[col].dtype
|
|
425
|
-
if pd.api.types.is_integer_dtype(dtype):
|
|
426
|
-
sql_type = "INTEGER"
|
|
427
|
-
elif pd.api.types.is_float_dtype(dtype):
|
|
428
|
-
sql_type = "REAL"
|
|
429
|
-
elif pd.api.types.is_bool_dtype(dtype):
|
|
430
|
-
sql_type = "INTEGER" # SQLite doesn't have BOOLEAN
|
|
431
|
-
else:
|
|
432
|
-
sql_type = "TEXT"
|
|
433
470
|
|
|
434
|
-
|
|
471
|
+
class DuckDBConnection(BaseDatabaseConnection):
|
|
472
|
+
"""DuckDB database connection using duckdb Python API."""
|
|
435
473
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
for _, row in df.iterrows():
|
|
446
|
-
# Convert pandas values to Python native types
|
|
447
|
-
values = []
|
|
448
|
-
for val in row:
|
|
449
|
-
if pd.isna(val):
|
|
450
|
-
values.append(None)
|
|
451
|
-
elif isinstance(val, (pd.Timestamp, pd.Timedelta)):
|
|
452
|
-
values.append(str(val))
|
|
453
|
-
else:
|
|
454
|
-
values.append(val)
|
|
474
|
+
def __init__(self, connection_string: str):
|
|
475
|
+
super().__init__(connection_string)
|
|
476
|
+
if connection_string.startswith("duckdb:///"):
|
|
477
|
+
db_path = connection_string.replace("duckdb:///", "", 1)
|
|
478
|
+
elif connection_string.startswith("duckdb://"):
|
|
479
|
+
db_path = connection_string.replace("duckdb://", "", 1)
|
|
480
|
+
else:
|
|
481
|
+
db_path = connection_string
|
|
455
482
|
|
|
456
|
-
|
|
483
|
+
self.database_path = db_path or ":memory:"
|
|
457
484
|
|
|
458
|
-
|
|
459
|
-
|
|
485
|
+
async def get_pool(self):
|
|
486
|
+
"""DuckDB creates connections per query, return database path."""
|
|
487
|
+
return self.database_path
|
|
460
488
|
|
|
461
|
-
|
|
462
|
-
|
|
489
|
+
async def close(self):
|
|
490
|
+
"""DuckDB connections are created per query, no persistent pool to close."""
|
|
491
|
+
pass
|
|
463
492
|
|
|
464
493
|
async def execute_query(
|
|
465
494
|
self, query: str, *args, timeout: float | None = None
|
|
@@ -470,29 +499,22 @@ class CSVConnection(BaseDatabaseConnection):
|
|
|
470
499
|
ensuring no changes are persisted to the database.
|
|
471
500
|
"""
|
|
472
501
|
effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
|
|
473
|
-
conn = await self.get_pool()
|
|
474
502
|
|
|
475
|
-
|
|
476
|
-
await conn.execute("BEGIN")
|
|
477
|
-
try:
|
|
478
|
-
# Execute query with client-side timeout (CSV uses in-memory SQLite)
|
|
479
|
-
if effective_timeout:
|
|
480
|
-
cursor = await asyncio.wait_for(
|
|
481
|
-
conn.execute(query, args if args else ()), timeout=effective_timeout
|
|
482
|
-
)
|
|
483
|
-
rows = await asyncio.wait_for(
|
|
484
|
-
cursor.fetchall(), timeout=effective_timeout
|
|
485
|
-
)
|
|
486
|
-
else:
|
|
487
|
-
cursor = await conn.execute(query, args if args else ())
|
|
488
|
-
rows = await cursor.fetchall()
|
|
503
|
+
args_tuple = tuple(args) if args else tuple()
|
|
489
504
|
|
|
490
|
-
|
|
505
|
+
def _run_query() -> list[dict[str, Any]]:
|
|
506
|
+
conn = duckdb.connect(self.database_path)
|
|
507
|
+
try:
|
|
508
|
+
return _execute_duckdb_transaction(conn, query, args_tuple)
|
|
509
|
+
finally:
|
|
510
|
+
conn.close()
|
|
511
|
+
|
|
512
|
+
try:
|
|
513
|
+
return await asyncio.wait_for(
|
|
514
|
+
asyncio.to_thread(_run_query), timeout=effective_timeout
|
|
515
|
+
)
|
|
491
516
|
except asyncio.TimeoutError as exc:
|
|
492
517
|
raise QueryTimeoutError(effective_timeout or 0) from exc
|
|
493
|
-
finally:
|
|
494
|
-
# Always rollback to ensure no changes are committed
|
|
495
|
-
await conn.rollback()
|
|
496
518
|
|
|
497
519
|
|
|
498
520
|
def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
|
|
@@ -503,6 +525,8 @@ def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
|
|
|
503
525
|
return MySQLConnection(connection_string)
|
|
504
526
|
elif connection_string.startswith("sqlite:///"):
|
|
505
527
|
return SQLiteConnection(connection_string)
|
|
528
|
+
elif connection_string.startswith("duckdb://"):
|
|
529
|
+
return DuckDBConnection(connection_string)
|
|
506
530
|
elif connection_string.startswith("csv:///"):
|
|
507
531
|
return CSVConnection(connection_string)
|
|
508
532
|
else:
|
sqlsaber/database/resolver.py
CHANGED
|
@@ -23,7 +23,7 @@ class ResolvedDatabase:
|
|
|
23
23
|
connection_string: str # Canonical connection string for DatabaseConnection factory
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "csv"}
|
|
26
|
+
SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "duckdb", "csv"}
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _is_connection_string(s: str) -> bool:
|
|
@@ -67,8 +67,8 @@ def resolve_database(
|
|
|
67
67
|
scheme = urlparse(spec).scheme
|
|
68
68
|
if scheme in {"postgresql", "mysql"}:
|
|
69
69
|
db_name = urlparse(spec).path.lstrip("/") or "database"
|
|
70
|
-
elif scheme in {"sqlite", "csv"}:
|
|
71
|
-
db_name = Path(urlparse(spec).path).stem
|
|
70
|
+
elif scheme in {"sqlite", "duckdb", "csv"}:
|
|
71
|
+
db_name = Path(urlparse(spec).path).stem or "database"
|
|
72
72
|
else: # should not happen because of SUPPORTED_SCHEMES
|
|
73
73
|
db_name = "database"
|
|
74
74
|
return ResolvedDatabase(name=db_name, connection_string=spec)
|
|
@@ -83,6 +83,10 @@ def resolve_database(
|
|
|
83
83
|
if not path.exists():
|
|
84
84
|
raise DatabaseResolutionError(f"SQLite file '{spec}' not found.")
|
|
85
85
|
return ResolvedDatabase(name=path.stem, connection_string=f"sqlite:///{path}")
|
|
86
|
+
if path.suffix.lower() in {".duckdb", ".ddb"}:
|
|
87
|
+
if not path.exists():
|
|
88
|
+
raise DatabaseResolutionError(f"DuckDB file '{spec}' not found.")
|
|
89
|
+
return ResolvedDatabase(name=path.stem, connection_string=f"duckdb:///{path}")
|
|
86
90
|
|
|
87
91
|
# 3. Must be a configured name
|
|
88
92
|
db_cfg: DatabaseConfig | None = config_mgr.get_database(spec)
|