sqlsaber 0.23.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to the supported public registries. It is provided for informational purposes only and reflects the changes between the two published versions.

Potentially problematic release: this version of sqlsaber might be problematic.

@@ -10,6 +10,7 @@ from urllib.parse import parse_qs, urlparse
 import aiomysql
 import aiosqlite
 import asyncpg
+import duckdb
 
 # Default query timeout to prevent runaway queries
 DEFAULT_QUERY_TIMEOUT = 30.0  # seconds
@@ -351,115 +352,143 @@ class SQLiteConnection(BaseDatabaseConnection):
             await conn.rollback()
 
 
+def _execute_duckdb_transaction(
+    conn: duckdb.DuckDBPyConnection, query: str, args: tuple[Any, ...]
+) -> list[dict[str, Any]]:
+    """Run a DuckDB query inside a transaction and return list of dicts."""
+    conn.execute("BEGIN TRANSACTION")
+    try:
+        if args:
+            conn.execute(query, args)
+        else:
+            conn.execute(query)
+
+        if conn.description is None:
+            rows: list[dict[str, Any]] = []
+        else:
+            columns = [col[0] for col in conn.description]
+            data = conn.fetchall()
+            rows = [dict(zip(columns, row)) for row in data]
+
+        conn.execute("ROLLBACK")
+        return rows
+    except Exception:
+        conn.execute("ROLLBACK")
+        raise
+
+
 class CSVConnection(BaseDatabaseConnection):
-    """CSV file connection using in-memory SQLite database."""
+    """CSV file connection using DuckDB per query."""
 
     def __init__(self, connection_string: str):
         super().__init__(connection_string)
 
-        # Parse CSV file path from connection string
-        self.csv_path = connection_string.replace("csv:///", "")
+        raw_path = connection_string.replace("csv:///", "", 1)
+        self.csv_path = raw_path.split("?", 1)[0]
 
-        # CSV parsing options
         self.delimiter = ","
         self.encoding = "utf-8"
         self.has_header = True
 
-        # Parse additional options from connection string
         parsed = urlparse(connection_string)
         if parsed.query:
             params = parse_qs(parsed.query)
-            self.delimiter = params.get("delimiter", [","])[0]
-            self.encoding = params.get("encoding", ["utf-8"])[0]
+            self.delimiter = params.get("delimiter", [self.delimiter])[0]
+            self.encoding = params.get("encoding", [self.encoding])[0]
             self.has_header = params.get("header", ["true"])[0].lower() == "true"
 
-        # Table name derived from filename
-        self.table_name = Path(self.csv_path).stem
-
-        # Initialize connection and flag to track if CSV is loaded
-        self._conn = None
-        self._csv_loaded = False
+        self.table_name = Path(self.csv_path).stem or "csv_table"
 
     async def get_pool(self):
-        """Get or create the in-memory database connection."""
-        if self._conn is None:
-            self._conn = await aiosqlite.connect(":memory:")
-            self._conn.row_factory = aiosqlite.Row
-            await self._load_csv_data()
-        return self._conn
+        """CSV connections do not maintain a pool."""
+        return None
 
     async def close(self):
-        """Close the database connection."""
-        if self._conn:
-            await self._conn.close()
-            self._conn = None
-            self._csv_loaded = False
+        """No persistent resources to close for CSV connections."""
+        pass
+
+    def _quote_identifier(self, identifier: str) -> str:
+        escaped = identifier.replace('"', '""')
+        return f'"{escaped}"'
+
+    def _quote_literal(self, value: str) -> str:
+        escaped = value.replace("'", "''")
+        return f"'{escaped}'"
+
+    def _normalized_encoding(self) -> str | None:
+        encoding = (self.encoding or "").strip()
+        if not encoding or encoding.lower() == "utf-8":
+            return None
+        return encoding.replace("-", "").replace("_", "").upper()
+
+    def _create_view(self, conn: duckdb.DuckDBPyConnection) -> None:
+        header_literal = "TRUE" if self.has_header else "FALSE"
+        option_parts = [f"HEADER={header_literal}"]
+
+        if self.delimiter:
+            option_parts.append(f"DELIM={self._quote_literal(self.delimiter)}")
+
+        encoding = self._normalized_encoding()
+        if encoding:
+            option_parts.append(f"ENCODING={self._quote_literal(encoding)}")
+
+        options_sql = ""
+        if option_parts:
+            options_sql = ", " + ", ".join(option_parts)
+
+        base_relation_sql = (
+            f"read_csv_auto({self._quote_literal(self.csv_path)}{options_sql})"
+        )
+
+        create_view_sql = (
+            f"CREATE VIEW {self._quote_identifier(self.table_name)} AS "
+            f"SELECT * FROM {base_relation_sql}"
+        )
+        conn.execute(create_view_sql)
+
+    async def execute_query(
+        self, query: str, *args, timeout: float | None = None
+    ) -> list[dict[str, Any]]:
+        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
+        args_tuple = tuple(args) if args else tuple()
 
-    async def _load_csv_data(self):
-        """Load CSV data into the in-memory SQLite database."""
-        if self._csv_loaded or not self._conn:
-            return
+        def _run_query() -> list[dict[str, Any]]:
+            conn = duckdb.connect(":memory:")
+            try:
+                self._create_view(conn)
+                return _execute_duckdb_transaction(conn, query, args_tuple)
+            finally:
+                conn.close()
 
         try:
-            # Import pandas only when needed for CSV operations
-            # This improves CLI load times
-            import pandas as pd
-
-            # Read CSV file using pandas
-            df = pd.read_csv(
-                self.csv_path,
-                delimiter=self.delimiter,
-                encoding=self.encoding,
-                header=0 if self.has_header else None,
+            return await asyncio.wait_for(
+                asyncio.to_thread(_run_query), timeout=effective_timeout
             )
+        except asyncio.TimeoutError as exc:
+            raise QueryTimeoutError(effective_timeout or 0) from exc
 
-            # If no header, create column names
-            if not self.has_header:
-                df.columns = [f"column_{i}" for i in range(len(df.columns))]
-
-            # Create table with proper column types
-            columns_sql = []
-            for col in df.columns:
-                # Infer SQLite type from pandas dtype
-                dtype = df[col].dtype
-                if pd.api.types.is_integer_dtype(dtype):
-                    sql_type = "INTEGER"
-                elif pd.api.types.is_float_dtype(dtype):
-                    sql_type = "REAL"
-                elif pd.api.types.is_bool_dtype(dtype):
-                    sql_type = "INTEGER"  # SQLite doesn't have BOOLEAN
-                else:
-                    sql_type = "TEXT"
 
-                columns_sql.append(f'"{col}" {sql_type}')
+class DuckDBConnection(BaseDatabaseConnection):
+    """DuckDB database connection using duckdb Python API."""
 
-            create_table_sql = (
-                f'CREATE TABLE "{self.table_name}" ({", ".join(columns_sql)})'
-            )
-            await self._conn.execute(create_table_sql)
-
-            # Insert data row by row
-            placeholders = ", ".join(["?" for _ in df.columns])
-            insert_sql = f'INSERT INTO "{self.table_name}" VALUES ({placeholders})'
-
-            for _, row in df.iterrows():
-                # Convert pandas values to Python native types
-                values = []
-                for val in row:
-                    if pd.isna(val):
-                        values.append(None)
-                    elif isinstance(val, (pd.Timestamp, pd.Timedelta)):
-                        values.append(str(val))
-                    else:
-                        values.append(val)
+    def __init__(self, connection_string: str):
+        super().__init__(connection_string)
+        if connection_string.startswith("duckdb:///"):
+            db_path = connection_string.replace("duckdb:///", "", 1)
+        elif connection_string.startswith("duckdb://"):
+            db_path = connection_string.replace("duckdb://", "", 1)
+        else:
+            db_path = connection_string
 
-                await self._conn.execute(insert_sql, values)
+        self.database_path = db_path or ":memory:"
 
-            await self._conn.commit()
-            self._csv_loaded = True
+    async def get_pool(self):
+        """DuckDB creates connections per query, return database path."""
+        return self.database_path
 
-        except Exception as e:
-            raise ValueError(f"Error loading CSV file '{self.csv_path}': {str(e)}")
+    async def close(self):
+        """DuckDB connections are created per query, no persistent pool to close."""
+        pass
 
     async def execute_query(
         self, query: str, *args, timeout: float | None = None
@@ -470,29 +499,22 @@ class CSVConnection(BaseDatabaseConnection):
         ensuring no changes are persisted to the database.
         """
        effective_timeout = timeout or DEFAULT_QUERY_TIMEOUT
-        conn = await self.get_pool()
 
-        # Start transaction
-        await conn.execute("BEGIN")
-        try:
-            # Execute query with client-side timeout (CSV uses in-memory SQLite)
-            if effective_timeout:
-                cursor = await asyncio.wait_for(
-                    conn.execute(query, args if args else ()), timeout=effective_timeout
-                )
-                rows = await asyncio.wait_for(
-                    cursor.fetchall(), timeout=effective_timeout
-                )
-            else:
-                cursor = await conn.execute(query, args if args else ())
-                rows = await cursor.fetchall()
+        args_tuple = tuple(args) if args else tuple()
 
-            return [dict(row) for row in rows]
+        def _run_query() -> list[dict[str, Any]]:
+            conn = duckdb.connect(self.database_path)
+            try:
+                return _execute_duckdb_transaction(conn, query, args_tuple)
+            finally:
+                conn.close()
+
+        try:
+            return await asyncio.wait_for(
+                asyncio.to_thread(_run_query), timeout=effective_timeout
+            )
         except asyncio.TimeoutError as exc:
             raise QueryTimeoutError(effective_timeout or 0) from exc
-        finally:
-            # Always rollback to ensure no changes are committed
-            await conn.rollback()
 
 
 def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
@@ -503,6 +525,8 @@ def DatabaseConnection(connection_string: str) -> BaseDatabaseConnection:
         return MySQLConnection(connection_string)
     elif connection_string.startswith("sqlite:///"):
         return SQLiteConnection(connection_string)
+    elif connection_string.startswith("duckdb://"):
+        return DuckDBConnection(connection_string)
     elif connection_string.startswith("csv:///"):
         return CSVConnection(connection_string)
     else:
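
A minimal usage sketch, not part of the diff, showing how the factory above dispatches the new duckdb:// scheme. DatabaseConnection, execute_query, and close come from the code shown; the file name analytics.duckdb and the query are hypothetical.

import asyncio

async def main() -> None:
    # Dispatches to DuckDBConnection via the new "duckdb://" branch above.
    conn = DatabaseConnection("duckdb:///analytics.duckdb")  # hypothetical file
    try:
        # Runs in a worker thread; the transaction helper wraps the query in
        # BEGIN ... ROLLBACK, so results come back as dicts and nothing persists.
        rows = await conn.execute_query("SELECT 42 AS answer")
        print(rows)  # [{'answer': 42}]
    finally:
        await conn.close()

asyncio.run(main())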
@@ -23,7 +23,7 @@ class ResolvedDatabase:
     connection_string: str  # Canonical connection string for DatabaseConnection factory
 
 
-SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "csv"}
+SUPPORTED_SCHEMES = {"postgresql", "mysql", "sqlite", "duckdb", "csv"}
 
 
 def _is_connection_string(s: str) -> bool:
@@ -67,8 +67,8 @@ def resolve_database(
         scheme = urlparse(spec).scheme
         if scheme in {"postgresql", "mysql"}:
             db_name = urlparse(spec).path.lstrip("/") or "database"
-        elif scheme in {"sqlite", "csv"}:
-            db_name = Path(urlparse(spec).path).stem
+        elif scheme in {"sqlite", "duckdb", "csv"}:
+            db_name = Path(urlparse(spec).path).stem or "database"
         else:  # should not happen because of SUPPORTED_SCHEMES
             db_name = "database"
         return ResolvedDatabase(name=db_name, connection_string=spec)
@@ -83,6 +83,10 @@ def resolve_database(
         if not path.exists():
             raise DatabaseResolutionError(f"SQLite file '{spec}' not found.")
         return ResolvedDatabase(name=path.stem, connection_string=f"sqlite:///{path}")
+    if path.suffix.lower() in {".duckdb", ".ddb"}:
+        if not path.exists():
+            raise DatabaseResolutionError(f"DuckDB file '{spec}' not found.")
+        return ResolvedDatabase(name=path.stem, connection_string=f"duckdb:///{path}")
 
     # 3. Must be a configured name
     db_cfg: DatabaseConfig | None = config_mgr.get_database(spec)
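
A hedged sketch of the resolver behavior added in the last hunk. The exact resolve_database signature is not shown in the diff; this assumes it takes the spec string plus the config manager referenced above, and that a file named warehouse.duckdb exists (both the call shape and the file name are assumptions).

# Hypothetical call; signature assumed from the config_mgr reference above.
resolved = resolve_database("warehouse.duckdb", config_mgr)
# The new ".duckdb"/".ddb" branch yields:
#   resolved.name == "warehouse"
#   resolved.connection_string == "duckdb:///warehouse.duckdb"
# A missing file instead raises DatabaseResolutionError("DuckDB file ... not found.")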