sqlnow-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlnow_mcp/db.py ADDED
@@ -0,0 +1,827 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import secrets
5
+ import threading
6
+ from datetime import date, datetime, time, timedelta
7
+ from decimal import Decimal
8
+ from pathlib import Path
9
+ from typing import Any, Literal
10
+
11
+ import duckdb
12
+
13
# File attach strategies: "view" creates a VIEW over the file reader, any
# other mode materializes a TABLE (see _file_attach_sql).
AttachMode = Literal["view", "load"]
# Engine tags returned by _parse_database_connection and used in ATTACH.
DbType = Literal["POSTGRES", "SQLITE", "MYSQL"]

# Upper bound on the rows served for one registered query view.
MAX_ROWS: int = 100_000
# Largest page fetch_table_page() returns per call.
BACKGROUND_CHUNK_ROWS: int = 2_000
18
+
19
+
20
class DuckDBSessionError(Exception):
    """Error raised by DuckDBSession for invalid requests and failed operations."""
22
+
23
+
24
+ class DuckDBSession:
25
+ def __init__(
26
+ self,
27
+ data_dir: Path,
28
+ allow_paths: list[Path] | tuple[Path, ...] = (),
29
+ allow_external: bool = False,
30
+ *,
31
+ read_only: bool = False,
32
+ query_timeout_sec: float | None = None,
33
+ ) -> None:
34
+ self.data_dir = data_dir.resolve()
35
+ resolved_allow = tuple(p.resolve() for p in allow_paths)
36
+ for path in resolved_allow:
37
+ if path in {Path("/"), Path()}:
38
+ raise DuckDBSessionError(
39
+ f"Unsafe allow_paths entry {path!s} (would allow any file on disk)"
40
+ )
41
+ self.allow_paths = resolved_allow
42
+ self.allow_external = allow_external
43
+ self.read_only = read_only
44
+ self.query_timeout_sec = query_timeout_sec
45
+ self.native_tables_only = False
46
+ self.conn: duckdb.DuckDBPyConnection | None = None
47
+ self.active_db: Path | None = None
48
+ self.attachments: list[dict[str, Any]] = []
49
+ self.failed_attachments: list[dict[str, Any]] = []
50
+ self._query_view_ids: list[str] = []
51
+ self._query_meta: dict[str, dict[str, Any]] = {}
52
+
53
def use_memory(self) -> None:
    """Switch the session to a fresh in-memory DuckDB database.

    Closes any open connection, loads the extension set on the new one and
    clears all per-database state.
    """
    self._close_connection()
    self.conn = duckdb.connect()
    self._install_extensions(self.conn)
    self.active_db = None
    self.attachments = []
    self.failed_attachments = []
    # open_publish_database() switches this on; without a reset the
    # restriction would silently carry over into this session.
    self.native_tables_only = False
60
+
61
+ def use_database(self, db_name: str) -> dict[str, Any]:
62
+ db_path = self._resolve_db_path(db_name)
63
+ if not db_path.exists():
64
+ raise DuckDBSessionError(f"Database not found: {db_name}")
65
+
66
+ self._close_connection()
67
+ self.conn = duckdb.connect(str(db_path))
68
+ self._install_extensions(self.conn)
69
+ self.active_db = db_path
70
+ self.attachments = []
71
+ self.failed_attachments = []
72
+
73
+ sidecar = self._load_sidecar(db_path)
74
+ for entry in sidecar.get("attachments", []):
75
+ try:
76
+ if not self._is_attached(entry["name"]):
77
+ self._attach_from_sidecar(entry)
78
+ self.attachments.append(entry)
79
+ except Exception as exc:
80
+ failed = dict(entry)
81
+ failed["error"] = str(exc)
82
+ self.failed_attachments.append(failed)
83
+
84
+ return {
85
+ "name": db_path.stem,
86
+ "path": str(db_path),
87
+ "tables": self._list_table_names(),
88
+ "attachments": list(self.attachments),
89
+ "failed_attachments": list(self.failed_attachments),
90
+ }
91
+
92
def open_publish_database(
    self,
    db_path: Path,
    *,
    memory_limit: str,
    threads: int,
    max_temp_directory_size: str | None = None,
) -> dict[str, Any]:
    """Open a database file read-only with resource limits ("publish" mode).

    Replaces the current connection, restricts listings to native tables,
    refuses databases that carry attachments, then applies the settings.

    Args:
        db_path: Path to the database file (resolved before use).
        memory_limit: Value for DuckDB's ``memory_limit`` setting.
        threads: Value for DuckDB's ``threads`` setting.
        max_temp_directory_size: Optional ``max_temp_directory_size`` value.

    Returns:
        Dict with ``name``, ``path``, ``tables`` and ``read_only``.

    Raises:
        DuckDBSessionError: If the file is missing or has attachments.
    """
    target = db_path.resolve()
    if not target.exists():
        raise DuckDBSessionError(f"Database not found: {target}")

    self._close_connection()
    self.conn = duckdb.connect(str(target), read_only=True)
    self._install_extensions(self.conn)
    self.active_db = target
    self.attachments = []
    self.failed_attachments = []
    self.native_tables_only = True

    self._reject_publish_attachments(target)

    # String settings are escaped, the integer one is coerced.
    memory_literal = self._escape_sql_string(memory_limit)
    self._run_sql(f"SET memory_limit = '{memory_literal}'")
    self._run_sql(f"SET threads = {int(threads)}")
    if max_temp_directory_size:
        temp_literal = self._escape_sql_string(max_temp_directory_size)
        self._run_sql(f"SET max_temp_directory_size = '{temp_literal}'")

    summary: dict[str, Any] = {"name": target.stem, "path": str(target)}
    summary["tables"] = self._list_table_names(native_only=True)
    summary["read_only"] = True
    return summary
127
+
128
+ def create_database(self, db_name: str) -> dict[str, Any]:
129
+ self.data_dir.mkdir(parents=True, exist_ok=True)
130
+ db_path = self._resolve_db_path(db_name)
131
+ name = db_path.stem
132
+ if not name or name in {".", ".."}:
133
+ raise DuckDBSessionError(f"Invalid database name: {db_name}")
134
+ if not self._is_under_root(db_path, self.data_dir):
135
+ raise DuckDBSessionError(f"Invalid database name: {db_name}")
136
+ if db_path.exists():
137
+ raise DuckDBSessionError(f"Database already exists: {name}")
138
+
139
+ conn = duckdb.connect(str(db_path))
140
+ try:
141
+ self._install_extensions(conn)
142
+ finally:
143
+ conn.close()
144
+
145
+ return {"name": name, "path": str(db_path)}
146
+
147
+ def list_databases(self) -> list[dict[str, Any]]:
148
+ self.data_dir.mkdir(parents=True, exist_ok=True)
149
+ results: list[dict[str, Any]] = []
150
+ for db_path in sorted(self.data_dir.glob("*.db")):
151
+ stat = db_path.stat()
152
+ results.append(
153
+ {
154
+ "name": db_path.stem,
155
+ "path": str(db_path),
156
+ "size_mb": round(stat.st_size / (1024 * 1024), 3),
157
+ "last_modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
158
+ }
159
+ )
160
+ return results
161
+
162
+ def current_database(self) -> dict[str, Any]:
163
+ if self.conn is None:
164
+ return {
165
+ "name": None,
166
+ "path": None,
167
+ "in_memory": False,
168
+ "attachments": [],
169
+ "failed_attachments": [],
170
+ }
171
+ return {
172
+ "name": self.active_db.stem if self.active_db else None,
173
+ "path": str(self.active_db) if self.active_db else None,
174
+ "in_memory": self.active_db is None,
175
+ "attachments": list(self.attachments),
176
+ "failed_attachments": list(self.failed_attachments),
177
+ }
178
+
179
+ def attach_file(
180
+ self,
181
+ path: str,
182
+ name: str | None = None,
183
+ mode: AttachMode = "view",
184
+ ) -> dict[str, Any]:
185
+ conn = self._require_conn()
186
+ resolved = self._resolve_allowed_path(path)
187
+ if not resolved.exists():
188
+ raise DuckDBSessionError(f"File not found: {path}")
189
+
190
+ table_name = name or resolved.stem
191
+ sql = self._file_attach_sql(resolved, table_name, mode)
192
+ conn.execute(sql)
193
+ return {"name": table_name, "path": str(resolved), "mode": mode}
194
+
195
+ def attach_database(
196
+ self,
197
+ connection_string: str,
198
+ name: str,
199
+ tables: list[str] | None = None,
200
+ ) -> dict[str, Any]:
201
+ if not self.allow_external:
202
+ raise DuckDBSessionError("External database attachments are disabled")
203
+
204
+ conn = self._require_conn()
205
+ db_type, attach_str = self._parse_database_connection(connection_string)
206
+ sql = (
207
+ f"ATTACH '{self._escape_sql_string(attach_str)}' "
208
+ f"AS {self._quote_ident(name)} (TYPE {db_type})"
209
+ )
210
+ conn.execute(sql)
211
+
212
+ entry: dict[str, Any] = {
213
+ "name": name,
214
+ "type": db_type,
215
+ "connection_string": connection_string,
216
+ }
217
+ if tables:
218
+ entry["tables"] = tables
219
+
220
+ self.attachments.append(entry)
221
+ self._save_sidecar()
222
+ return entry
223
+
224
+ def detach_source(self, name: str) -> dict[str, Any]:
225
+ conn = self._require_conn()
226
+ attachment = next((a for a in self.attachments if a["name"] == name), None)
227
+ if attachment is not None:
228
+ conn.execute(f"DETACH {self._quote_ident(name)}")
229
+ self.attachments = [a for a in self.attachments if a["name"] != name]
230
+ self._save_sidecar()
231
+ return {"name": name, "detached": True, "type": "database"}
232
+
233
+ quoted = self._quote_ident(name)
234
+ conn.execute(f"DROP VIEW IF EXISTS {quoted}")
235
+ conn.execute(f"DROP TABLE IF EXISTS {quoted}")
236
+ return {"name": name, "detached": True, "type": "file"}
237
+
238
+ def run_mutating_query(self, sql: str) -> dict[str, Any]:
239
+ result = self._run_sql(sql.strip().rstrip(";"))
240
+ return self._format_execution_result(result)
241
+
242
+ def run_query(self, sql: str, limit: int = 500) -> dict[str, Any]:
243
+ if self._is_mutating_sql(sql):
244
+ return self.run_mutating_query(sql)
245
+
246
+ result = self._run_sql(sql)
247
+ if not result.description:
248
+ return {"headers": [], "rows": [], "types": []}
249
+ headers = [col[0] for col in result.description]
250
+ types = [str(col[1]) for col in result.description]
251
+ rows: list[list[str]] = []
252
+ for row in result.fetchmany(limit):
253
+ rows.append([self._stringify_value(v) for v in row])
254
+ return {"headers": headers, "rows": rows, "types": types}
255
+
256
def run_select_query(self, sql: str, limit: int = 500) -> dict[str, Any]:
    """Register a SELECT as a TEMP VIEW and return its first page.

    The view is keyed by a fresh ``query_id`` so later pages can be read
    with ``fetch_table_page``. The total row count is computed once and
    cached in the query metadata.

    Raises:
        DuckDBSessionError: If the statement is classified as mutating.
    """
    if self._is_mutating_sql(sql):
        raise DuckDBSessionError(
            "run_select_query requires a SELECT statement; use run_query for DDL/DML"
        )

    query_id = self._new_query_id()
    statement = sql.strip().rstrip(";")
    self._run_sql(
        f"CREATE OR REPLACE TEMP VIEW {self._query_view_name(query_id)} AS ({statement})"
    )
    self._query_view_ids.append(query_id)

    total_rows = self._count_query_rows(query_id)
    self._query_meta.setdefault(query_id, {})["total_rows"] = total_rows

    # The first page is never larger than the global row cap.
    first_page = self._fetch_query_page(
        query_id, offset=0, limit=min(int(limit), MAX_ROWS)
    )
    more = first_page["has_more"]
    return {
        "query_id": query_id,
        "sql": statement,
        "columns": first_page["columns"],
        "types": first_page["types"],
        "rows": first_page["rows"],
        "offset": 0,
        "total_rows": total_rows,
        "row_cap": MAX_ROWS,
        "has_more": more,
        "loading": more,
        "complete": not more,
        "capped": False,
    }
289
+
290
+ def fetch_table_page(
291
+ self, query_id: str, offset: int, limit: int = BACKGROUND_CHUNK_ROWS
292
+ ) -> dict[str, Any]:
293
+ if query_id not in self._query_view_ids:
294
+ raise DuckDBSessionError(f"Unknown query_id: {query_id}")
295
+
296
+ offset = int(offset)
297
+ if offset >= MAX_ROWS:
298
+ meta = self._query_meta[query_id]
299
+ return {
300
+ "query_id": query_id,
301
+ "columns": meta["columns"],
302
+ "types": meta["types"],
303
+ "rows": [],
304
+ "offset": offset,
305
+ "total_rows": meta.get("total_rows"),
306
+ "row_cap": MAX_ROWS,
307
+ "has_more": False,
308
+ "complete": True,
309
+ "capped": True,
310
+ }
311
+
312
+ page_limit = min(int(limit), MAX_ROWS - offset, BACKGROUND_CHUNK_ROWS)
313
+ page = self._fetch_query_page(query_id, offset=offset, limit=page_limit)
314
+ loaded = offset + len(page["rows"])
315
+ capped = loaded >= MAX_ROWS and page["has_more"]
316
+ if capped:
317
+ page["has_more"] = False
318
+ meta = self._query_meta.get(query_id, {})
319
+ return {
320
+ "query_id": query_id,
321
+ "columns": page["columns"],
322
+ "types": page["types"],
323
+ "rows": page["rows"],
324
+ "offset": offset,
325
+ "total_rows": meta.get("total_rows"),
326
+ "row_cap": MAX_ROWS,
327
+ "has_more": page["has_more"],
328
+ "complete": not page["has_more"],
329
+ "capped": capped,
330
+ }
331
+
332
+ def list_tables(self, *, native_only: bool | None = None) -> list[dict[str, Any]]:
333
+ if native_only is None:
334
+ native_only = self.native_tables_only
335
+ return self._list_tables(native_only=native_only)
336
+
337
+ def describe_table(
338
+ self, table_name: str, *, native_only: bool | None = None
339
+ ) -> dict[str, Any]:
340
+ if native_only is None:
341
+ native_only = self.native_tables_only
342
+ return self._describe_table(table_name, native_only=native_only)
343
+
344
+ def sample_table(self, table_name: str, n: int = 10) -> dict[str, Any]:
345
+ quoted = self._quote_table_ref(table_name)
346
+ sql = f"SELECT * FROM {quoted} LIMIT {int(n)}"
347
+ result = self.run_query(sql, limit=int(n))
348
+ return {
349
+ "table": table_name,
350
+ "sql": sql,
351
+ "rows": [dict(zip(result["headers"], row, strict=True)) for row in result["rows"]],
352
+ }
353
+
354
+ def profile_table(self, table_name: str, sample_threshold: int = 100_000) -> dict[str, Any]:
355
+ quoted = self._quote_table_ref(table_name)
356
+ count_row = self._run_sql(f"SELECT COUNT(*) FROM {quoted}").fetchone()
357
+ if count_row is None:
358
+ raise DuckDBSessionError(f"Could not count rows for table: {table_name}")
359
+ count = count_row[0]
360
+ if count <= sample_threshold:
361
+ sql = f"SUMMARIZE {quoted}"
362
+ sampled = False
363
+ else:
364
+ pct = min(100 * sample_threshold / count, 100)
365
+ sql = (
366
+ f"SUMMARIZE SELECT * FROM {quoted} "
367
+ f"USING SAMPLE {pct:.2f} PERCENT (bernoulli)"
368
+ )
369
+ sampled = True
370
+ result = self._run_sql(sql)
371
+ headers = [col[0] for col in result.description]
372
+ profile = [
373
+ dict(zip(headers, row, strict=True)) for row in result.fetchall()
374
+ ]
375
+ return {"sampled": sampled, "row_count": count, "profile": profile}
376
+
377
+ def value_counts(self, table_name: str, column_name: str, limit: int = 20) -> dict[str, Any]:
378
+ quoted_table = self._quote_table_ref(table_name)
379
+ quoted_col = self._quote_ident(column_name)
380
+ sql = (
381
+ f"SELECT {quoted_col} AS value, COUNT(*) AS count "
382
+ f"FROM {quoted_table} "
383
+ f"GROUP BY {quoted_col} "
384
+ f"ORDER BY count DESC "
385
+ f"LIMIT {int(limit)}"
386
+ )
387
+ result = self.run_query(sql, limit=limit)
388
+ rows = [dict(zip(result["headers"], row, strict=True)) for row in result["rows"]]
389
+ return {
390
+ "table": table_name,
391
+ "column": column_name,
392
+ "sql": sql,
393
+ "rows": rows,
394
+ }
395
+
396
+ def _format_execution_result(self, result: Any) -> dict[str, Any]:
397
+ if not result.description:
398
+ return {"ok": True, "message": "Statement executed successfully."}
399
+
400
+ headers = [col[0] for col in result.description]
401
+ record_rows = [
402
+ dict(
403
+ zip(
404
+ headers,
405
+ [self._stringify_value(v) for v in row],
406
+ strict=True,
407
+ )
408
+ )
409
+ for row in result.fetchall()
410
+ ]
411
+
412
+ if not record_rows:
413
+ return {"ok": True, "message": "Statement executed successfully."}
414
+
415
+ if len(headers) == 1 and len(record_rows) == 1:
416
+ column = headers[0]
417
+ value = record_rows[0][column]
418
+ if column.lower() == "count" and value.isdigit():
419
+ count = int(value)
420
+ return {
421
+ "ok": True,
422
+ "message": f"{count:,} row(s) affected.",
423
+ "rows_affected": count,
424
+ }
425
+ if column.lower() == "success":
426
+ return {
427
+ "ok": True,
428
+ "message": "Statement executed successfully.",
429
+ }
430
+
431
+ if len(record_rows) == 1 and len(headers) <= 8:
432
+ parts = [f"{column}: {record_rows[0][column]}" for column in headers]
433
+ return {
434
+ "ok": True,
435
+ "message": "; ".join(parts),
436
+ "columns": headers,
437
+ "rows": record_rows,
438
+ }
439
+
440
+ if len(headers) <= 12 and len(record_rows) <= 20:
441
+ lines = ["\t".join(headers)]
442
+ for row in record_rows:
443
+ lines.append("\t".join(row[column] for column in headers))
444
+ return {
445
+ "ok": True,
446
+ "message": "\n".join(lines),
447
+ "columns": headers,
448
+ "rows": record_rows,
449
+ }
450
+
451
+ return {
452
+ "ok": True,
453
+ "message": f"Statement returned {len(record_rows):,} row(s).",
454
+ "columns": headers,
455
+ "rows": record_rows,
456
+ }
457
+
458
+ @staticmethod
459
+ def _is_mutating_sql(sql: str) -> bool:
460
+ stripped = sql.strip().rstrip(";")
461
+ upper = stripped.upper()
462
+ mutating_prefixes = (
463
+ "CREATE",
464
+ "DROP",
465
+ "ALTER",
466
+ "INSERT",
467
+ "UPDATE",
468
+ "DELETE",
469
+ "COPY",
470
+ "ATTACH",
471
+ "DETACH",
472
+ "INSTALL",
473
+ "LOAD",
474
+ "SET",
475
+ "PRAGMA",
476
+ )
477
+ return any(upper.startswith(prefix) for prefix in mutating_prefixes)
478
+
479
+ def _require_conn(self) -> duckdb.DuckDBPyConnection:
480
+ if self.conn is None:
481
+ raise DuckDBSessionError("No active database connection")
482
+ return self.conn
483
+
484
+ def _run_sql(self, sql: str, parameters: list[Any] | None = None) -> Any:
485
+ conn = self._require_conn()
486
+ if self.query_timeout_sec is None:
487
+ if parameters is None:
488
+ return conn.execute(sql)
489
+ return conn.execute(sql, parameters)
490
+
491
+ timer: threading.Timer | None = None
492
+ try:
493
+ timer = threading.Timer(self.query_timeout_sec, conn.interrupt)
494
+ timer.start()
495
+ if parameters is None:
496
+ return conn.execute(sql)
497
+ return conn.execute(sql, parameters)
498
+ except duckdb.InterruptException as exc:
499
+ raise DuckDBSessionError(
500
+ f"Query timed out after {self.query_timeout_sec:g}s"
501
+ ) from exc
502
+ finally:
503
+ if timer is not None:
504
+ timer.cancel()
505
+
506
+ def _close_connection(self) -> None:
507
+ if self.conn is not None:
508
+ self.conn.close()
509
+ self.conn = None
510
+ self._query_view_ids = []
511
+ self._query_meta = {}
512
+
513
+ @staticmethod
514
+ def _new_query_id() -> str:
515
+ return secrets.token_hex(6)
516
+
517
+ def _query_view_name(self, query_id: str) -> str:
518
+ return f"__sqlnow_q_{query_id}"
519
+
520
+ def _count_query_rows(self, query_id: str) -> int:
521
+ quoted = self._quote_ident(self._query_view_name(query_id))
522
+ row = self._run_sql(f"SELECT COUNT(*) FROM {quoted}").fetchone()
523
+ if row is None:
524
+ raise DuckDBSessionError(f"Could not count rows for query: {query_id}")
525
+ return int(row[0])
526
+
527
+ def _fetch_query_page(
528
+ self, query_id: str, offset: int, limit: int
529
+ ) -> dict[str, Any]:
530
+ view_name = self._query_view_name(query_id)
531
+ quoted = self._quote_ident(view_name)
532
+ result = self._run_sql(
533
+ f"SELECT * FROM {quoted} LIMIT {int(limit)} OFFSET {int(offset)}"
534
+ )
535
+ if not result.description:
536
+ meta = self._query_meta.get(query_id, {})
537
+ return {
538
+ "columns": meta.get("columns", []),
539
+ "types": meta.get("types", []),
540
+ "rows": [],
541
+ "has_more": False,
542
+ }
543
+
544
+ columns = [col[0] for col in result.description]
545
+ types = [str(col[1]) for col in result.description]
546
+ meta = self._query_meta.setdefault(query_id, {})
547
+ meta["columns"] = columns
548
+ meta["types"] = types
549
+
550
+ record_rows: list[dict[str, str]] = []
551
+ for row in result.fetchall():
552
+ record_rows.append(
553
+ dict(
554
+ zip(
555
+ columns,
556
+ [self._stringify_value(v) for v in row],
557
+ strict=True,
558
+ )
559
+ )
560
+ )
561
+
562
+ next_offset = offset + len(record_rows)
563
+ if len(record_rows) < limit or next_offset >= MAX_ROWS:
564
+ has_more = False
565
+ else:
566
+ peek = self._run_sql(
567
+ f"SELECT 1 FROM {quoted} LIMIT 1 OFFSET {next_offset}"
568
+ ).fetchone()
569
+ has_more = peek is not None
570
+ return {
571
+ "columns": columns,
572
+ "types": types,
573
+ "rows": record_rows,
574
+ "has_more": has_more,
575
+ }
576
+
577
+ def _resolve_db_path(self, db_name: str) -> Path:
578
+ name = db_name.removesuffix(".db")
579
+ return (self.data_dir / f"{name}.db").resolve()
580
+
581
+ def _sidecar_path(self, db_path: Path | None = None) -> Path | None:
582
+ path = db_path or self.active_db
583
+ if path is None:
584
+ return None
585
+ return path.with_suffix(".db.json")
586
+
587
+ def _load_sidecar(self, db_path: Path) -> dict[str, Any]:
588
+ sidecar = self._sidecar_path(db_path)
589
+ if sidecar is None or not sidecar.exists():
590
+ return {"attachments": []}
591
+ return json.loads(sidecar.read_text(encoding="utf-8"))
592
+
593
+ def _save_sidecar(self) -> None:
594
+ sidecar = self._sidecar_path()
595
+ if sidecar is None:
596
+ return
597
+ payload = {"attachments": self.attachments}
598
+ sidecar.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
599
+
600
+ def _is_attached(self, name: str) -> bool:
601
+ conn = self._require_conn()
602
+ rows = conn.execute(
603
+ "SELECT 1 FROM duckdb_databases() WHERE database_name = ?",
604
+ [name],
605
+ ).fetchall()
606
+ return bool(rows)
607
+
608
+ def _attach_from_sidecar(self, entry: dict[str, Any]) -> None:
609
+ conn = self._require_conn()
610
+ db_type = entry["type"]
611
+ name = entry["name"]
612
+ connection_string = entry["connection_string"]
613
+ _, attach_str = self._parse_database_connection(connection_string)
614
+ sql = (
615
+ f"ATTACH '{self._escape_sql_string(attach_str)}' "
616
+ f"AS {self._quote_ident(name)} (TYPE {db_type})"
617
+ )
618
+ conn.execute(sql)
619
+
620
+ @staticmethod
621
+ def _install_extensions(conn: duckdb.DuckDBPyConnection) -> None:
622
+ conn.execute(
623
+ """
624
+ INSTALL parquet; LOAD parquet;
625
+ INSTALL httpfs; LOAD httpfs;
626
+ INSTALL aws; LOAD aws;
627
+ INSTALL postgres; LOAD postgres;
628
+ INSTALL sqlite; LOAD sqlite;
629
+ INSTALL mysql; LOAD mysql;
630
+ INSTALL json; LOAD json;
631
+ INSTALL excel; LOAD excel;
632
+ SET GLOBAL sqlite_all_varchar = true;
633
+ """
634
+ )
635
+
636
+ def _resolve_allowed_path(self, path: str) -> Path:
637
+ candidate = Path(path).expanduser()
638
+ if not candidate.is_absolute():
639
+ candidate = (self.data_dir / candidate).resolve()
640
+ else:
641
+ candidate = candidate.resolve()
642
+
643
+ allowed_roots = (self.data_dir, *self.allow_paths)
644
+ if not any(self._is_under_root(candidate, root) for root in allowed_roots):
645
+ raise DuckDBSessionError(f"Path not allowed: {path}")
646
+ return candidate
647
+
648
+ @staticmethod
649
+ def _is_under_root(path: Path, root: Path) -> bool:
650
+ try:
651
+ path.relative_to(root.resolve() if hasattr(root, 'resolve') else root)
652
+ return True
653
+ except ValueError:
654
+ return False
655
+
656
+ @staticmethod
657
+ def _escape_sql_string(value: str) -> str:
658
+ return value.replace("'", "''")
659
+
660
+ @staticmethod
661
+ def _quote_ident(name: str) -> str:
662
+ escaped = name.replace('"', '""')
663
+ return f'"{escaped}"'
664
+
665
+ def _quote_table_ref(self, table_name: str) -> str:
666
+ if "." in table_name:
667
+ parts = table_name.split(".")
668
+ return ".".join(self._quote_ident(part) for part in parts)
669
+ return self._quote_ident(table_name)
670
+
671
+ @staticmethod
672
+ def _parse_database_connection(connection_string: str) -> tuple[DbType, str]:
673
+ if connection_string.startswith(("postgresql://", "postgres://")):
674
+ return "POSTGRES", connection_string
675
+ if connection_string.startswith("mysql://"):
676
+ return "MYSQL", connection_string
677
+ if connection_string.startswith("sqlite://"):
678
+ return "SQLITE", connection_string.replace("sqlite://", "", 1)
679
+ if connection_string.endswith((".db", ".sqlite")):
680
+ return "SQLITE", connection_string
681
+ raise DuckDBSessionError(f"Unsupported database connection string: {connection_string}")
682
+
683
+ def _file_reader_sql(self, path: Path) -> str:
684
+ """Build a DuckDB table function for a supported file type."""
685
+ # Future: tiered JSON attach — try read_json/read_ndjson first (here); on failure
686
+ # or attach_file(flatten=True), use the Python `flatterer` package (same approach
687
+ # as querier/libsqlnow json.rs) to flatten nested JSON into multiple CSV tables
688
+ # and COPY into DuckDB. That gives IATI-style multi-table loads; not needed for
689
+ # flat JSON arrays.
690
+ path_sql = self._escape_sql_string(str(path))
691
+ suffix = path.suffix.lower()
692
+ if suffix == ".csv":
693
+ return f"read_csv('{path_sql}', header = true)"
694
+ if suffix == ".parquet":
695
+ return f"read_parquet('{path_sql}')"
696
+ if suffix == ".json":
697
+ return f"read_json('{path_sql}')"
698
+ if suffix == ".jsonl":
699
+ return f"read_ndjson('{path_sql}')"
700
+ if suffix == ".xlsx":
701
+ return f"read_xlsx('{path_sql}', header = true)"
702
+ raise DuckDBSessionError(f"Unsupported file type: {suffix}")
703
+
704
+ def _file_attach_sql(self, path: Path, name: str, mode: AttachMode) -> str:
705
+ reader = self._file_reader_sql(path)
706
+ quoted = self._quote_ident(name)
707
+ object_type = "VIEW" if mode == "view" else "TABLE"
708
+ return f"CREATE OR REPLACE {object_type} {quoted} AS SELECT * FROM {reader}"
709
+
710
+ def _list_table_names(self, *, native_only: bool = False) -> list[str]:
711
+ return [t["name"] for t in self._list_tables(native_only=native_only)]
712
+
713
+ def _list_tables(self, *, native_only: bool = False) -> list[dict[str, Any]]:
714
+ conn = self._require_conn()
715
+ main_catalog = self.active_db.stem if self.active_db else None
716
+ tables_sql = """
717
+ SELECT table_catalog, table_schema, table_name
718
+ FROM information_schema.tables
719
+ WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
720
+ ORDER BY table_catalog, table_schema, table_name
721
+ """
722
+ columns_sql = """
723
+ SELECT table_catalog, table_schema, table_name, column_name, data_type
724
+ FROM information_schema.columns
725
+ WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
726
+ ORDER BY table_catalog, table_schema, table_name, column_name
727
+ """
728
+ table_rows = conn.execute(tables_sql).fetchall()
729
+ column_rows = conn.execute(columns_sql).fetchall()
730
+
731
+ columns_by_table: dict[tuple[str, str, str], list[dict[str, str]]] = {}
732
+ for catalog, schema, table, column_name, data_type in column_rows:
733
+ key = (catalog, schema, table)
734
+ columns_by_table.setdefault(key, []).append(
735
+ {"name": column_name, "type": data_type}
736
+ )
737
+
738
+ results: list[dict[str, Any]] = []
739
+ for catalog, schema, table in table_rows:
740
+ if native_only and main_catalog and catalog != main_catalog:
741
+ continue
742
+ if not self._table_visible(catalog, schema, table):
743
+ continue
744
+ display_name = self._display_table_name(catalog, schema, table)
745
+ key = (catalog, schema, table)
746
+ columns = columns_by_table.get(key, [])
747
+ if not columns:
748
+ continue
749
+ results.append({"name": display_name, "columns": columns})
750
+ return results
751
+
752
+ def _describe_table(self, table_name: str, *, native_only: bool = False) -> dict[str, Any]:
753
+ tables = self._list_tables(native_only=native_only)
754
+ match = next((t for t in tables if t["name"] == table_name), None)
755
+ if match is None:
756
+ raise DuckDBSessionError(f"Table not found: {table_name}")
757
+ return match
758
+
759
+ def _reject_publish_attachments(self, db_path: Path) -> None:
760
+ sidecar = self._load_sidecar(db_path)
761
+ attachments = sidecar.get("attachments") or []
762
+ if attachments:
763
+ names = ", ".join(entry["name"] for entry in attachments)
764
+ sidecar_name = db_path.with_suffix(".db.json").name
765
+ raise DuckDBSessionError(
766
+ "Publish mode does not support attached databases; "
767
+ f"remove entries from {sidecar_name}: {names}"
768
+ )
769
+
770
+ main_catalog = db_path.stem
771
+ rows = self._run_sql(
772
+ """
773
+ SELECT database_name
774
+ FROM duckdb_databases()
775
+ WHERE database_name NOT IN ('system', 'temp', ?)
776
+ """,
777
+ [main_catalog],
778
+ ).fetchall()
779
+ if rows:
780
+ names = ", ".join(row[0] for row in rows)
781
+ raise DuckDBSessionError(
782
+ "Publish mode does not support attached databases; "
783
+ f"found attached catalog(s): {names}"
784
+ )
785
+
786
+ def _table_visible(self, catalog: str, schema: str, table: str) -> bool:
787
+ attachment = next((a for a in self.attachments if a["name"] == catalog), None)
788
+ if attachment is None:
789
+ return True
790
+ allowed = attachment.get("tables")
791
+ if not allowed:
792
+ return True
793
+ return table in allowed
794
+
795
+ def _display_table_name(self, catalog: str, schema: str, table: str) -> str:
796
+ attachment = next((a for a in self.attachments if a["name"] == catalog), None)
797
+ if attachment is None:
798
+ if schema == "main":
799
+ return table
800
+ return f"{schema}.{table}"
801
+
802
+ db_type = attachment["type"]
803
+ if db_type == "POSTGRES":
804
+ if schema == "public":
805
+ return f"{catalog}.{table}"
806
+ return f"{catalog}.{schema}.{table}"
807
+ if db_type == "SQLITE":
808
+ if schema == "main":
809
+ return f"{catalog}.{table}"
810
+ return f"{catalog}.{schema}.{table}"
811
+ return f"{catalog}.{table}"
812
+
813
+ @staticmethod
814
+ def _stringify_value(value: Any) -> str:
815
+ if value is None:
816
+ return ""
817
+ if isinstance(value, bool):
818
+ return str(value).lower()
819
+ if isinstance(value, (int, float, Decimal)):
820
+ return str(value)
821
+ if isinstance(value, (datetime, date, time, timedelta)):
822
+ return str(value)
823
+ if isinstance(value, bytes):
824
+ return value.decode("utf-8", errors="replace")
825
+ if isinstance(value, (list, dict, tuple)):
826
+ return str(value)
827
+ return str(value)