sqlnow-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlnow_mcp/__init__.py +3 -0
- sqlnow_mcp/cli.py +545 -0
- sqlnow_mcp/config.py +100 -0
- sqlnow_mcp/db.py +827 -0
- sqlnow_mcp/metadata.py +668 -0
- sqlnow_mcp/resource_limits.py +37 -0
- sqlnow_mcp/server.py +445 -0
- sqlnow_mcp/table_result.py +246 -0
- sqlnow_mcp/ui/table.html +355 -0
- sqlnow_mcp/ui.py +21 -0
- sqlnow_mcp-0.1.0.dist-info/METADATA +341 -0
- sqlnow_mcp-0.1.0.dist-info/RECORD +15 -0
- sqlnow_mcp-0.1.0.dist-info/WHEEL +4 -0
- sqlnow_mcp-0.1.0.dist-info/entry_points.txt +3 -0
- sqlnow_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
sqlnow_mcp/db.py
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import secrets
|
|
5
|
+
import threading
|
|
6
|
+
from datetime import date, datetime, time, timedelta
|
|
7
|
+
from decimal import Decimal
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
import duckdb
|
|
12
|
+
|
|
13
|
+
# Modes accepted by attach_file: "view" creates a VIEW over the file; any
# other value (i.e. "load") materialises the file as a TABLE.
AttachMode = Literal["view", "load"]
# External database kinds produced by _parse_database_connection.
DbType = Literal["POSTGRES", "SQLITE", "MYSQL"]

# Upper bound on the rows served for a single query through the paging API.
MAX_ROWS = 100_000
# Default and maximum page size used by fetch_table_page.
BACKGROUND_CHUNK_ROWS = 2_000
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DuckDBSessionError(Exception):
    """Error raised by DuckDBSession for invalid requests or session state."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DuckDBSession:
|
|
25
|
+
def __init__(
    self,
    data_dir: Path,
    allow_paths: list[Path] | tuple[Path, ...] = (),
    allow_external: bool = False,
    *,
    read_only: bool = False,
    query_timeout_sec: float | None = None,
) -> None:
    """Configure a session; no connection is opened here.

    Args:
        data_dir: Directory that holds the ``*.db`` files; stored resolved.
        allow_paths: Extra directories files may be attached from.
        allow_external: Whether attach_database may be used.
        read_only: Stored on the session as ``read_only``.
        query_timeout_sec: Seconds after which _run_sql interrupts a query;
            ``None`` disables the timeout.

    Raises:
        DuckDBSessionError: If an ``allow_paths`` entry resolves to a
            filesystem root.
    """
    self.data_dir = data_dir.resolve()
    resolved_allow = tuple(p.resolve() for p in allow_paths)
    for path in resolved_allow:
        # Resolved paths are always absolute, so comparing against a relative
        # Path() can never match. A root is the only path equal to its own
        # parent, which also covers drive roots on Windows.
        if path == path.parent:
            raise DuckDBSessionError(
                f"Unsafe allow_paths entry {path!s} (would allow any file on disk)"
            )
    self.allow_paths = resolved_allow
    self.allow_external = allow_external
    self.read_only = read_only
    self.query_timeout_sec = query_timeout_sec
    self.native_tables_only = False
    self.conn: duckdb.DuckDBPyConnection | None = None
    self.active_db: Path | None = None
    self.attachments: list[dict[str, Any]] = []
    self.failed_attachments: list[dict[str, Any]] = []
    self._query_view_ids: list[str] = []
    self._query_meta: dict[str, dict[str, Any]] = {}
|
|
52
|
+
|
|
53
|
+
def use_memory(self) -> None:
    """Switch the session to a fresh in-memory DuckDB database.

    Any open connection is closed first and the attachment bookkeeping is
    cleared.
    """
    self._close_connection()
    fresh = duckdb.connect()
    self.conn = fresh
    self._install_extensions(fresh)
    self.active_db = None
    self.attachments, self.failed_attachments = [], []
|
|
60
|
+
|
|
61
|
+
def use_database(self, db_name: str) -> dict[str, Any]:
    """Open an existing database from ``data_dir`` and restore its attachments.

    Attachments listed in the sidecar file are re-attached one by one. An
    entry that fails is recorded in ``failed_attachments`` (with an ``error``
    key) instead of aborting the switch.

    Args:
        db_name: Database name, with or without the ``.db`` suffix.

    Returns:
        Dict with ``name``, ``path``, ``tables``, ``attachments`` and
        ``failed_attachments``.

    Raises:
        DuckDBSessionError: If the name resolves outside ``data_dir`` or the
            database file does not exist.
    """
    db_path = self._resolve_db_path(db_name)
    # Same containment rule as create_database: names such as "../x" or an
    # absolute path must not escape data_dir.
    if not self._is_under_root(db_path, self.data_dir):
        raise DuckDBSessionError(f"Invalid database name: {db_name}")
    if not db_path.exists():
        raise DuckDBSessionError(f"Database not found: {db_name}")

    self._close_connection()
    self.conn = duckdb.connect(str(db_path))
    self._install_extensions(self.conn)
    self.active_db = db_path
    self.attachments = []
    self.failed_attachments = []

    sidecar = self._load_sidecar(db_path)
    for entry in sidecar.get("attachments", []):
        # Deliberately best-effort: one unreachable source must not prevent
        # the database itself from opening.
        try:
            if not self._is_attached(entry["name"]):
                self._attach_from_sidecar(entry)
            self.attachments.append(entry)
        except Exception as exc:
            failed = dict(entry)
            failed["error"] = str(exc)
            self.failed_attachments.append(failed)

    return {
        "name": db_path.stem,
        "path": str(db_path),
        "tables": self._list_table_names(),
        "attachments": list(self.attachments),
        "failed_attachments": list(self.failed_attachments),
    }
|
|
91
|
+
|
|
92
|
+
def open_publish_database(
    self,
    db_path: Path,
    *,
    memory_limit: str,
    threads: int,
    max_temp_directory_size: str | None = None,
) -> dict[str, Any]:
    """Open a database read-only for publishing, with resource limits applied.

    Sets ``native_tables_only`` and refuses databases that have attachments.

    Args:
        db_path: Path to the database file.
        memory_limit: Value for DuckDB's ``memory_limit`` setting.
        threads: Value for DuckDB's ``threads`` setting.
        max_temp_directory_size: Optional ``max_temp_directory_size`` value.

    Returns:
        Dict with ``name``, ``path``, ``tables`` and ``read_only``.

    Raises:
        DuckDBSessionError: If the file is missing or has attachments.
    """
    resolved = db_path.resolve()
    if not resolved.exists():
        raise DuckDBSessionError(f"Database not found: {resolved}")

    self._close_connection()
    previous_native_only = self.native_tables_only
    self.conn = duckdb.connect(str(resolved), read_only=True)
    try:
        self._install_extensions(self.conn)
        self.active_db = resolved
        self.attachments = []
        self.failed_attachments = []
        self.native_tables_only = True

        self._reject_publish_attachments(resolved)

        escaped_memory = self._escape_sql_string(memory_limit)
        self._run_sql(f"SET memory_limit = '{escaped_memory}'")
        self._run_sql(f"SET threads = {int(threads)}")
        if max_temp_directory_size:
            escaped_temp = self._escape_sql_string(max_temp_directory_size)
            self._run_sql(f"SET max_temp_directory_size = '{escaped_temp}'")

        tables = self._list_table_names(native_only=True)
    except Exception:
        # Do not leave a half-configured session behind (open connection,
        # active_db set, limits not applied) when publishing is refused.
        self._close_connection()
        self.active_db = None
        self.native_tables_only = previous_native_only
        raise

    return {
        "name": resolved.stem,
        "path": str(resolved),
        "tables": tables,
        "read_only": True,
    }
|
|
127
|
+
|
|
128
|
+
def create_database(self, db_name: str) -> dict[str, Any]:
    """Create a new, empty database file inside ``data_dir``.

    The file is created by opening and immediately closing a connection; the
    session's active connection is not changed.

    Raises:
        DuckDBSessionError: If the name is empty or dotted, resolves outside
            ``data_dir``, or the file already exists.
    """
    self.data_dir.mkdir(parents=True, exist_ok=True)
    target = self._resolve_db_path(db_name)
    stem = target.stem
    if stem in {"", ".", ".."} or not self._is_under_root(target, self.data_dir):
        raise DuckDBSessionError(f"Invalid database name: {db_name}")
    if target.exists():
        raise DuckDBSessionError(f"Database already exists: {stem}")

    handle = duckdb.connect(str(target))
    try:
        self._install_extensions(handle)
    finally:
        handle.close()

    return {"name": stem, "path": str(target)}
|
|
146
|
+
|
|
147
|
+
def list_databases(self) -> list[dict[str, Any]]:
    """List the ``*.db`` files in ``data_dir``, sorted by path.

    Creates ``data_dir`` if it does not exist. Each entry carries ``name``,
    ``path``, ``size_mb`` (rounded to 3 decimals) and ``last_modified`` (ISO
    string built from the file's mtime).
    """
    self.data_dir.mkdir(parents=True, exist_ok=True)

    def summarize(db_file: Path) -> dict[str, Any]:
        info = db_file.stat()
        modified = datetime.fromtimestamp(info.st_mtime)
        return {
            "name": db_file.stem,
            "path": str(db_file),
            "size_mb": round(info.st_size / (1024 * 1024), 3),
            "last_modified": modified.isoformat(),
        }

    return [summarize(db_file) for db_file in sorted(self.data_dir.glob("*.db"))]
|
|
161
|
+
|
|
162
|
+
def current_database(self) -> dict[str, Any]:
    """Describe the active connection.

    Without a connection every field is empty and ``in_memory`` is False.
    With a connection, ``in_memory`` is True exactly when no database file is
    active. Attachment lists are returned as copies.
    """
    if self.conn is None:
        return {
            "name": None,
            "path": None,
            "in_memory": False,
            "attachments": [],
            "failed_attachments": [],
        }

    active = self.active_db
    name, path = (active.stem, str(active)) if active else (None, None)
    return {
        "name": name,
        "path": path,
        "in_memory": active is None,
        "attachments": list(self.attachments),
        "failed_attachments": list(self.failed_attachments),
    }
|
|
178
|
+
|
|
179
|
+
def attach_file(
    self,
    path: str,
    name: str | None = None,
    mode: AttachMode = "view",
) -> dict[str, Any]:
    """Expose a data file as a view or table in the active database.

    Args:
        path: File path; checked against the allowed roots.
        name: Object name to create; defaults to the file's stem.
        mode: Passed on to _file_attach_sql to choose view or table.

    Returns:
        Dict with the created ``name``, the resolved ``path`` and ``mode``.

    Raises:
        DuckDBSessionError: If the file does not exist.
    """
    conn = self._require_conn()
    source = self._resolve_allowed_path(path)
    if not source.exists():
        raise DuckDBSessionError(f"File not found: {path}")

    table_name = name if name else source.stem
    conn.execute(self._file_attach_sql(source, table_name, mode))
    return {"name": table_name, "path": str(source), "mode": mode}
|
|
194
|
+
|
|
195
|
+
def attach_database(
    self,
    connection_string: str,
    name: str,
    tables: list[str] | None = None,
) -> dict[str, Any]:
    """Attach an external database under the alias ``name``.

    The attachment entry, including the connection string as given, is
    appended to ``attachments`` and _save_sidecar is called.

    Args:
        connection_string: URL or path understood by
            _parse_database_connection.
        name: Catalog alias for the attached database.
        tables: Optional list of tables stored on the entry.

    Returns:
        The stored attachment entry.

    Raises:
        DuckDBSessionError: If external attachments are disabled.
    """
    if not self.allow_external:
        raise DuckDBSessionError("External database attachments are disabled")

    conn = self._require_conn()
    db_type, attach_str = self._parse_database_connection(connection_string)
    target = self._escape_sql_string(attach_str)
    alias = self._quote_ident(name)
    conn.execute(f"ATTACH '{target}' AS {alias} (TYPE {db_type})")

    entry: dict[str, Any] = {
        "name": name,
        "type": db_type,
        "connection_string": connection_string,
        **({"tables": tables} if tables else {}),
    }
    self.attachments.append(entry)
    self._save_sidecar()
    return entry
|
|
223
|
+
|
|
224
|
+
def detach_source(self, name: str) -> dict[str, Any]:
    """Detach an attached database, or drop a file-backed view/table.

    If ``name`` matches a recorded attachment it is detached, removed from
    ``attachments`` and the sidecar is rewritten. Otherwise ``name`` is
    treated as a file source and dropped as a view and as a table.

    Returns:
        Dict with ``name``, ``detached`` and ``type`` ("database" or "file").
    """
    conn = self._require_conn()
    if any(a["name"] == name for a in self.attachments):
        conn.execute(f"DETACH {self._quote_ident(name)}")
        self.attachments = [a for a in self.attachments if a["name"] != name]
        self._save_sidecar()
        return {"name": name, "detached": True, "type": "database"}

    quoted = self._quote_ident(name)
    for object_kind in ("VIEW", "TABLE"):
        try:
            conn.execute(f"DROP {object_kind} IF EXISTS {quoted}")
        except duckdb.CatalogException:
            # NOTE(review): DuckDB reports a catalog error when the name
            # exists as the other object kind (e.g. DROP VIEW on a table
            # created by mode="load"). The other iteration drops that object.
            continue
    return {"name": name, "detached": True, "type": "file"}
|
|
237
|
+
|
|
238
|
+
def run_mutating_query(self, sql: str) -> dict[str, Any]:
    """Execute a statement and summarise its outcome as a status dict.

    Surrounding whitespace and trailing semicolons are removed before
    execution.
    """
    statement = sql.strip().rstrip(";")
    outcome = self._run_sql(statement)
    return self._format_execution_result(outcome)
|
|
241
|
+
|
|
242
|
+
def run_query(self, sql: str, limit: int = 500) -> dict[str, Any]:
    """Run SQL and return up to ``limit`` rows with every value stringified.

    Statements classified as mutating are delegated to run_mutating_query and
    return its status dict instead.

    Returns:
        Dict with ``headers``, ``rows`` (lists of strings) and ``types``;
        all three are empty when the statement produces no result set.
    """
    if self._is_mutating_sql(sql):
        return self.run_mutating_query(sql)

    cursor = self._run_sql(sql)
    description = cursor.description
    if not description:
        return {"headers": [], "rows": [], "types": []}

    headers = [column[0] for column in description]
    types = [str(column[1]) for column in description]
    stringify = self._stringify_value
    rows: list[list[str]] = [
        [stringify(cell) for cell in record] for record in cursor.fetchmany(limit)
    ]
    return {"headers": headers, "rows": rows, "types": types}
|
|
255
|
+
|
|
256
|
+
def run_select_query(self, sql: str, limit: int = 500) -> dict[str, Any]:
    """Create a TEMP VIEW for the SELECT and return the first page for the table UI.

    The view is registered under a new query id so later pages can be
    requested with fetch_table_page.

    Args:
        sql: A SELECT statement; trailing semicolons are stripped.
        limit: Size of the first page, capped at ``MAX_ROWS``.

    Returns:
        Dict with ``query_id``, the stripped ``sql``, ``columns``, ``types``,
        ``rows``, ``total_rows`` and the paging flags.

    Raises:
        DuckDBSessionError: If ``sql`` is classified as mutating.
    """
    if self._is_mutating_sql(sql):
        raise DuckDBSessionError(
            "run_select_query requires a SELECT statement; use run_query for DDL/DML"
        )

    query_id = self._new_query_id()
    view_name = self._query_view_name(query_id)
    stripped = sql.strip().rstrip(";")
    # The newline before ")" keeps a trailing "-- comment" in the user's SQL
    # from commenting out the closing parenthesis.
    self._run_sql(f"CREATE OR REPLACE TEMP VIEW {view_name} AS ({stripped}\n)")
    self._query_view_ids.append(query_id)

    total_rows = self._count_query_rows(query_id)
    self._query_meta.setdefault(query_id, {})["total_rows"] = total_rows

    page_limit = min(int(limit), MAX_ROWS)
    page = self._fetch_query_page(query_id, offset=0, limit=page_limit)
    has_more = page["has_more"]
    return {
        "query_id": query_id,
        "sql": stripped,
        "columns": page["columns"],
        "types": page["types"],
        "rows": page["rows"],
        "offset": 0,
        "total_rows": total_rows,
        "row_cap": MAX_ROWS,
        "has_more": has_more,
        "loading": has_more,
        "complete": not has_more,
        "capped": False,
    }
|
|
289
|
+
|
|
290
|
+
def fetch_table_page(
    self, query_id: str, offset: int, limit: int = BACKGROUND_CHUNK_ROWS
) -> dict[str, Any]:
    """Return one page of a query previously registered by run_select_query.

    Args:
        query_id: Id returned by run_select_query.
        offset: Zero-based row offset of the page.
        limit: Requested page size; clamped to between 1 and
            ``BACKGROUND_CHUNK_ROWS`` and never past ``MAX_ROWS``.

    Returns:
        Dict with ``columns``, ``types``, ``rows``, ``offset``,
        ``total_rows``, ``row_cap`` and the ``has_more`` / ``complete`` /
        ``capped`` flags.

    Raises:
        DuckDBSessionError: If ``query_id`` is unknown or ``offset`` is
            negative.
    """
    if query_id not in self._query_view_ids:
        raise DuckDBSessionError(f"Unknown query_id: {query_id}")

    offset = int(offset)
    if offset < 0:
        raise DuckDBSessionError(f"Invalid offset: {offset}")

    if offset >= MAX_ROWS:
        # Use .get so missing column metadata yields empty lists rather than
        # a KeyError, matching the lookups used for normal pages.
        meta = self._query_meta.get(query_id, {})
        return {
            "query_id": query_id,
            "columns": meta.get("columns", []),
            "types": meta.get("types", []),
            "rows": [],
            "offset": offset,
            "total_rows": meta.get("total_rows"),
            "row_cap": MAX_ROWS,
            "has_more": False,
            "complete": True,
            "capped": True,
        }

    # A page size below 1 would return no rows while still reporting
    # has_more=True, so a client following has_more would never finish.
    page_limit = max(1, min(int(limit), MAX_ROWS - offset, BACKGROUND_CHUNK_ROWS))
    page = self._fetch_query_page(query_id, offset=offset, limit=page_limit)
    loaded = offset + len(page["rows"])
    capped = loaded >= MAX_ROWS and page["has_more"]
    if capped:
        page["has_more"] = False
    meta = self._query_meta.get(query_id, {})
    return {
        "query_id": query_id,
        "columns": page["columns"],
        "types": page["types"],
        "rows": page["rows"],
        "offset": offset,
        "total_rows": meta.get("total_rows"),
        "row_cap": MAX_ROWS,
        "has_more": page["has_more"],
        "complete": not page["has_more"],
        "capped": capped,
    }
|
|
331
|
+
|
|
332
|
+
def list_tables(self, *, native_only: bool | None = None) -> list[dict[str, Any]]:
    """List visible tables with their columns.

    When ``native_only`` is None the session's ``native_tables_only`` flag
    is used.
    """
    effective = self.native_tables_only if native_only is None else native_only
    return self._list_tables(native_only=effective)
|
|
336
|
+
|
|
337
|
+
def describe_table(
    self, table_name: str, *, native_only: bool | None = None
) -> dict[str, Any]:
    """Return the name and columns of one table.

    When ``native_only`` is None the session's ``native_tables_only`` flag
    is used.
    """
    effective = self.native_tables_only if native_only is None else native_only
    return self._describe_table(table_name, native_only=effective)
|
|
343
|
+
|
|
344
|
+
def sample_table(self, table_name: str, n: int = 10) -> dict[str, Any]:
|
|
345
|
+
quoted = self._quote_table_ref(table_name)
|
|
346
|
+
sql = f"SELECT * FROM {quoted} LIMIT {int(n)}"
|
|
347
|
+
result = self.run_query(sql, limit=int(n))
|
|
348
|
+
return {
|
|
349
|
+
"table": table_name,
|
|
350
|
+
"sql": sql,
|
|
351
|
+
"rows": [dict(zip(result["headers"], row, strict=True)) for row in result["rows"]],
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
def profile_table(self, table_name: str, sample_threshold: int = 100_000) -> dict[str, Any]:
|
|
355
|
+
quoted = self._quote_table_ref(table_name)
|
|
356
|
+
count_row = self._run_sql(f"SELECT COUNT(*) FROM {quoted}").fetchone()
|
|
357
|
+
if count_row is None:
|
|
358
|
+
raise DuckDBSessionError(f"Could not count rows for table: {table_name}")
|
|
359
|
+
count = count_row[0]
|
|
360
|
+
if count <= sample_threshold:
|
|
361
|
+
sql = f"SUMMARIZE {quoted}"
|
|
362
|
+
sampled = False
|
|
363
|
+
else:
|
|
364
|
+
pct = min(100 * sample_threshold / count, 100)
|
|
365
|
+
sql = (
|
|
366
|
+
f"SUMMARIZE SELECT * FROM {quoted} "
|
|
367
|
+
f"USING SAMPLE {pct:.2f} PERCENT (bernoulli)"
|
|
368
|
+
)
|
|
369
|
+
sampled = True
|
|
370
|
+
result = self._run_sql(sql)
|
|
371
|
+
headers = [col[0] for col in result.description]
|
|
372
|
+
profile = [
|
|
373
|
+
dict(zip(headers, row, strict=True)) for row in result.fetchall()
|
|
374
|
+
]
|
|
375
|
+
return {"sampled": sampled, "row_count": count, "profile": profile}
|
|
376
|
+
|
|
377
|
+
def value_counts(self, table_name: str, column_name: str, limit: int = 20) -> dict[str, Any]:
|
|
378
|
+
quoted_table = self._quote_table_ref(table_name)
|
|
379
|
+
quoted_col = self._quote_ident(column_name)
|
|
380
|
+
sql = (
|
|
381
|
+
f"SELECT {quoted_col} AS value, COUNT(*) AS count "
|
|
382
|
+
f"FROM {quoted_table} "
|
|
383
|
+
f"GROUP BY {quoted_col} "
|
|
384
|
+
f"ORDER BY count DESC "
|
|
385
|
+
f"LIMIT {int(limit)}"
|
|
386
|
+
)
|
|
387
|
+
result = self.run_query(sql, limit=limit)
|
|
388
|
+
rows = [dict(zip(result["headers"], row, strict=True)) for row in result["rows"]]
|
|
389
|
+
return {
|
|
390
|
+
"table": table_name,
|
|
391
|
+
"column": column_name,
|
|
392
|
+
"sql": sql,
|
|
393
|
+
"rows": rows,
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
def _format_execution_result(self, result: Any) -> dict[str, Any]:
|
|
397
|
+
if not result.description:
|
|
398
|
+
return {"ok": True, "message": "Statement executed successfully."}
|
|
399
|
+
|
|
400
|
+
headers = [col[0] for col in result.description]
|
|
401
|
+
record_rows = [
|
|
402
|
+
dict(
|
|
403
|
+
zip(
|
|
404
|
+
headers,
|
|
405
|
+
[self._stringify_value(v) for v in row],
|
|
406
|
+
strict=True,
|
|
407
|
+
)
|
|
408
|
+
)
|
|
409
|
+
for row in result.fetchall()
|
|
410
|
+
]
|
|
411
|
+
|
|
412
|
+
if not record_rows:
|
|
413
|
+
return {"ok": True, "message": "Statement executed successfully."}
|
|
414
|
+
|
|
415
|
+
if len(headers) == 1 and len(record_rows) == 1:
|
|
416
|
+
column = headers[0]
|
|
417
|
+
value = record_rows[0][column]
|
|
418
|
+
if column.lower() == "count" and value.isdigit():
|
|
419
|
+
count = int(value)
|
|
420
|
+
return {
|
|
421
|
+
"ok": True,
|
|
422
|
+
"message": f"{count:,} row(s) affected.",
|
|
423
|
+
"rows_affected": count,
|
|
424
|
+
}
|
|
425
|
+
if column.lower() == "success":
|
|
426
|
+
return {
|
|
427
|
+
"ok": True,
|
|
428
|
+
"message": "Statement executed successfully.",
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
if len(record_rows) == 1 and len(headers) <= 8:
|
|
432
|
+
parts = [f"{column}: {record_rows[0][column]}" for column in headers]
|
|
433
|
+
return {
|
|
434
|
+
"ok": True,
|
|
435
|
+
"message": "; ".join(parts),
|
|
436
|
+
"columns": headers,
|
|
437
|
+
"rows": record_rows,
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
if len(headers) <= 12 and len(record_rows) <= 20:
|
|
441
|
+
lines = ["\t".join(headers)]
|
|
442
|
+
for row in record_rows:
|
|
443
|
+
lines.append("\t".join(row[column] for column in headers))
|
|
444
|
+
return {
|
|
445
|
+
"ok": True,
|
|
446
|
+
"message": "\n".join(lines),
|
|
447
|
+
"columns": headers,
|
|
448
|
+
"rows": record_rows,
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
return {
|
|
452
|
+
"ok": True,
|
|
453
|
+
"message": f"Statement returned {len(record_rows):,} row(s).",
|
|
454
|
+
"columns": headers,
|
|
455
|
+
"rows": record_rows,
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
@staticmethod
|
|
459
|
+
def _is_mutating_sql(sql: str) -> bool:
|
|
460
|
+
stripped = sql.strip().rstrip(";")
|
|
461
|
+
upper = stripped.upper()
|
|
462
|
+
mutating_prefixes = (
|
|
463
|
+
"CREATE",
|
|
464
|
+
"DROP",
|
|
465
|
+
"ALTER",
|
|
466
|
+
"INSERT",
|
|
467
|
+
"UPDATE",
|
|
468
|
+
"DELETE",
|
|
469
|
+
"COPY",
|
|
470
|
+
"ATTACH",
|
|
471
|
+
"DETACH",
|
|
472
|
+
"INSTALL",
|
|
473
|
+
"LOAD",
|
|
474
|
+
"SET",
|
|
475
|
+
"PRAGMA",
|
|
476
|
+
)
|
|
477
|
+
return any(upper.startswith(prefix) for prefix in mutating_prefixes)
|
|
478
|
+
|
|
479
|
+
def _require_conn(self) -> duckdb.DuckDBPyConnection:
    """Return the active connection.

    Raises:
        DuckDBSessionError: If no connection is open.
    """
    conn = self.conn
    if conn is not None:
        return conn
    raise DuckDBSessionError("No active database connection")
|
|
483
|
+
|
|
484
|
+
def _run_sql(self, sql: str, parameters: list[Any] | None = None) -> Any:
    """Execute SQL on the active connection, honouring the query timeout.

    When ``query_timeout_sec`` is set, a timer calls the connection's
    ``interrupt`` after that many seconds; the timer is cancelled once
    ``execute`` returns or raises.

    Args:
        sql: Statement text.
        parameters: Optional positional parameters for the statement.

    Returns:
        Whatever ``conn.execute`` returns.

    Raises:
        DuckDBSessionError: If the statement was interrupted.
    """
    conn = self._require_conn()
    call_args = (sql,) if parameters is None else (sql, parameters)

    timeout = self.query_timeout_sec
    if timeout is None:
        return conn.execute(*call_args)

    watchdog = threading.Timer(timeout, conn.interrupt)
    try:
        watchdog.start()
        return conn.execute(*call_args)
    except duckdb.InterruptException as exc:
        raise DuckDBSessionError(
            f"Query timed out after {self.query_timeout_sec:g}s"
        ) from exc
    finally:
        watchdog.cancel()
|
|
505
|
+
|
|
506
|
+
def _close_connection(self) -> None:
    """Close the active connection, if any, and forget registered queries."""
    existing = self.conn
    if existing is not None:
        existing.close()
        self.conn = None
    self._query_view_ids, self._query_meta = [], {}
|
|
512
|
+
|
|
513
|
+
@staticmethod
|
|
514
|
+
def _new_query_id() -> str:
|
|
515
|
+
return secrets.token_hex(6)
|
|
516
|
+
|
|
517
|
+
def _query_view_name(self, query_id: str) -> str:
    """Return the view name that backs ``query_id``."""
    return "__sqlnow_q_{}".format(query_id)
|
|
519
|
+
|
|
520
|
+
def _count_query_rows(self, query_id: str) -> int:
    """Return the number of rows produced by the view behind ``query_id``.

    Raises:
        DuckDBSessionError: If the count query returns no row.
    """
    view = self._quote_ident(self._query_view_name(query_id))
    counted = self._run_sql(f"SELECT COUNT(*) FROM {view}").fetchone()
    if counted is not None:
        return int(counted[0])
    raise DuckDBSessionError(f"Could not count rows for query: {query_id}")
|
|
526
|
+
|
|
527
|
+
def _fetch_query_page(
|
|
528
|
+
self, query_id: str, offset: int, limit: int
|
|
529
|
+
) -> dict[str, Any]:
|
|
530
|
+
view_name = self._query_view_name(query_id)
|
|
531
|
+
quoted = self._quote_ident(view_name)
|
|
532
|
+
result = self._run_sql(
|
|
533
|
+
f"SELECT * FROM {quoted} LIMIT {int(limit)} OFFSET {int(offset)}"
|
|
534
|
+
)
|
|
535
|
+
if not result.description:
|
|
536
|
+
meta = self._query_meta.get(query_id, {})
|
|
537
|
+
return {
|
|
538
|
+
"columns": meta.get("columns", []),
|
|
539
|
+
"types": meta.get("types", []),
|
|
540
|
+
"rows": [],
|
|
541
|
+
"has_more": False,
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
columns = [col[0] for col in result.description]
|
|
545
|
+
types = [str(col[1]) for col in result.description]
|
|
546
|
+
meta = self._query_meta.setdefault(query_id, {})
|
|
547
|
+
meta["columns"] = columns
|
|
548
|
+
meta["types"] = types
|
|
549
|
+
|
|
550
|
+
record_rows: list[dict[str, str]] = []
|
|
551
|
+
for row in result.fetchall():
|
|
552
|
+
record_rows.append(
|
|
553
|
+
dict(
|
|
554
|
+
zip(
|
|
555
|
+
columns,
|
|
556
|
+
[self._stringify_value(v) for v in row],
|
|
557
|
+
strict=True,
|
|
558
|
+
)
|
|
559
|
+
)
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
next_offset = offset + len(record_rows)
|
|
563
|
+
if len(record_rows) < limit or next_offset >= MAX_ROWS:
|
|
564
|
+
has_more = False
|
|
565
|
+
else:
|
|
566
|
+
peek = self._run_sql(
|
|
567
|
+
f"SELECT 1 FROM {quoted} LIMIT 1 OFFSET {next_offset}"
|
|
568
|
+
).fetchone()
|
|
569
|
+
has_more = peek is not None
|
|
570
|
+
return {
|
|
571
|
+
"columns": columns,
|
|
572
|
+
"types": types,
|
|
573
|
+
"rows": record_rows,
|
|
574
|
+
"has_more": has_more,
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
def _resolve_db_path(self, db_name: str) -> Path:
    """Map a database name (with or without ``.db``) to a resolved path.

    The result is not checked for containment in ``data_dir``.
    """
    filename = db_name.removesuffix(".db") + ".db"
    return self.data_dir.joinpath(filename).resolve()
|
|
580
|
+
|
|
581
|
+
def _sidecar_path(self, db_path: Path | None = None) -> Path | None:
    """Return the ``.db.json`` sidecar path for a database.

    Falls back to the active database; returns None when there is neither.
    """
    target = db_path or self.active_db
    return None if target is None else target.with_suffix(".db.json")
|
|
586
|
+
|
|
587
|
+
def _load_sidecar(self, db_path: Path) -> dict[str, Any]:
    """Load the JSON sidecar for ``db_path``.

    Returns ``{"attachments": []}`` when the sidecar file does not exist.
    """
    sidecar = self._sidecar_path(db_path)
    if sidecar is not None and sidecar.exists():
        return json.loads(sidecar.read_text(encoding="utf-8"))
    return {"attachments": []}
|
|
592
|
+
|
|
593
|
+
def _save_sidecar(self) -> None:
    """Persist the attachment list next to the active database file.

    Does nothing when there is no sidecar path. The payload is written to a
    temporary sibling file and then moved into place, so an interrupted
    write cannot leave a truncated sidecar behind.

    Note: entries are written as stored in ``attachments``; those created by
    attach_database include the connection string as given.
    """
    sidecar = self._sidecar_path()
    if sidecar is None:
        return
    text = json.dumps({"attachments": self.attachments}, indent=2) + "\n"
    staging = sidecar.with_name(sidecar.name + ".tmp")
    staging.write_text(text, encoding="utf-8")
    staging.replace(sidecar)
|
|
599
|
+
|
|
600
|
+
def _is_attached(self, name: str) -> bool:
    """Report whether a catalog called ``name`` is listed by duckdb_databases()."""
    conn = self._require_conn()
    cursor = conn.execute(
        "SELECT 1 FROM duckdb_databases() WHERE database_name = ?",
        [name],
    )
    return len(cursor.fetchall()) > 0
|
|
607
|
+
|
|
608
|
+
def _attach_from_sidecar(self, entry: dict[str, Any]) -> None:
    """Re-attach an external database described by a sidecar entry.

    Args:
        entry: Dict with ``name`` and ``connection_string``, and normally
            ``type`` as written by attach_database.

    Raises:
        DuckDBSessionError: If the declared type is not a supported database
            kind.
    """
    conn = self._require_conn()
    name = entry["name"]
    parsed_type, attach_str = self._parse_database_connection(
        entry["connection_string"]
    )
    # The type is interpolated into SQL as a bare keyword and comes from an
    # editable JSON file, so only the known kinds are accepted.
    db_type = str(entry.get("type", parsed_type)).upper()
    if db_type not in ("POSTGRES", "SQLITE", "MYSQL"):
        raise DuckDBSessionError(f"Unsupported attachment type in sidecar: {db_type}")
    sql = (
        f"ATTACH '{self._escape_sql_string(attach_str)}' "
        f"AS {self._quote_ident(name)} (TYPE {db_type})"
    )
    conn.execute(sql)
|
|
619
|
+
|
|
620
|
+
@staticmethod
def _install_extensions(conn: duckdb.DuckDBPyConnection) -> None:
    """Install and load the extensions this session relies on.

    Runs one multi-statement script on ``conn`` that installs and loads the
    parquet, httpfs, aws, postgres, sqlite, mysql, json and excel extensions,
    then sets ``sqlite_all_varchar`` globally.
    """
    conn.execute(
        """
        INSTALL parquet; LOAD parquet;
        INSTALL httpfs; LOAD httpfs;
        INSTALL aws; LOAD aws;
        INSTALL postgres; LOAD postgres;
        INSTALL sqlite; LOAD sqlite;
        INSTALL mysql; LOAD mysql;
        INSTALL json; LOAD json;
        INSTALL excel; LOAD excel;
        SET GLOBAL sqlite_all_varchar = true;
        """
    )
|
|
635
|
+
|
|
636
|
+
def _resolve_allowed_path(self, path: str) -> Path:
    """Resolve a user-supplied path and check it against the allowed roots.

    Relative paths are interpreted relative to ``data_dir``. The resolved
    path must lie under ``data_dir`` or one of ``allow_paths``.

    Raises:
        DuckDBSessionError: If the path is outside every allowed root.
    """
    requested = Path(path).expanduser()
    anchored = requested if requested.is_absolute() else self.data_dir / requested
    candidate = anchored.resolve()

    for root in (self.data_dir, *self.allow_paths):
        if self._is_under_root(candidate, root):
            return candidate
    raise DuckDBSessionError(f"Path not allowed: {path}")
|
|
647
|
+
|
|
648
|
+
@staticmethod
|
|
649
|
+
def _is_under_root(path: Path, root: Path) -> bool:
|
|
650
|
+
try:
|
|
651
|
+
path.relative_to(root.resolve() if hasattr(root, 'resolve') else root)
|
|
652
|
+
return True
|
|
653
|
+
except ValueError:
|
|
654
|
+
return False
|
|
655
|
+
|
|
656
|
+
@staticmethod
|
|
657
|
+
def _escape_sql_string(value: str) -> str:
|
|
658
|
+
return value.replace("'", "''")
|
|
659
|
+
|
|
660
|
+
@staticmethod
|
|
661
|
+
def _quote_ident(name: str) -> str:
|
|
662
|
+
escaped = name.replace('"', '""')
|
|
663
|
+
return f'"{escaped}"'
|
|
664
|
+
|
|
665
|
+
def _quote_table_ref(self, table_name: str) -> str:
    """Quote a possibly dotted table reference, one identifier per part.

    The name is split on every ``.``, so ``catalog.schema.table`` becomes
    three quoted identifiers joined by dots.
    """
    return ".".join(map(self._quote_ident, table_name.split(".")))
|
|
670
|
+
|
|
671
|
+
@staticmethod
|
|
672
|
+
def _parse_database_connection(connection_string: str) -> tuple[DbType, str]:
|
|
673
|
+
if connection_string.startswith(("postgresql://", "postgres://")):
|
|
674
|
+
return "POSTGRES", connection_string
|
|
675
|
+
if connection_string.startswith("mysql://"):
|
|
676
|
+
return "MYSQL", connection_string
|
|
677
|
+
if connection_string.startswith("sqlite://"):
|
|
678
|
+
return "SQLITE", connection_string.replace("sqlite://", "", 1)
|
|
679
|
+
if connection_string.endswith((".db", ".sqlite")):
|
|
680
|
+
return "SQLITE", connection_string
|
|
681
|
+
raise DuckDBSessionError(f"Unsupported database connection string: {connection_string}")
|
|
682
|
+
|
|
683
|
+
def _file_reader_sql(self, path: Path) -> str:
    """Return the DuckDB table-function call that reads ``path``.

    The reader is chosen by the lower-cased file suffix: ``.csv``,
    ``.parquet``, ``.json``, ``.jsonl`` or ``.xlsx``.

    Raises:
        DuckDBSessionError: If the suffix is not supported.
    """
    # Future: tiered JSON attach — try read_json/read_ndjson first (here); on failure
    # or attach_file(flatten=True), use the Python `flatterer` package (same approach
    # as querier/libsqlnow json.rs) to flatten nested JSON into multiple CSV tables
    # and COPY into DuckDB. That gives IATI-style multi-table loads; not needed for
    # flat JSON arrays.
    readers = {
        ".csv": "read_csv('{}', header = true)",
        ".parquet": "read_parquet('{}')",
        ".json": "read_json('{}')",
        ".jsonl": "read_ndjson('{}')",
        ".xlsx": "read_xlsx('{}', header = true)",
    }
    suffix = path.suffix.lower()
    template = readers.get(suffix)
    if template is None:
        raise DuckDBSessionError(f"Unsupported file type: {suffix}")
    return template.format(self._escape_sql_string(str(path)))
|
|
703
|
+
|
|
704
|
+
def _file_attach_sql(self, path: Path, name: str, mode: AttachMode) -> str:
    """Build the CREATE OR REPLACE statement that exposes a file as ``name``.

    ``mode == "view"`` creates a VIEW; any other mode creates a TABLE.
    """
    source = self._file_reader_sql(path)
    target = self._quote_ident(name)
    if mode == "view":
        object_type = "VIEW"
    else:
        object_type = "TABLE"
    return f"CREATE OR REPLACE {object_type} {target} AS SELECT * FROM {source}"
|
|
709
|
+
|
|
710
|
+
def _list_table_names(self, *, native_only: bool = False) -> list[str]:
    """Return only the display names of the tables from _list_tables."""
    tables = self._list_tables(native_only=native_only)
    return [entry["name"] for entry in tables]
|
|
712
|
+
|
|
713
|
+
def _list_tables(self, *, native_only: bool = False) -> list[dict[str, Any]]:
    """List tables with their columns from information_schema.

    Tables hidden by _table_visible, and tables for which no columns were
    found, are skipped. With ``native_only`` and an active database file,
    tables from any catalog other than that file's stem are skipped too.

    Returns:
        Dicts with ``name`` (display name) and ``columns`` (dicts with
        ``name`` and ``type``).
    """
    conn = self._require_conn()
    main_catalog = self.active_db.stem if self.active_db else None
    tables_sql = """
        SELECT table_catalog, table_schema, table_name
        FROM information_schema.tables
        WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
        ORDER BY table_catalog, table_schema, table_name
    """
    columns_sql = """
        SELECT table_catalog, table_schema, table_name, column_name, data_type
        FROM information_schema.columns
        WHERE table_schema NOT IN ('information_schema', 'pg_catalog')
        ORDER BY table_catalog, table_schema, table_name, column_name
    """
    table_rows = conn.execute(tables_sql).fetchall()
    column_rows = conn.execute(columns_sql).fetchall()

    # Group the column rows by their owning (catalog, schema, table).
    columns_by_table: dict[tuple[str, str, str], list[dict[str, str]]] = {}
    for catalog, schema, table, column_name, data_type in column_rows:
        bucket = columns_by_table.setdefault((catalog, schema, table), [])
        bucket.append({"name": column_name, "type": data_type})

    results: list[dict[str, Any]] = []
    for catalog, schema, table in table_rows:
        outside_main = native_only and main_catalog and catalog != main_catalog
        if outside_main or not self._table_visible(catalog, schema, table):
            continue
        display_name = self._display_table_name(catalog, schema, table)
        columns = columns_by_table.get((catalog, schema, table), [])
        if columns:
            results.append({"name": display_name, "columns": columns})
    return results
|
|
751
|
+
|
|
752
|
+
def _describe_table(self, table_name: str, *, native_only: bool = False) -> dict[str, Any]:
    """Return the _list_tables entry whose display name equals ``table_name``.

    Raises:
        DuckDBSessionError: If no listed table has that name.
    """
    for candidate in self._list_tables(native_only=native_only):
        if candidate["name"] == table_name:
            return candidate
    raise DuckDBSessionError(f"Table not found: {table_name}")
|
|
758
|
+
|
|
759
|
+
def _reject_publish_attachments(self, db_path: Path) -> None:
    """Refuse to publish a database that has attached databases.

    Two checks are made: the sidecar must list no attachments, and
    duckdb_databases() must report no catalog other than ``system``,
    ``temp`` and the database's own stem.

    Raises:
        DuckDBSessionError: If either check finds an attachment.
    """
    declared = self._load_sidecar(db_path).get("attachments") or []
    if declared:
        names = ", ".join(item["name"] for item in declared)
        sidecar_name = db_path.with_suffix(".db.json").name
        raise DuckDBSessionError(
            "Publish mode does not support attached databases; "
            f"remove entries from {sidecar_name}: {names}"
        )

    extra_catalogs = self._run_sql(
        """
        SELECT database_name
        FROM duckdb_databases()
        WHERE database_name NOT IN ('system', 'temp', ?)
        """,
        [db_path.stem],
    ).fetchall()
    if not extra_catalogs:
        return
    names = ", ".join(row[0] for row in extra_catalogs)
    raise DuckDBSessionError(
        "Publish mode does not support attached databases; "
        f"found attached catalog(s): {names}"
    )
|
|
785
|
+
|
|
786
|
+
def _table_visible(self, catalog: str, schema: str, table: str) -> bool:
    """Report whether a table should be listed.

    Tables are hidden only when their catalog is an attachment that carries
    a non-empty ``tables`` list which does not include the table.
    """
    for attachment in self.attachments:
        if attachment["name"] != catalog:
            continue
        # Only the first attachment with a matching name is consulted.
        allowed = attachment.get("tables")
        return table in allowed if allowed else True
    return True
|
|
794
|
+
|
|
795
|
+
def _display_table_name(self, catalog: str, schema: str, table: str) -> str:
    """Build the name under which a table is shown.

    Tables outside any attachment are shown as ``table`` (schema ``main``)
    or ``schema.table``. Attached tables are prefixed with the catalog; the
    schema is included only for POSTGRES outside ``public`` and SQLITE
    outside ``main``.
    """
    attachment = None
    for candidate in self.attachments:
        if candidate["name"] == catalog:
            attachment = candidate
            break

    if attachment is None:
        return table if schema == "main" else f"{schema}.{table}"

    db_type = attachment["type"]
    if db_type == "POSTGRES":
        home_schema = "public"
    elif db_type == "SQLITE":
        home_schema = "main"
    else:
        return f"{catalog}.{table}"

    if schema == home_schema:
        return f"{catalog}.{table}"
    return f"{catalog}.{schema}.{table}"
|
|
812
|
+
|
|
813
|
+
@staticmethod
|
|
814
|
+
def _stringify_value(value: Any) -> str:
|
|
815
|
+
if value is None:
|
|
816
|
+
return ""
|
|
817
|
+
if isinstance(value, bool):
|
|
818
|
+
return str(value).lower()
|
|
819
|
+
if isinstance(value, (int, float, Decimal)):
|
|
820
|
+
return str(value)
|
|
821
|
+
if isinstance(value, (datetime, date, time, timedelta)):
|
|
822
|
+
return str(value)
|
|
823
|
+
if isinstance(value, bytes):
|
|
824
|
+
return value.decode("utf-8", errors="replace")
|
|
825
|
+
if isinstance(value, (list, dict, tuple)):
|
|
826
|
+
return str(value)
|
|
827
|
+
return str(value)
|