sql-query-mcp 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_query_mcp-0.3.0/sql_query_mcp.egg-info → sql_query_mcp-0.4.1}/PKG-INFO +16 -7
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/README.md +15 -6
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/pyproject.toml +1 -1
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/__init__.py +1 -1
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/hive.py +18 -9
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/mysql.py +43 -13
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/postgres.py +19 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/app.py +28 -0
- sql_query_mcp-0.4.1/sql_query_mcp/exporter.py +264 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/importer.py +27 -4
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1/sql_query_mcp.egg-info}/PKG-INFO +16 -7
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/SOURCES.txt +2 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_app.py +7 -0
- sql_query_mcp-0.4.1/tests/test_exporter.py +395 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_importer.py +110 -3
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_validator.py +149 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/LICENSE +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/setup.cfg +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/__main__.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/__init__.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/async_queries.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/audit.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/config.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/errors.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/executor.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/introspection.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/namespace.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/registry.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/release_metadata.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/validator.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/entry_points.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/requires.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/top_level.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_async_queries.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_audit.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_config.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_executor.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_metadata.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_namespace.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_registry.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_release_metadata.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-query-mcp
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
|
|
5
5
|
Author: Andy Wang
|
|
6
6
|
License-Expression: MIT
|
|
@@ -62,10 +62,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
62
62
|
## What AI can do with it
|
|
63
63
|
|
|
64
64
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
65
|
-
asynchronous read-only queries,
|
|
66
|
-
use it to help an AI assistant understand
|
|
67
|
-
runs a bounded query, starts a long-running
|
|
68
|
-
|
|
65
|
+
asynchronous read-only queries, batched query result exports, and one narrow
|
|
66
|
+
local file import path. You can use it to help an AI assistant understand
|
|
67
|
+
structure before it generates SQL, runs a bounded query, starts a long-running
|
|
68
|
+
read-only query, exports PostgreSQL or MySQL results to a local file, or imports
|
|
69
|
+
a prepared CSV/XLSX file into an existing table.
|
|
69
70
|
|
|
70
71
|
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
71
72
|
`EXPLAIN ANALYZE` for `explain_query`.
|
|
@@ -83,19 +84,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
|
83
84
|
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
84
85
|
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
85
86
|
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
87
|
+
| `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
|
|
86
88
|
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
87
89
|
|
|
88
90
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
89
91
|
definitions, reviewing indexes, sampling records, running short read-only
|
|
90
92
|
queries with `run_select`, running long read-only queries with `start_query`,
|
|
91
93
|
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
92
|
-
|
|
94
|
+
exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
|
|
95
|
+
also import prepared local files. For full request and response details, see
|
|
93
96
|
`docs/api-reference.md` (Chinese).
|
|
94
97
|
|
|
95
98
|
## How boundaries are constrained
|
|
96
99
|
|
|
97
100
|
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
98
|
-
are available today. Query tools remain read-only, and
|
|
101
|
+
are available today. Query tools remain read-only, PostgreSQL and MySQL query
|
|
102
|
+
results can be exported to local files, and the only database write path is a
|
|
99
103
|
controlled local CSV/XLSX import into existing tables.
|
|
100
104
|
|
|
101
105
|
The service keeps those boundaries explicit in a few ways.
|
|
@@ -112,6 +116,11 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
112
116
|
queries.
|
|
113
117
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
114
118
|
multi-statement input, and records audit logs for each call.
|
|
119
|
+
- `export_query_file` writes files on the MCP server machine. It is synchronous
|
|
120
|
+
but reads database rows and writes CSV/XLSX files in batches. Large exports can
|
|
121
|
+
still hit your MCP client's tool timeout. For XLSX output, UUID values are
|
|
122
|
+
written as text and timezone-aware datetime values are written without the
|
|
123
|
+
timezone. Hive export is not supported yet.
|
|
115
124
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
116
125
|
headers exactly match existing table columns.
|
|
117
126
|
- Hive `import_table_file` is intended for small files only and rejects files
|
|
@@ -30,10 +30,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
30
30
|
## What AI can do with it
|
|
31
31
|
|
|
32
32
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
33
|
-
asynchronous read-only queries,
|
|
34
|
-
use it to help an AI assistant understand
|
|
35
|
-
runs a bounded query, starts a long-running
|
|
36
|
-
|
|
33
|
+
asynchronous read-only queries, batched query result exports, and one narrow
|
|
34
|
+
local file import path. You can use it to help an AI assistant understand
|
|
35
|
+
structure before it generates SQL, runs a bounded query, starts a long-running
|
|
36
|
+
read-only query, exports PostgreSQL or MySQL results to a local file, or imports
|
|
37
|
+
a prepared CSV/XLSX file into an existing table.
|
|
37
38
|
|
|
38
39
|
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
39
40
|
`EXPLAIN ANALYZE` for `explain_query`.
|
|
@@ -51,19 +52,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
|
51
52
|
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
52
53
|
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
53
54
|
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
55
|
+
| `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
|
|
54
56
|
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
55
57
|
|
|
56
58
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
57
59
|
definitions, reviewing indexes, sampling records, running short read-only
|
|
58
60
|
queries with `run_select`, running long read-only queries with `start_query`,
|
|
59
61
|
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
60
|
-
|
|
62
|
+
exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
|
|
63
|
+
also import prepared local files. For full request and response details, see
|
|
61
64
|
`docs/api-reference.md` (Chinese).
|
|
62
65
|
|
|
63
66
|
## How boundaries are constrained
|
|
64
67
|
|
|
65
68
|
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
66
|
-
are available today. Query tools remain read-only, and
|
|
69
|
+
are available today. Query tools remain read-only, PostgreSQL and MySQL query
|
|
70
|
+
results can be exported to local files, and the only database write path is a
|
|
67
71
|
controlled local CSV/XLSX import into existing tables.
|
|
68
72
|
|
|
69
73
|
The service keeps those boundaries explicit in a few ways.
|
|
@@ -80,6 +84,11 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
80
84
|
queries.
|
|
81
85
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
82
86
|
multi-statement input, and records audit logs for each call.
|
|
87
|
+
- `export_query_file` writes files on the MCP server machine. It is synchronous
|
|
88
|
+
but reads database rows and writes CSV/XLSX files in batches. Large exports can
|
|
89
|
+
still hit your MCP client's tool timeout. For XLSX output, UUID values are
|
|
90
|
+
written as text and timezone-aware datetime values are written without the
|
|
91
|
+
timezone. Hive export is not supported yet.
|
|
83
92
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
84
93
|
headers exactly match existing table columns.
|
|
85
94
|
- Hive `import_table_file` is intended for small files only and rejects files
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from contextlib import contextmanager
|
|
6
|
-
from typing import Iterator, List
|
|
6
|
+
from typing import Any, Iterator, List
|
|
7
7
|
from urllib.parse import parse_qs, unquote, urlparse
|
|
8
8
|
|
|
9
9
|
try:
|
|
@@ -52,6 +52,10 @@ class HiveAdapter:
|
|
|
52
52
|
def column_names(self, description) -> List[str]:
|
|
53
53
|
return [column[0] for column in (description or [])]
|
|
54
54
|
|
|
55
|
+
def normalize_identifier(self, value: str) -> str:
    """Return ``value`` case-folded so identifiers compare case-insensitively.

    Hive table and column identifiers are case-insensitive.
    """
    folded = value.casefold()
    return folded
|
|
58
|
+
|
|
55
59
|
def normalize_rows(self, rows, columns: List[str]) -> List[dict]:
|
|
56
60
|
return [dict(zip(columns, row)) for row in rows]
|
|
57
61
|
|
|
@@ -80,7 +84,7 @@ class HiveAdapter:
|
|
|
80
84
|
columns = []
|
|
81
85
|
in_partitions = False
|
|
82
86
|
for row in rows:
|
|
83
|
-
name = self.
|
|
87
|
+
name = self._describe_value(row, "col_name", 0)
|
|
84
88
|
if not name:
|
|
85
89
|
continue
|
|
86
90
|
if str(name).startswith("# Partition Information"):
|
|
@@ -88,9 +92,8 @@ class HiveAdapter:
|
|
|
88
92
|
continue
|
|
89
93
|
if str(name).startswith("#"):
|
|
90
94
|
continue
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
comment = values[2] if len(values) > 2 else None
|
|
95
|
+
data_type = self._describe_value(row, "data_type", 1)
|
|
96
|
+
comment = self._describe_value(row, "comment", 2)
|
|
94
97
|
columns.append(
|
|
95
98
|
{
|
|
96
99
|
"column_name": name,
|
|
@@ -141,7 +144,13 @@ class HiveAdapter:
|
|
|
141
144
|
return next(iter(row.values()))
|
|
142
145
|
return row[0]
|
|
143
146
|
|
|
144
|
-
def
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
147
|
+
def _describe_value(self, row, key: str, index: int) -> Any:
|
|
148
|
+
# Hive table and column identifiers are case-insensitive. DESCRIBE may
|
|
149
|
+
# return tuples or dict rows, so dict key lookup follows Hive semantics.
|
|
150
|
+
if not isinstance(row, dict):
|
|
151
|
+
return row[index] if len(row) > index else None
|
|
152
|
+
lowered_key = key.lower()
|
|
153
|
+
for existing_key, value in row.items():
|
|
154
|
+
if existing_key.lower() == lowered_key:
|
|
155
|
+
return value
|
|
156
|
+
return None
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import json
|
|
6
6
|
from contextlib import contextmanager
|
|
7
|
-
from typing import Iterator, List
|
|
7
|
+
from typing import Any, Iterator, List
|
|
8
8
|
from urllib.parse import parse_qs, unquote, urlparse
|
|
9
9
|
|
|
10
10
|
try:
|
|
@@ -42,6 +42,11 @@ class MySQLAdapter:
|
|
|
42
42
|
with conn.cursor() as cur:
|
|
43
43
|
cur.execute("SET SESSION max_execution_time = %s", (int(timeout_ms),))
|
|
44
44
|
|
|
45
|
+
def export_cursor(self, conn: object):
    """Open the cursor used for exports on ``conn``.

    The cursor class requested is ``pymysql.cursors.SSDictCursor``.

    Raises:
        ConfigurationError: If the PyMySQL dependency is not installed.
    """
    if pymysql is not None:
        return conn.cursor(pymysql.cursors.SSDictCursor)
    raise ConfigurationError("缺少 PyMySQL 依赖,请先安装项目依赖。")
|
|
49
|
+
|
|
45
50
|
def list_databases(self, conn: object) -> List[str]:
|
|
46
51
|
with conn.cursor() as cur:
|
|
47
52
|
cur.execute(
|
|
@@ -52,7 +57,7 @@ class MySQLAdapter:
|
|
|
52
57
|
ORDER BY schema_name
|
|
53
58
|
"""
|
|
54
59
|
)
|
|
55
|
-
return [row
|
|
60
|
+
return [_row_value(row, "database_name") for row in cur.fetchall()]
|
|
56
61
|
|
|
57
62
|
def list_tables(self, conn: object, database: str):
|
|
58
63
|
with conn.cursor() as cur:
|
|
@@ -65,7 +70,14 @@ class MySQLAdapter:
|
|
|
65
70
|
""",
|
|
66
71
|
(database,),
|
|
67
72
|
)
|
|
68
|
-
return
|
|
73
|
+
return [
|
|
74
|
+
{
|
|
75
|
+
"database_name": _row_value(row, "database_name"),
|
|
76
|
+
"table_name": _row_value(row, "table_name"),
|
|
77
|
+
"table_type": _row_value(row, "table_type"),
|
|
78
|
+
}
|
|
79
|
+
for row in cur.fetchall()
|
|
80
|
+
]
|
|
69
81
|
|
|
70
82
|
def describe_table(self, conn: object, database: str, table_name: str):
|
|
71
83
|
with conn.cursor() as cur:
|
|
@@ -96,13 +108,13 @@ class MySQLAdapter:
|
|
|
96
108
|
return {
|
|
97
109
|
"columns": [
|
|
98
110
|
{
|
|
99
|
-
"column_name": row
|
|
100
|
-
"data_type": row
|
|
111
|
+
"column_name": _row_value(row, "column_name"),
|
|
112
|
+
"data_type": _row_value(row, "column_type"),
|
|
101
113
|
"udt_name": None,
|
|
102
|
-
"nullable": row
|
|
103
|
-
"default": row
|
|
104
|
-
"primary_key": row
|
|
105
|
-
"extra": row
|
|
114
|
+
"nullable": _row_value(row, "is_nullable") == "YES",
|
|
115
|
+
"default": _row_value(row, "column_default"),
|
|
116
|
+
"primary_key": _row_value(row, "column_key") == "PRI",
|
|
117
|
+
"extra": _row_value(row, "extra"),
|
|
106
118
|
}
|
|
107
119
|
for row in columns
|
|
108
120
|
],
|
|
@@ -131,7 +143,7 @@ class MySQLAdapter:
|
|
|
131
143
|
def extract_plan(self, rows):
|
|
132
144
|
if not rows:
|
|
133
145
|
return []
|
|
134
|
-
plan = rows[0]
|
|
146
|
+
plan = _row_value(rows[0], "EXPLAIN")
|
|
135
147
|
if isinstance(plan, str):
|
|
136
148
|
try:
|
|
137
149
|
return json.loads(plan)
|
|
@@ -142,6 +154,11 @@ class MySQLAdapter:
|
|
|
142
154
|
def column_names(self, description) -> List[str]:
|
|
143
155
|
return [column[0] for column in (description or [])]
|
|
144
156
|
|
|
157
|
+
def normalize_identifier(self, value: str) -> str:
    """Return ``value`` case-folded so identifiers compare case-insensitively.

    MySQL column names, index names, and column aliases are
    case-insensitive on every platform.
    """
    folded = value.casefold()
    return folded
|
|
161
|
+
|
|
145
162
|
def _parse_dsn(self, dsn: str) -> dict:
|
|
146
163
|
parsed = urlparse(dsn)
|
|
147
164
|
if parsed.scheme not in {"mysql", "mysql+pymysql"}:
|
|
@@ -164,16 +181,29 @@ class MySQLAdapter:
|
|
|
164
181
|
def _normalize_indexes(self, rows: List[dict]) -> List[dict]:
|
|
165
182
|
grouped = {}
|
|
166
183
|
for row in rows:
|
|
167
|
-
index_name = row
|
|
184
|
+
index_name = _row_value(row, "index_name")
|
|
168
185
|
item = grouped.setdefault(
|
|
169
186
|
index_name,
|
|
170
187
|
{
|
|
171
188
|
"index_name": index_name,
|
|
172
189
|
"columns": [],
|
|
173
|
-
"unique": row
|
|
190
|
+
"unique": _row_value(row, "non_unique") == 0,
|
|
174
191
|
"primary_key": index_name == "PRIMARY",
|
|
175
192
|
"definition": None,
|
|
176
193
|
},
|
|
177
194
|
)
|
|
178
|
-
item["columns"].append(row
|
|
195
|
+
item["columns"].append(_row_value(row, "column_name"))
|
|
179
196
|
return [grouped[name] for name in sorted(grouped)]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _row_value(row: dict, key: str) -> Any:
|
|
200
|
+
# MySQL column names, index names, and column aliases are case-insensitive,
|
|
201
|
+
# and drivers may expose information_schema labels as COLUMN_NAME or
|
|
202
|
+
# column_name. Keep this normalization local to the MySQL adapter.
|
|
203
|
+
if key in row:
|
|
204
|
+
return row[key]
|
|
205
|
+
lowered_key = key.lower()
|
|
206
|
+
for existing_key, value in row.items():
|
|
207
|
+
if existing_key.lower() == lowered_key:
|
|
208
|
+
return value
|
|
209
|
+
raise KeyError(key)
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from contextlib import contextmanager
|
|
6
|
+
from uuid import uuid4
|
|
6
7
|
from typing import Iterator, List
|
|
7
8
|
|
|
8
9
|
try:
|
|
@@ -37,6 +38,19 @@ class PostgresAdapter:
|
|
|
37
38
|
with conn.cursor() as cur:
|
|
38
39
|
cur.execute("SELECT set_config('statement_timeout', %s, false)", (str(timeout_ms),))
|
|
39
40
|
|
|
41
|
+
@contextmanager
def export_cursor(self, conn: object) -> Iterator[object]:
    """Yield a uniquely named cursor on ``conn`` for reading export rows.

    If the connection reports ``autocommit is True``, autocommit is turned
    off for the duration of the block; afterwards the connection is rolled
    back and autocommit is switched back on. Connections in any other
    autocommit state are left untouched.

    NOTE(review): psycopg treats a cursor created with ``name=`` as a
    server-side cursor, which presumably is why autocommit is disabled
    here -- confirm against the psycopg documentation.
    """
    was_autocommit = getattr(conn, "autocommit", None) is True
    if was_autocommit:
        conn.autocommit = False
    try:
        # A random suffix keeps concurrent exports from sharing a cursor name.
        cursor_name = f"sql_query_mcp_export_{uuid4().hex}"
        with conn.cursor(name=cursor_name) as cur:
            yield cur
    finally:
        if was_autocommit:
            conn.rollback()
            conn.autocommit = True
|
|
53
|
+
|
|
40
54
|
def list_schemas(self, conn: object) -> List[str]:
|
|
41
55
|
with conn.cursor() as cur:
|
|
42
56
|
cur.execute(
|
|
@@ -174,6 +188,11 @@ class PostgresAdapter:
|
|
|
174
188
|
def column_names(self, description) -> List[str]:
|
|
175
189
|
return [column.name for column in (description or [])]
|
|
176
190
|
|
|
191
|
+
def normalize_identifier(self, value: str) -> str:
    """Return ``value`` unchanged; identifier matching is exact here.

    PostgreSQL quoted identifiers are case-sensitive, and this adapter
    quotes import columns with sql.Identifier, so header matching is exact.
    """
    unchanged = value
    return unchanged
|
|
195
|
+
|
|
177
196
|
def _get_pool(self, connection_id: str, dsn: str) -> ConnectionPool:
|
|
178
197
|
if ConnectionPool is None or dict_row is None:
|
|
179
198
|
raise ConfigurationError("缺少 psycopg / psycopg-pool 依赖,请先安装项目依赖。")
|
|
@@ -11,6 +11,7 @@ from .audit import AuditLogger
|
|
|
11
11
|
from .config import load_config
|
|
12
12
|
from .errors import SqlQueryMCPError
|
|
13
13
|
from .executor import QueryExecutor
|
|
14
|
+
from .exporter import QueryExporter
|
|
14
15
|
from .importer import TableFileImporter
|
|
15
16
|
from .introspection import MetadataService
|
|
16
17
|
from .registry import ConnectionRegistry
|
|
@@ -22,6 +23,7 @@ def create_app() -> FastMCP:
|
|
|
22
23
|
audit_logger = AuditLogger(app_config.settings.audit_log_path)
|
|
23
24
|
metadata = MetadataService(registry, app_config.settings, audit_logger)
|
|
24
25
|
executor = QueryExecutor(registry, app_config.settings, audit_logger)
|
|
26
|
+
exporter = QueryExporter(registry, app_config.settings, audit_logger)
|
|
25
27
|
importer = TableFileImporter(registry, app_config.settings, audit_logger)
|
|
26
28
|
async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
|
|
27
29
|
|
|
@@ -90,6 +92,32 @@ def create_app() -> FastMCP:
|
|
|
90
92
|
|
|
91
93
|
return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
|
|
92
94
|
|
|
95
|
+
@mcp.tool()
def export_query_file(
    connection_id: str,
    sql: str,
    output_path: str,
    format: str = "csv",
    limit: Optional[int] = 1000,
    export_all: bool = False,
    file_name: Optional[str] = None,
    overwrite: bool = False,
) -> dict:
    """Export a read-only query result to a local CSV or XLSX file."""

    # The work is deferred into a zero-argument callable so that _run_tool
    # decides when it runs. Arguments are forwarded positionally, in the
    # same order they are declared above.
    def _export():
        return exporter.export_query_file(
            connection_id,
            sql,
            output_path,
            format,
            limit,
            export_all,
            file_name,
            overwrite,
        )

    return _run_tool(_export)
|
|
120
|
+
|
|
93
121
|
@mcp.tool()
|
|
94
122
|
def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
|
|
95
123
|
"""Start an asynchronous read-only SELECT or CTE query."""
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""Controlled query result exports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, time as datetime_time, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, cast
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from openpyxl import Workbook
|
|
14
|
+
except ImportError: # pragma: no cover - runtime dependency
|
|
15
|
+
Workbook = None
|
|
16
|
+
|
|
17
|
+
from .audit import AuditLogger
|
|
18
|
+
from .config import ServerSettings
|
|
19
|
+
from .errors import QueryExecutionError, sanitize_error_message
|
|
20
|
+
from .validator import clamp_limit, summarize_sql, validate_select_sql
|
|
21
|
+
|
|
22
|
+
EXPORT_BATCH_SIZE = 1000
|
|
23
|
+
SUPPORTED_EXPORT_ENGINES = {"postgres", "mysql"}
|
|
24
|
+
SUPPORTED_EXPORT_FORMATS = {"csv", "xlsx"}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class QueryExporter:
    """Run validated read-only queries and write their results to local files."""

    def __init__(self, registry: Any, settings: ServerSettings, audit_logger: AuditLogger):
        # Collaborators: connection lookup, server limits/timeouts, audit trail.
        self._registry, self._settings, self._audit = registry, settings, audit_logger

    def export_query_file(
        self,
        connection_id: str,
        sql_text: str,
        output_path: str,
        format: str = "csv",
        limit: Optional[int] = 1000,
        export_all: bool = False,
        file_name: Optional[str] = None,
        overwrite: bool = False,
    ) -> Dict[str, object]:
        """Execute a validated query and stream its rows into a CSV/XLSX file.

        Args:
            connection_id: Identifier used to look up the connection config.
            sql_text: Query text; it is passed through ``validate_select_sql``.
            output_path: Target file path or existing directory.
            format: ``"csv"`` or ``"xlsx"``; case and leading dots are ignored.
            limit: Requested row cap, clamped via ``clamp_limit`` with a
                default of 1000 and the configured ``max_limit``. Ignored
                when ``export_all`` is true.
            export_all: When true, the validated query runs without a
                wrapping LIMIT.
            file_name: Optional file name, forwarded to path resolution.
            overwrite: Forwarded to path resolution.

        Returns:
            A dict with ``connection_id``, ``engine``, ``file_path``,
            ``format``, ``row_count``, ``duration_ms``, ``export_all`` and
            ``applied_limit``.

        Raises:
            QueryExecutionError: For any failure. The message is sanitized
                and the original exception is chained as the cause. Both
                outcomes are written to the audit log.
        """
        started = time.perf_counter()
        # These are filled in progressively so that the failure audit entry
        # can report however far the export got.
        config = None
        final_path: Optional[Path] = None
        row_count = 0
        applied_limit = None
        sql_summary = summarize_sql(sql_text)
        normalized_format = str(format).lower().lstrip(".")

        def audit_details() -> Dict[str, object]:
            # Reads the enclosing variables at call time, not definition time.
            return _audit_extra(config, final_path, normalized_format, export_all, applied_limit)

        try:
            if normalized_format not in SUPPORTED_EXPORT_FORMATS:
                raise QueryExecutionError("导出格式仅支持 csv 和 xlsx。")
            config = self._registry.get_connection_config(connection_id)
            if config.engine not in SUPPORTED_EXPORT_ENGINES:
                raise QueryExecutionError("export_query_file 首版仅支持 PostgreSQL 和 MySQL。")
            cleaned_sql = validate_select_sql(sql_text, config.engine)
            sql_summary = summarize_sql(cleaned_sql)
            final_path = _resolve_output_file(
                output_path,
                normalized_format,
                file_name=file_name,
                overwrite=overwrite,
            )
            if export_all:
                query = cleaned_sql
            else:
                applied_limit = clamp_limit(limit, 1000, self._settings.max_limit)
                query = _build_exact_limited_query(cleaned_sql, applied_limit)

            with self._registry.connection_from_config(config) as (conn, adapter):
                _apply_statement_timeout(adapter, conn, self._settings.statement_timeout_ms)
                with _open_export_cursor(adapter, conn) as cursor:
                    cursor.execute(query)
                    columns = adapter.column_names(cursor.description)
                    batches = _iter_batches(cursor, columns, adapter)
                    write_rows = _write_csv if normalized_format == "csv" else _write_xlsx
                    row_count = write_rows(final_path, columns, batches)

            duration_ms = _elapsed_ms(started)
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=True,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                extra=audit_details(),
            )
            result: Dict[str, object] = {
                "connection_id": connection_id,
                "engine": config.engine,
                "file_path": str(final_path),
                "format": normalized_format,
                "row_count": row_count,
                "duration_ms": duration_ms,
                "export_all": export_all,
                "applied_limit": applied_limit,
            }
            return result
        except Exception as exc:
            duration_ms = _elapsed_ms(started)
            sanitized = sanitize_error_message(str(exc))
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=False,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                error=sanitized,
                extra=audit_details(),
            )
            raise QueryExecutionError(sanitized) from exc
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _iter_batches(cur: Any, columns: Sequence[str], adapter: Any) -> Iterable[List[object]]:
    """Yield rows from ``cur`` in chunks of ``EXPORT_BATCH_SIZE``.

    Iteration ends at the first empty ``fetchmany`` result. When the adapter
    provides ``normalize_rows``, each chunk is passed through it together
    with a fresh list copy of ``columns`` before being yielded.
    """
    rows = cur.fetchmany(EXPORT_BATCH_SIZE)
    while rows:
        if hasattr(adapter, "normalize_rows"):
            rows = adapter.normalize_rows(rows, list(columns))
        yield cast(List[object], rows)
        rows = cur.fetchmany(EXPORT_BATCH_SIZE)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_exact_limited_query(sql: str, row_limit: int) -> str:
|
|
131
|
+
return f"SELECT * FROM ({sql}) AS pq_result LIMIT {int(row_limit)}"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _write_csv(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header row and every batched row to ``path`` as UTF-8 CSV.

    Returns:
        The number of data rows written, not counting the header.
    """
    written = 0
    # newline="" lets the csv module control line endings itself.
    with path.open("w", encoding="utf-8", newline="") as handle:
        emit = csv.writer(handle).writerow
        emit(columns)
        for batch in batches:
            for row in batch:
                emit(_row_values(row, columns))
                written += 1
    return written
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _write_xlsx(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header row and every batched row to an XLSX file at ``path``.

    Rows go to a sheet named ``"Export"`` in a workbook created with
    ``write_only=True``. Each value is passed through
    ``_normalize_xlsx_value`` first. The file is saved once, after the last
    row has been appended.

    Returns:
        The number of data rows written, not counting the header.

    Raises:
        QueryExecutionError: If openpyxl is missing, or if appending a row
            fails. In the latter case the message lists each column with
            the Python type of its value.
    """
    if Workbook is None:
        raise QueryExecutionError("缺少 openpyxl 依赖,请先安装项目依赖。")

    book = Workbook(write_only=True)
    sheet = book.create_sheet("Export")
    sheet.append(list(columns))

    written = 0
    for batch in batches:
        for row in batch:
            cells = _row_values(row, columns, normalize_value=_normalize_xlsx_value)
            try:
                sheet.append(cells)
            except Exception as exc:
                raise QueryExecutionError(_format_xlsx_error(columns, cells, exc)) from exc
            else:
                written += 1

    book.save(path)
    return written
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _row_values(
|
|
166
|
+
row: object,
|
|
167
|
+
columns: Sequence[str],
|
|
168
|
+
normalize_value: Optional[Callable[[object], object]] = None,
|
|
169
|
+
) -> List[object]:
|
|
170
|
+
if isinstance(row, dict):
|
|
171
|
+
values: List[object] = [row.get(column) for column in columns]
|
|
172
|
+
elif isinstance(row, (list, tuple)):
|
|
173
|
+
values = list(row)
|
|
174
|
+
else:
|
|
175
|
+
values = [row]
|
|
176
|
+
if normalize_value is None:
|
|
177
|
+
return values
|
|
178
|
+
return [normalize_value(value) for value in values]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _normalize_xlsx_value(value: object) -> object:
|
|
182
|
+
if isinstance(value, uuid.UUID):
|
|
183
|
+
return str(value)
|
|
184
|
+
if isinstance(value, datetime) and value.tzinfo is not None:
|
|
185
|
+
return value.replace(tzinfo=None)
|
|
186
|
+
if isinstance(value, datetime_time) and value.tzinfo is not None:
|
|
187
|
+
return value.replace(tzinfo=None)
|
|
188
|
+
return value
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _format_xlsx_error(columns: Sequence[str], values: Sequence[object], exc: Exception) -> str:
|
|
192
|
+
message = str(exc) or exc.__class__.__name__
|
|
193
|
+
details = ", ".join(
|
|
194
|
+
f"{column}={type(value).__name__}"
|
|
195
|
+
for column, value in zip(columns, values)
|
|
196
|
+
)
|
|
197
|
+
return f"XLSX 导出失败: {message}; columns: {details}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _resolve_output_file(output_path: str, format: str, file_name: Optional[str], overwrite: bool) -> Path:
    """Work out the file an export should be written to.

    When ``output_path`` is an existing directory, the file is placed in
    it, named ``file_name`` or a UTC-timestamped ``export_...`` name.
    Otherwise ``output_path`` is taken as the file path itself and
    ``file_name`` must not be given. The ``.<format>`` extension is
    appended unless the name already ends with it.

    Returns:
        The resolved path. If it already exists and ``overwrite`` is false,
        an unused numbered sibling path is returned instead.

    Raises:
        QueryExecutionError: If ``file_name`` is combined with a file path,
            or if the parent directory of the target does not exist.
    """
    target = Path(output_path).expanduser()
    extension = f".{format}"
    into_directory = target.exists() and target.is_dir()

    if not into_directory and file_name:
        raise QueryExecutionError("output_path 为文件路径时不能同时传 file_name。")

    if into_directory:
        stem = file_name or f"export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
        candidate = target / _with_suffix(stem, extension)
    else:
        candidate = Path(_with_suffix(str(target), extension)).expanduser()

    if not candidate.parent.exists():
        raise QueryExecutionError("导出目录不存在。")
    if not overwrite and candidate.exists():
        return _next_available_path(candidate)
    return candidate
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _with_suffix(value: str, suffix: str) -> str:
|
|
218
|
+
return value if value.lower().endswith(suffix) else value + suffix
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _next_available_path(path: Path) -> Path:
|
|
222
|
+
stem = path.stem
|
|
223
|
+
suffix = path.suffix
|
|
224
|
+
parent = path.parent
|
|
225
|
+
index = 1
|
|
226
|
+
while True:
|
|
227
|
+
candidate = parent / f"{stem} ({index}){suffix}"
|
|
228
|
+
if not candidate.exists():
|
|
229
|
+
return candidate
|
|
230
|
+
index += 1
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _apply_statement_timeout(adapter: Any, conn: Any, timeout_ms: Optional[int]) -> None:
|
|
234
|
+
if timeout_ms is not None:
|
|
235
|
+
getattr(adapter, "set_statement_timeout")(conn, timeout_ms)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _open_export_cursor(adapter: Any, conn: Any) -> Any:
|
|
239
|
+
export_cursor = getattr(adapter, "export_cursor", None)
|
|
240
|
+
if callable(export_cursor):
|
|
241
|
+
return export_cursor(conn)
|
|
242
|
+
return conn.cursor()
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _elapsed_ms(started: float) -> int:
|
|
246
|
+
return int((time.perf_counter() - started) * 1000)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _audit_extra(
|
|
250
|
+
config: Any,
|
|
251
|
+
file_path: Optional[Path],
|
|
252
|
+
format: str,
|
|
253
|
+
export_all: bool,
|
|
254
|
+
applied_limit: Optional[int],
|
|
255
|
+
) -> Dict[str, object]:
|
|
256
|
+
extra: Dict[str, object] = {
|
|
257
|
+
"file_path": str(file_path) if file_path is not None else None,
|
|
258
|
+
"format": format,
|
|
259
|
+
"export_all": export_all,
|
|
260
|
+
"applied_limit": applied_limit,
|
|
261
|
+
}
|
|
262
|
+
if config is not None:
|
|
263
|
+
extra["engine"] = config.engine
|
|
264
|
+
return extra
|