sql-query-mcp 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {sql_query_mcp-0.3.0/sql_query_mcp.egg-info → sql_query_mcp-0.4.1}/PKG-INFO +16 -7
  2. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/README.md +15 -6
  3. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/pyproject.toml +1 -1
  4. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/__init__.py +1 -1
  5. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/hive.py +18 -9
  6. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/mysql.py +43 -13
  7. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/postgres.py +19 -0
  8. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/app.py +28 -0
  9. sql_query_mcp-0.4.1/sql_query_mcp/exporter.py +264 -0
  10. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/importer.py +27 -4
  11. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1/sql_query_mcp.egg-info}/PKG-INFO +16 -7
  12. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/SOURCES.txt +2 -0
  13. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_app.py +7 -0
  14. sql_query_mcp-0.4.1/tests/test_exporter.py +395 -0
  15. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_importer.py +110 -3
  16. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_validator.py +149 -0
  17. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/LICENSE +0 -0
  18. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/setup.cfg +0 -0
  19. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/__main__.py +0 -0
  20. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/adapters/__init__.py +0 -0
  21. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/async_queries.py +0 -0
  22. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/audit.py +0 -0
  23. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/config.py +0 -0
  24. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/errors.py +0 -0
  25. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/executor.py +0 -0
  26. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/introspection.py +0 -0
  27. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/namespace.py +0 -0
  28. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/registry.py +0 -0
  29. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/release_metadata.py +0 -0
  30. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp/validator.py +0 -0
  31. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
  32. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/entry_points.txt +0 -0
  33. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/requires.txt +0 -0
  34. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/sql_query_mcp.egg-info/top_level.txt +0 -0
  35. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_async_queries.py +0 -0
  36. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_audit.py +0 -0
  37. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_config.py +0 -0
  38. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_executor.py +0 -0
  39. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_metadata.py +0 -0
  40. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_namespace.py +0 -0
  41. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_registry.py +0 -0
  42. {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.1}/tests/test_release_metadata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-query-mcp
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
5
5
  Author: Andy Wang
6
6
  License-Expression: MIT
@@ -62,10 +62,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
62
62
  ## What AI can do with it
63
63
 
64
64
  The current tool set focuses on database discovery, controlled query workflows,
65
- asynchronous read-only queries, and one narrow local file import path. You can
66
- use it to help an AI assistant understand structure before it generates SQL,
67
- runs a bounded query, starts a long-running read-only query, or imports a
68
- prepared CSV/XLSX file into an existing table.
65
+ asynchronous read-only queries, batched query result exports, and one narrow
66
+ local file import path. You can use it to help an AI assistant understand
67
+ structure before it generates SQL, runs a bounded query, starts a long-running
68
+ read-only query, exports PostgreSQL or MySQL results to a local file, or imports
69
+ a prepared CSV/XLSX file into an existing table.
69
70
 
70
71
  MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
71
72
  `EXPLAIN ANALYZE` for `explain_query`.
@@ -83,19 +84,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
83
84
  | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
84
85
  | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
85
86
  | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
87
+ | `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
86
88
  | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
87
89
 
88
90
  These tools are useful for tasks such as listing namespaces, inspecting table
89
91
  definitions, reviewing indexes, sampling records, running short read-only
90
92
  queries with `run_select`, running long read-only queries with `start_query`,
91
93
  `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
92
- importing prepared local files. For full request and response details, see
94
+ exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
95
+ also import prepared local files. For full request and response details, see
93
96
  `docs/api-reference.md` (Chinese).
94
97
 
95
98
  ## How boundaries are constrained
96
99
 
97
100
  The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
98
- are available today. Query tools remain read-only, and the only write path is a
101
+ are available today. Query tools remain read-only, PostgreSQL and MySQL query
102
+ results can be exported to local files, and the only database write path is a
99
103
  controlled local CSV/XLSX import into existing tables.
100
104
 
101
105
  The service keeps those boundaries explicit in a few ways.
@@ -112,6 +116,11 @@ The service keeps those boundaries explicit in a few ways.
112
116
  queries.
113
117
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
114
118
  multi-statement input, and records audit logs for each call.
119
+ - `export_query_file` writes files on the MCP server machine. It is synchronous
120
+ but reads database rows and writes CSV/XLSX files in batches. Large exports can
121
+ still hit your MCP client's tool timeout. For XLSX output, UUID values are
122
+ written as text and timezone-aware datetime values are written without the
123
+ timezone. Hive export is not supported yet.
115
124
  - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
116
125
  headers exactly match existing table columns.
117
126
  - Hive `import_table_file` is intended for small files only and rejects files
@@ -30,10 +30,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
30
30
  ## What AI can do with it
31
31
 
32
32
  The current tool set focuses on database discovery, controlled query workflows,
33
- asynchronous read-only queries, and one narrow local file import path. You can
34
- use it to help an AI assistant understand structure before it generates SQL,
35
- runs a bounded query, starts a long-running read-only query, or imports a
36
- prepared CSV/XLSX file into an existing table.
33
+ asynchronous read-only queries, batched query result exports, and one narrow
34
+ local file import path. You can use it to help an AI assistant understand
35
+ structure before it generates SQL, runs a bounded query, starts a long-running
36
+ read-only query, exports PostgreSQL or MySQL results to a local file, or imports
37
+ a prepared CSV/XLSX file into an existing table.
37
38
 
38
39
  MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
39
40
  `EXPLAIN ANALYZE` for `explain_query`.
@@ -51,19 +52,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
51
52
  | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
52
53
  | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
53
54
  | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
55
+ | `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
54
56
  | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
55
57
 
56
58
  These tools are useful for tasks such as listing namespaces, inspecting table
57
59
  definitions, reviewing indexes, sampling records, running short read-only
58
60
  queries with `run_select`, running long read-only queries with `start_query`,
59
61
  `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
60
- importing prepared local files. For full request and response details, see
62
+ exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
63
+ also import prepared local files. For full request and response details, see
61
64
  `docs/api-reference.md` (Chinese).
62
65
 
63
66
  ## How boundaries are constrained
64
67
 
65
68
  The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
66
- are available today. Query tools remain read-only, and the only write path is a
69
+ are available today. Query tools remain read-only, PostgreSQL and MySQL query
70
+ results can be exported to local files, and the only database write path is a
67
71
  controlled local CSV/XLSX import into existing tables.
68
72
 
69
73
  The service keeps those boundaries explicit in a few ways.
@@ -80,6 +84,11 @@ The service keeps those boundaries explicit in a few ways.
80
84
  queries.
81
85
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
82
86
  multi-statement input, and records audit logs for each call.
87
+ - `export_query_file` writes files on the MCP server machine. It is synchronous
88
+ but reads database rows and writes CSV/XLSX files in batches. Large exports can
89
+ still hit your MCP client's tool timeout. For XLSX output, UUID values are
90
+ written as text and timezone-aware datetime values are written without the
91
+ timezone. Hive export is not supported yet.
83
92
  - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
84
93
  headers exactly match existing table columns.
85
94
  - Hive `import_table_file` is intended for small files only and rejects files
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sql-query-mcp"
7
- version = "0.3.0"
7
+ version = "0.4.1"
8
8
  description = "Read-only SQL MCP server for PostgreSQL and MySQL."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -2,4 +2,4 @@
2
2
 
3
3
  __all__ = ["__version__"]
4
4
 
5
- __version__ = "0.1.4"
5
+ __version__ = "0.4.1"
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from contextlib import contextmanager
6
- from typing import Iterator, List
6
+ from typing import Any, Iterator, List
7
7
  from urllib.parse import parse_qs, unquote, urlparse
8
8
 
9
9
  try:
@@ -52,6 +52,10 @@ class HiveAdapter:
52
52
  def column_names(self, description) -> List[str]:
53
53
  return [column[0] for column in (description or [])]
54
54
 
55
def normalize_identifier(self, value: str) -> str:
    """Return the case-folded form of ``value`` for identifier matching.

    Hive table and column identifiers are case-insensitive, so names are
    compared after ``str.casefold``.
    """
    folded = value.casefold()
    return folded
58
+
55
59
  def normalize_rows(self, rows, columns: List[str]) -> List[dict]:
56
60
  return [dict(zip(columns, row)) for row in rows]
57
61
 
@@ -80,7 +84,7 @@ class HiveAdapter:
80
84
  columns = []
81
85
  in_partitions = False
82
86
  for row in rows:
83
- name = self._first_value(row)
87
+ name = self._describe_value(row, "col_name", 0)
84
88
  if not name:
85
89
  continue
86
90
  if str(name).startswith("# Partition Information"):
@@ -88,9 +92,8 @@ class HiveAdapter:
88
92
  continue
89
93
  if str(name).startswith("#"):
90
94
  continue
91
- values = self._row_values(row)
92
- data_type = values[1] if len(values) > 1 else None
93
- comment = values[2] if len(values) > 2 else None
95
+ data_type = self._describe_value(row, "data_type", 1)
96
+ comment = self._describe_value(row, "comment", 2)
94
97
  columns.append(
95
98
  {
96
99
  "column_name": name,
@@ -141,7 +144,13 @@ class HiveAdapter:
141
144
  return next(iter(row.values()))
142
145
  return row[0]
143
146
 
144
- def _row_values(self, row):
145
- if isinstance(row, dict):
146
- return list(row.values())
147
- return list(row)
147
def _describe_value(self, row, key: str, index: int) -> Any:
    """Read one field from a DESCRIBE result row.

    Rows may be dicts or positional sequences. For a dict, the first entry
    whose key equals ``key`` ignoring case is returned (Hive table and column
    identifiers are case-insensitive). For any other row, the element at
    ``index`` is returned. A missing key or an out-of-range index yields
    ``None``.
    """
    if isinstance(row, dict):
        wanted = key.lower()
        return next(
            (value for name, value in row.items() if name.lower() == wanted),
            None,
        )
    return row[index] if index < len(row) else None
@@ -4,7 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import json
6
6
  from contextlib import contextmanager
7
- from typing import Iterator, List
7
+ from typing import Any, Iterator, List
8
8
  from urllib.parse import parse_qs, unquote, urlparse
9
9
 
10
10
  try:
@@ -42,6 +42,11 @@ class MySQLAdapter:
42
42
  with conn.cursor() as cur:
43
43
  cur.execute("SET SESSION max_execution_time = %s", (int(timeout_ms),))
44
44
 
45
def export_cursor(self, conn: object):
    """Open the cursor used for exports on ``conn``.

    The cursor class is ``pymysql.cursors.SSDictCursor``.
    NOTE(review): per the PyMySQL docs this is the unbuffered dict cursor,
    which should keep large result sets out of client memory - confirm.

    Raises:
        ConfigurationError: when PyMySQL could not be imported.
    """
    if pymysql is not None:
        return conn.cursor(pymysql.cursors.SSDictCursor)
    raise ConfigurationError("缺少 PyMySQL 依赖,请先安装项目依赖。")
49
+
45
50
  def list_databases(self, conn: object) -> List[str]:
46
51
  with conn.cursor() as cur:
47
52
  cur.execute(
@@ -52,7 +57,7 @@ class MySQLAdapter:
52
57
  ORDER BY schema_name
53
58
  """
54
59
  )
55
- return [row["database_name"] for row in cur.fetchall()]
60
+ return [_row_value(row, "database_name") for row in cur.fetchall()]
56
61
 
57
62
  def list_tables(self, conn: object, database: str):
58
63
  with conn.cursor() as cur:
@@ -65,7 +70,14 @@ class MySQLAdapter:
65
70
  """,
66
71
  (database,),
67
72
  )
68
- return cur.fetchall()
73
+ return [
74
+ {
75
+ "database_name": _row_value(row, "database_name"),
76
+ "table_name": _row_value(row, "table_name"),
77
+ "table_type": _row_value(row, "table_type"),
78
+ }
79
+ for row in cur.fetchall()
80
+ ]
69
81
 
70
82
  def describe_table(self, conn: object, database: str, table_name: str):
71
83
  with conn.cursor() as cur:
@@ -96,13 +108,13 @@ class MySQLAdapter:
96
108
  return {
97
109
  "columns": [
98
110
  {
99
- "column_name": row["column_name"],
100
- "data_type": row["column_type"],
111
+ "column_name": _row_value(row, "column_name"),
112
+ "data_type": _row_value(row, "column_type"),
101
113
  "udt_name": None,
102
- "nullable": row["is_nullable"] == "YES",
103
- "default": row["column_default"],
104
- "primary_key": row["column_key"] == "PRI",
105
- "extra": row["extra"],
114
+ "nullable": _row_value(row, "is_nullable") == "YES",
115
+ "default": _row_value(row, "column_default"),
116
+ "primary_key": _row_value(row, "column_key") == "PRI",
117
+ "extra": _row_value(row, "extra"),
106
118
  }
107
119
  for row in columns
108
120
  ],
@@ -131,7 +143,7 @@ class MySQLAdapter:
131
143
  def extract_plan(self, rows):
132
144
  if not rows:
133
145
  return []
134
- plan = rows[0].get("EXPLAIN", [])
146
+ plan = _row_value(rows[0], "EXPLAIN")
135
147
  if isinstance(plan, str):
136
148
  try:
137
149
  return json.loads(plan)
@@ -142,6 +154,11 @@ class MySQLAdapter:
142
154
  def column_names(self, description) -> List[str]:
143
155
  return [column[0] for column in (description or [])]
144
156
 
157
def normalize_identifier(self, value: str) -> str:
    """Return the case-folded form of ``value`` for identifier matching.

    MySQL column names, index names, and column aliases are case-insensitive
    on every platform, so names are compared after ``str.casefold``.
    """
    folded = value.casefold()
    return folded
161
+
145
162
  def _parse_dsn(self, dsn: str) -> dict:
146
163
  parsed = urlparse(dsn)
147
164
  if parsed.scheme not in {"mysql", "mysql+pymysql"}:
@@ -164,16 +181,29 @@ class MySQLAdapter:
164
181
  def _normalize_indexes(self, rows: List[dict]) -> List[dict]:
165
182
  grouped = {}
166
183
  for row in rows:
167
- index_name = row["index_name"]
184
+ index_name = _row_value(row, "index_name")
168
185
  item = grouped.setdefault(
169
186
  index_name,
170
187
  {
171
188
  "index_name": index_name,
172
189
  "columns": [],
173
- "unique": row["non_unique"] == 0,
190
+ "unique": _row_value(row, "non_unique") == 0,
174
191
  "primary_key": index_name == "PRIMARY",
175
192
  "definition": None,
176
193
  },
177
194
  )
178
- item["columns"].append(row["column_name"])
195
+ item["columns"].append(_row_value(row, "column_name"))
179
196
  return [grouped[name] for name in sorted(grouped)]
197
+
198
+
199
def _row_value(row: dict, key: str) -> Any:
    """Look up ``key`` in a MySQL result row, ignoring case when needed.

    MySQL column names, index names, and column aliases are case-insensitive,
    and drivers may expose information_schema labels as COLUMN_NAME or
    column_name. An exact key match wins; otherwise the first key that equals
    ``key`` after lower-casing is used. This normalization is kept local to
    the MySQL adapter.

    Raises:
        KeyError: when no key matches, even case-insensitively.
    """
    if key in row:
        return row[key]
    wanted = key.lower()
    for name in row:
        if name.lower() == wanted:
            return row[name]
    raise KeyError(key)
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from contextlib import contextmanager
6
+ from uuid import uuid4
6
7
  from typing import Iterator, List
7
8
 
8
9
  try:
@@ -37,6 +38,19 @@ class PostgresAdapter:
37
38
  with conn.cursor() as cur:
38
39
  cur.execute("SELECT set_config('statement_timeout', %s, false)", (str(timeout_ms),))
39
40
 
41
@contextmanager
def export_cursor(self, conn: object) -> Iterator[object]:
    """Yield a uniquely named cursor on ``conn`` for streaming an export.

    When the connection reports ``autocommit is True`` it is switched off for
    the lifetime of the cursor; afterwards the connection is rolled back and
    autocommit is switched back on. Connections in any other autocommit state
    are left as they are.
    NOTE(review): in psycopg a named cursor is a server-side cursor, which
    needs an open transaction - presumably the reason for the autocommit
    toggle; confirm.
    """
    restore_autocommit = getattr(conn, "autocommit", None) is True
    if restore_autocommit:
        conn.autocommit = False
    try:
        cursor_name = "sql_query_mcp_export_" + uuid4().hex
        with conn.cursor(name=cursor_name) as server_cursor:
            yield server_cursor
    finally:
        if restore_autocommit:
            conn.rollback()
            conn.autocommit = True
+
40
54
  def list_schemas(self, conn: object) -> List[str]:
41
55
  with conn.cursor() as cur:
42
56
  cur.execute(
@@ -174,6 +188,11 @@ class PostgresAdapter:
174
188
  def column_names(self, description) -> List[str]:
175
189
  return [column.name for column in (description or [])]
176
190
 
191
def normalize_identifier(self, value: str) -> str:
    """Return ``value`` unchanged.

    PostgreSQL quoted identifiers are case-sensitive, and this adapter quotes
    import columns with sql.Identifier, so header matching is exact.
    """
    unchanged = value
    return unchanged
195
+
177
196
  def _get_pool(self, connection_id: str, dsn: str) -> ConnectionPool:
178
197
  if ConnectionPool is None or dict_row is None:
179
198
  raise ConfigurationError("缺少 psycopg / psycopg-pool 依赖,请先安装项目依赖。")
@@ -11,6 +11,7 @@ from .audit import AuditLogger
11
11
  from .config import load_config
12
12
  from .errors import SqlQueryMCPError
13
13
  from .executor import QueryExecutor
14
+ from .exporter import QueryExporter
14
15
  from .importer import TableFileImporter
15
16
  from .introspection import MetadataService
16
17
  from .registry import ConnectionRegistry
@@ -22,6 +23,7 @@ def create_app() -> FastMCP:
22
23
  audit_logger = AuditLogger(app_config.settings.audit_log_path)
23
24
  metadata = MetadataService(registry, app_config.settings, audit_logger)
24
25
  executor = QueryExecutor(registry, app_config.settings, audit_logger)
26
+ exporter = QueryExporter(registry, app_config.settings, audit_logger)
25
27
  importer = TableFileImporter(registry, app_config.settings, audit_logger)
26
28
  async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
27
29
 
@@ -90,6 +92,32 @@ def create_app() -> FastMCP:
90
92
 
91
93
  return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
92
94
 
95
+ @mcp.tool()
96
+ def export_query_file(
97
+ connection_id: str,
98
+ sql: str,
99
+ output_path: str,
100
+ format: str = "csv",
101
+ limit: Optional[int] = 1000,
102
+ export_all: bool = False,
103
+ file_name: Optional[str] = None,
104
+ overwrite: bool = False,
105
+ ) -> dict:
106
+ """Export a read-only query result to a local CSV or XLSX file."""
107
+
108
+ return _run_tool(
109
+ lambda: exporter.export_query_file(
110
+ connection_id,
111
+ sql,
112
+ output_path,
113
+ format,
114
+ limit,
115
+ export_all,
116
+ file_name,
117
+ overwrite,
118
+ )
119
+ )
120
+
93
121
  @mcp.tool()
94
122
  def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
95
123
  """Start an asynchronous read-only SELECT or CTE query."""
@@ -0,0 +1,264 @@
1
+ """Controlled query result exports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ import time
7
+ import uuid
8
+ from datetime import datetime, time as datetime_time, timezone
9
+ from pathlib import Path
10
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, cast
11
+
12
+ try:
13
+ from openpyxl import Workbook
14
+ except ImportError: # pragma: no cover - runtime dependency
15
+ Workbook = None
16
+
17
+ from .audit import AuditLogger
18
+ from .config import ServerSettings
19
+ from .errors import QueryExecutionError, sanitize_error_message
20
+ from .validator import clamp_limit, summarize_sql, validate_select_sql
21
+
22
# Number of rows requested from the cursor per fetchmany() call during export.
EXPORT_BATCH_SIZE = 1000
# Connection engines accepted by export_query_file; any other engine is rejected.
SUPPORTED_EXPORT_ENGINES = {"postgres", "mysql"}
# Accepted output formats, compared after lower-casing and stripping leading dots.
SUPPORTED_EXPORT_FORMATS = {"csv", "xlsx"}
25
+
26
+
27
class QueryExporter:
    """Export validated read-only query results to local files."""

    def __init__(self, registry: Any, settings: ServerSettings, audit_logger: AuditLogger):
        # Collaborators are only stored here; nothing is called on them yet.
        self._registry = registry
        self._settings = settings
        self._audit = audit_logger

    def export_query_file(
        self,
        connection_id: str,
        sql_text: str,
        output_path: str,
        format: str = "csv",
        limit: Optional[int] = 1000,
        export_all: bool = False,
        file_name: Optional[str] = None,
        overwrite: bool = False,
    ) -> Dict[str, object]:
        """Run a validated SELECT and write its rows to a CSV or XLSX file.

        Steps, in order: check the format, load the connection config, check
        the engine, validate the SQL, resolve the output file, optionally wrap
        the query in an outer LIMIT, execute it through the adapter's export
        cursor, and write the rows batch by batch. Every call, successful or
        not, produces one audit log entry.

        Args:
            connection_id: Identifier passed to the registry to find the
                connection config.
            sql_text: Query text; it is passed through ``validate_select_sql``.
            output_path: Directory or file path handed to
                ``_resolve_output_file``.
            format: ``"csv"`` or ``"xlsx"``; case and leading dots are ignored.
            limit: Requested row cap, clamped via ``clamp_limit``. Ignored
                when ``export_all`` is true.
            export_all: When true the query is executed without the outer
                LIMIT wrapper.
            file_name: Optional file name used when ``output_path`` is a
                directory.
            overwrite: Whether an existing target file may be reused instead
                of picking a numbered alternative.

        Returns:
            A dict with ``connection_id``, ``engine``, ``file_path``,
            ``format``, ``row_count``, ``duration_ms``, ``export_all`` and
            ``applied_limit``.

        Raises:
            QueryExecutionError: for every failure. Any exception raised in
                the body is audit-logged, its message sanitized, and re-raised
                as ``QueryExecutionError`` chained to the original.
        """
        started = time.perf_counter()
        # These are initialised before the try block so the except branch can
        # report whatever was known at the point of failure.
        config = None
        final_path: Optional[Path] = None
        row_count = 0
        applied_limit = None
        # Summarize the raw text first so a validation failure is still logged
        # with a summary; it is replaced by the cleaned SQL's summary below.
        sql_summary = summarize_sql(sql_text)
        normalized_format = str(format).lower().lstrip(".")

        try:
            if normalized_format not in SUPPORTED_EXPORT_FORMATS:
                raise QueryExecutionError("导出格式仅支持 csv 和 xlsx。")
            config = self._registry.get_connection_config(connection_id)
            if config.engine not in SUPPORTED_EXPORT_ENGINES:
                raise QueryExecutionError("export_query_file 首版仅支持 PostgreSQL 和 MySQL。")
            cleaned_sql = validate_select_sql(sql_text, config.engine)
            sql_summary = summarize_sql(cleaned_sql)
            final_path = _resolve_output_file(
                output_path,
                normalized_format,
                file_name=file_name,
                overwrite=overwrite,
            )
            query = cleaned_sql
            if not export_all:
                # Bounded export: clamp the requested limit and enforce it in
                # SQL by wrapping the validated query.
                applied_limit = clamp_limit(limit, 1000, self._settings.max_limit)
                query = _build_exact_limited_query(cleaned_sql, applied_limit)

            with self._registry.connection_from_config(config) as (conn, adapter):
                _apply_statement_timeout(adapter, conn, self._settings.statement_timeout_ms)
                with _open_export_cursor(adapter, conn) as cur:
                    cur.execute(query)
                    columns = adapter.column_names(cur.description)
                    # Lazy generator: rows are fetched while the writer
                    # consumes it, so writing must happen inside this block.
                    batches = _iter_batches(cur, columns, adapter)
                    if normalized_format == "csv":
                        row_count = _write_csv(final_path, columns, batches)
                    else:
                        row_count = _write_xlsx(final_path, columns, batches)

            duration_ms = _elapsed_ms(started)
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=True,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                extra=_audit_extra(config, final_path, normalized_format, export_all, applied_limit),
            )
            return {
                "connection_id": connection_id,
                "engine": config.engine,
                "file_path": str(final_path),
                "format": normalized_format,
                "row_count": row_count,
                "duration_ms": duration_ms,
                "export_all": export_all,
                "applied_limit": applied_limit,
            }
        except Exception as exc:
            duration_ms = _elapsed_ms(started)
            sanitized = sanitize_error_message(str(exc))
            # row_count is only assigned after a writer returns, so a failure
            # during writing is logged with row_count 0.
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=False,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                error=sanitized,
                extra=_audit_extra(config, final_path, normalized_format, export_all, applied_limit),
            )
            raise QueryExecutionError(sanitized) from exc
118
+
119
+
120
def _iter_batches(cur: Any, columns: Sequence[str], adapter: Any) -> Iterable[List[object]]:
    """Yield the rows of ``cur`` in chunks of up to ``EXPORT_BATCH_SIZE``.

    Each chunk comes from ``cur.fetchmany``; iteration ends at the first
    empty chunk. When the adapter defines ``normalize_rows``, every chunk is
    passed through it (with the column names as a list) before being yielded.
    """
    while chunk := cur.fetchmany(EXPORT_BATCH_SIZE):
        if hasattr(adapter, "normalize_rows"):
            chunk = adapter.normalize_rows(chunk, list(columns))
        yield cast(List[object], chunk)
128
+
129
+
130
def _build_exact_limited_query(sql: str, row_limit: int) -> str:
    """Wrap ``sql`` as a subquery and cap the outer SELECT with a LIMIT.

    ``row_limit`` is coerced with ``int()`` before it is placed in the text.
    """
    limit_value = int(row_limit)
    return "SELECT * FROM ({}) AS pq_result LIMIT {}".format(sql, limit_value)
132
+
133
+
134
def _write_csv(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header plus all rows from ``batches`` to ``path`` as UTF-8 CSV.

    The file is opened in write mode, so existing content is replaced.

    Returns:
        The number of data rows written (the header row is not counted).
    """
    written = 0
    with path.open("w", encoding="utf-8", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(columns)
        for chunk in batches:
            for record in chunk:
                out.writerow(_row_values(record, columns))
                written += 1
    return written
144
+
145
+
146
def _write_xlsx(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header plus all rows from ``batches`` to an XLSX file at ``path``.

    Rows are appended to a single sheet named "Export" in an openpyxl
    write-only workbook, after each value has gone through
    ``_normalize_xlsx_value``. The workbook is saved once all rows have been
    appended.

    Returns:
        The number of data rows written (the header row is not counted).

    Raises:
        QueryExecutionError: when openpyxl is not installed, or when a row
            cannot be appended (the message lists each column with the Python
            type of its value).
    """
    if Workbook is None:
        raise QueryExecutionError("缺少 openpyxl 依赖,请先安装项目依赖。")
    book = Workbook(write_only=True)
    sheet = book.create_sheet("Export")
    sheet.append(list(columns))
    written = 0
    for chunk in batches:
        for record in chunk:
            cells = _row_values(record, columns, normalize_value=_normalize_xlsx_value)
            try:
                sheet.append(cells)
            except Exception as exc:
                raise QueryExecutionError(_format_xlsx_error(columns, cells, exc)) from exc
            written += 1
    book.save(path)
    return written
163
+
164
+
165
def _row_values(
    row: object,
    columns: Sequence[str],
    normalize_value: Optional[Callable[[object], object]] = None,
) -> List[object]:
    """Turn one result row into a flat list of cell values.

    A dict row is read in ``columns`` order, with ``None`` for missing keys.
    A list or tuple row is copied as-is. Any other object becomes a
    single-element list. When ``normalize_value`` is given it is applied to
    every value.
    """
    if isinstance(row, dict):
        extracted: List[object] = list(map(row.get, columns))
    elif isinstance(row, (list, tuple)):
        extracted = list(row)
    else:
        extracted = [row]
    if normalize_value is not None:
        extracted = [normalize_value(cell) for cell in extracted]
    return extracted
179
+
180
+
181
def _normalize_xlsx_value(value: object) -> object:
    """Adapt one cell value before it is appended to an XLSX sheet.

    UUIDs become their string form. Timezone-aware ``datetime`` and ``time``
    values have their tzinfo dropped without shifting the clock reading.
    Every other value is returned unchanged.
    """
    if isinstance(value, uuid.UUID):
        return str(value)
    is_temporal = isinstance(value, (datetime, datetime_time))
    if is_temporal and value.tzinfo is not None:
        return value.replace(tzinfo=None)
    return value
189
+
190
+
191
def _format_xlsx_error(columns: Sequence[str], values: Sequence[object], exc: Exception) -> str:
    """Build the error message for a row that could not be written to XLSX.

    The message contains the exception text (or the exception class name when
    the text is empty) followed by ``column=TypeName`` pairs. Only the Python
    type of each value is included, never the value itself.
    """
    reason = str(exc)
    if not reason:
        reason = exc.__class__.__name__
    pairs = [
        f"{column}={type(value).__name__}"
        for column, value in zip(columns, values)
    ]
    return "XLSX 导出失败: " + reason + "; columns: " + ", ".join(pairs)
198
+
199
+
200
def _resolve_output_file(output_path: str, format: str, file_name: Optional[str], overwrite: bool) -> Path:
    """Work out the file path an export should be written to.

    When ``output_path`` is an existing directory, the file is placed in it
    under ``file_name`` or, if none is given, a UTC-timestamped
    ``export_YYYYmmdd_HHMMSS`` name. Otherwise ``output_path`` itself is the
    file path and ``file_name`` must not be given. The ``.<format>`` suffix
    is appended unless already present. If the target exists and
    ``overwrite`` is false, a numbered sibling path is returned instead.

    Raises:
        QueryExecutionError: when ``file_name`` accompanies a file path, or
            when the target's parent directory does not exist.
    """
    extension = f".{format}"
    target = Path(output_path).expanduser()
    if target.exists() and target.is_dir():
        if file_name:
            stem = file_name
        else:
            stem = f"export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
        destination = target / _with_suffix(stem, extension)
    elif file_name:
        raise QueryExecutionError("output_path 为文件路径时不能同时传 file_name。")
    else:
        destination = Path(_with_suffix(str(target), extension)).expanduser()
    if not destination.parent.exists():
        raise QueryExecutionError("导出目录不存在。")
    if not overwrite and destination.exists():
        return _next_available_path(destination)
    return destination
215
+
216
+
217
def _with_suffix(value: str, suffix: str) -> str:
    """Return ``value`` ending in ``suffix``, appending it only when absent.

    The check ignores case on both sides: ``report.CSV`` is left alone for
    suffix ``.csv``, and a suffix written with uppercase letters still
    matches an existing ending instead of being appended a second time.
    """
    if value.lower().endswith(suffix.lower()):
        return value
    return value + suffix
219
+
220
+
221
def _next_available_path(path: Path) -> Path:
    """Return the first non-existing sibling named ``<stem> (<n>)<suffix>``.

    Numbering starts at 1 and increases until a free name is found. ``path``
    itself is never returned.
    """
    counter = 1
    candidate = path.parent / f"{path.stem} ({counter}){path.suffix}"
    while candidate.exists():
        counter += 1
        candidate = path.parent / f"{path.stem} ({counter}){path.suffix}"
    return candidate
231
+
232
+
233
def _apply_statement_timeout(adapter: Any, conn: Any, timeout_ms: Optional[int]) -> None:
    """Forward ``timeout_ms`` to ``adapter.set_statement_timeout`` unless it is ``None``.

    A timeout of ``0`` is still forwarded; only ``None`` skips the call.
    """
    if timeout_ms is None:
        return
    adapter.set_statement_timeout(conn, timeout_ms)
236
+
237
+
238
def _open_export_cursor(adapter: Any, conn: Any) -> Any:
    """Return the cursor object to use for an export on ``conn``.

    An adapter that offers a callable ``export_cursor`` gets to build the
    cursor itself; otherwise the connection's plain ``cursor()`` is used.
    """
    factory = getattr(adapter, "export_cursor", None)
    return factory(conn) if callable(factory) else conn.cursor()
243
+
244
+
245
def _elapsed_ms(started: float) -> int:
    """Return the milliseconds elapsed since ``started``, truncated to an int.

    ``started`` is compared against a fresh ``time.perf_counter()`` reading.
    """
    elapsed_seconds = time.perf_counter() - started
    return int(elapsed_seconds * 1000)
247
+
248
+
249
def _audit_extra(
    config: Any,
    file_path: Optional[Path],
    format: str,
    export_all: bool,
    applied_limit: Optional[int],
) -> Dict[str, object]:
    """Assemble the ``extra`` payload attached to an export audit entry.

    ``file_path`` is stringified (or left as ``None``). The ``engine`` key is
    added only when a connection config is available.
    """
    location = None if file_path is None else str(file_path)
    extra: Dict[str, object] = {
        "file_path": location,
        "format": format,
        "export_all": export_all,
        "applied_limit": applied_limit,
    }
    if config is None:
        return extra
    extra["engine"] = config.engine
    return extra