sql-query-mcp 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_query_mcp-0.3.0/sql_query_mcp.egg-info → sql_query_mcp-0.4.0}/PKG-INFO +16 -7
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/README.md +15 -6
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/pyproject.toml +1 -1
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/mysql.py +5 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/postgres.py +14 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/app.py +28 -0
- sql_query_mcp-0.4.0/sql_query_mcp/exporter.py +264 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0/sql_query_mcp.egg-info}/PKG-INFO +16 -7
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/SOURCES.txt +2 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_app.py +7 -0
- sql_query_mcp-0.4.0/tests/test_exporter.py +395 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/LICENSE +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/setup.cfg +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/__init__.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/__main__.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/__init__.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/hive.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/async_queries.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/audit.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/config.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/errors.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/executor.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/importer.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/introspection.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/namespace.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/registry.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/release_metadata.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp/validator.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/entry_points.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/requires.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/top_level.txt +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_async_queries.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_audit.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_config.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_executor.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_importer.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_metadata.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_namespace.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_registry.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_release_metadata.py +0 -0
- {sql_query_mcp-0.3.0 → sql_query_mcp-0.4.0}/tests/test_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-query-mcp
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
|
|
5
5
|
Author: Andy Wang
|
|
6
6
|
License-Expression: MIT
|
|
@@ -62,10 +62,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
62
62
|
## What AI can do with it
|
|
63
63
|
|
|
64
64
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
65
|
-
asynchronous read-only queries,
|
|
66
|
-
use it to help an AI assistant understand
|
|
67
|
-
runs a bounded query, starts a long-running
|
|
68
|
-
|
|
65
|
+
asynchronous read-only queries, batched query result exports, and one narrow
|
|
66
|
+
local file import path. You can use it to help an AI assistant understand
|
|
67
|
+
structure before it generates SQL, runs a bounded query, starts a long-running
|
|
68
|
+
read-only query, exports PostgreSQL or MySQL results to a local file, or imports
|
|
69
|
+
a prepared CSV/XLSX file into an existing table.
|
|
69
70
|
|
|
70
71
|
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
71
72
|
`EXPLAIN ANALYZE` for `explain_query`.
|
|
@@ -83,19 +84,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
|
83
84
|
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
84
85
|
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
85
86
|
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
87
|
+
| `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
|
|
86
88
|
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
87
89
|
|
|
88
90
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
89
91
|
definitions, reviewing indexes, sampling records, running short read-only
|
|
90
92
|
queries with `run_select`, running long read-only queries with `start_query`,
|
|
91
93
|
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
92
|
-
|
|
94
|
+
exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
|
|
95
|
+
also import prepared local files. For full request and response details, see
|
|
93
96
|
`docs/api-reference.md` (Chinese).
|
|
94
97
|
|
|
95
98
|
## How boundaries are constrained
|
|
96
99
|
|
|
97
100
|
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
98
|
-
are available today. Query tools remain read-only, and
|
|
101
|
+
are available today. Query tools remain read-only, PostgreSQL and MySQL query
|
|
102
|
+
results can be exported to local files, and the only database write path is a
|
|
99
103
|
controlled local CSV/XLSX import into existing tables.
|
|
100
104
|
|
|
101
105
|
The service keeps those boundaries explicit in a few ways.
|
|
@@ -112,6 +116,11 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
112
116
|
queries.
|
|
113
117
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
114
118
|
multi-statement input, and records audit logs for each call.
|
|
119
|
+
- `export_query_file` writes files on the MCP server machine. It is synchronous
|
|
120
|
+
but reads database rows and writes CSV/XLSX files in batches. Large exports can
|
|
121
|
+
still hit your MCP client's tool timeout. For XLSX output, UUID values are
|
|
122
|
+
written as text and timezone-aware datetime values are written without the
|
|
123
|
+
timezone. Hive export is not supported yet.
|
|
115
124
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
116
125
|
headers exactly match existing table columns.
|
|
117
126
|
- Hive `import_table_file` is intended for small files only and rejects files
|
|
@@ -30,10 +30,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
30
30
|
## What AI can do with it
|
|
31
31
|
|
|
32
32
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
33
|
-
asynchronous read-only queries,
|
|
34
|
-
use it to help an AI assistant understand
|
|
35
|
-
runs a bounded query, starts a long-running
|
|
36
|
-
|
|
33
|
+
asynchronous read-only queries, batched query result exports, and one narrow
|
|
34
|
+
local file import path. You can use it to help an AI assistant understand
|
|
35
|
+
structure before it generates SQL, runs a bounded query, starts a long-running
|
|
36
|
+
read-only query, exports PostgreSQL or MySQL results to a local file, or imports
|
|
37
|
+
a prepared CSV/XLSX file into an existing table.
|
|
37
38
|
|
|
38
39
|
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
39
40
|
`EXPLAIN ANALYZE` for `explain_query`.
|
|
@@ -51,19 +52,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
|
51
52
|
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
52
53
|
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
53
54
|
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
55
|
+
| `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
|
|
54
56
|
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
55
57
|
|
|
56
58
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
57
59
|
definitions, reviewing indexes, sampling records, running short read-only
|
|
58
60
|
queries with `run_select`, running long read-only queries with `start_query`,
|
|
59
61
|
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
60
|
-
|
|
62
|
+
exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
|
|
63
|
+
also import prepared local files. For full request and response details, see
|
|
61
64
|
`docs/api-reference.md` (Chinese).
|
|
62
65
|
|
|
63
66
|
## How boundaries are constrained
|
|
64
67
|
|
|
65
68
|
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
66
|
-
are available today. Query tools remain read-only, and
|
|
69
|
+
are available today. Query tools remain read-only, PostgreSQL and MySQL query
|
|
70
|
+
results can be exported to local files, and the only database write path is a
|
|
67
71
|
controlled local CSV/XLSX import into existing tables.
|
|
68
72
|
|
|
69
73
|
The service keeps those boundaries explicit in a few ways.
|
|
@@ -80,6 +84,11 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
80
84
|
queries.
|
|
81
85
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
82
86
|
multi-statement input, and records audit logs for each call.
|
|
87
|
+
- `export_query_file` writes files on the MCP server machine. It is synchronous
|
|
88
|
+
but reads database rows and writes CSV/XLSX files in batches. Large exports can
|
|
89
|
+
still hit your MCP client's tool timeout. For XLSX output, UUID values are
|
|
90
|
+
written as text and timezone-aware datetime values are written without the
|
|
91
|
+
timezone. Hive export is not supported yet.
|
|
83
92
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
84
93
|
headers exactly match existing table columns.
|
|
85
94
|
- Hive `import_table_file` is intended for small files only and rejects files
|
|
@@ -42,6 +42,11 @@ class MySQLAdapter:
|
|
|
42
42
|
with conn.cursor() as cur:
|
|
43
43
|
cur.execute("SET SESSION max_execution_time = %s", (int(timeout_ms),))
|
|
44
44
|
|
|
45
|
+
def export_cursor(self, conn: object):
    """Open the cursor used for query exports on a MySQL connection.

    The cursor is created with PyMySQL's ``SSDictCursor`` class, so rows
    come back as dictionaries keyed by column name.

    Raises:
        ConfigurationError: If the optional PyMySQL dependency is missing.
    """
    if pymysql is not None:
        return conn.cursor(pymysql.cursors.SSDictCursor)
    raise ConfigurationError("缺少 PyMySQL 依赖,请先安装项目依赖。")
|
|
49
|
+
|
|
45
50
|
def list_databases(self, conn: object) -> List[str]:
|
|
46
51
|
with conn.cursor() as cur:
|
|
47
52
|
cur.execute(
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from contextlib import contextmanager
|
|
6
|
+
from uuid import uuid4
|
|
6
7
|
from typing import Iterator, List
|
|
7
8
|
|
|
8
9
|
try:
|
|
@@ -37,6 +38,19 @@ class PostgresAdapter:
|
|
|
37
38
|
with conn.cursor() as cur:
|
|
38
39
|
cur.execute("SELECT set_config('statement_timeout', %s, false)", (str(timeout_ms),))
|
|
39
40
|
|
|
41
|
+
@contextmanager
def export_cursor(self, conn: object) -> Iterator[object]:
    """Yield a uniquely named cursor for exporting query results.

    If the connection reports ``autocommit is True`` it is switched off for
    the duration of the export, then rolled back and switched on again when
    the block exits (normally or with an error). Connections that are not in
    autocommit mode are left untouched.

    NOTE(review): autocommit is presumably disabled because the driver's
    named cursors need an open transaction -- confirm against the driver
    in use.
    """
    restore_autocommit = getattr(conn, "autocommit", None) is True
    if restore_autocommit:
        conn.autocommit = False
    # A random suffix keeps concurrent exports from sharing a cursor name.
    cursor_name = f"sql_query_mcp_export_{uuid4().hex}"
    try:
        with conn.cursor(name=cursor_name) as cur:
            yield cur
    finally:
        if restore_autocommit:
            # Discard the export transaction before handing the connection back.
            conn.rollback()
            conn.autocommit = True
|
|
53
|
+
|
|
40
54
|
def list_schemas(self, conn: object) -> List[str]:
|
|
41
55
|
with conn.cursor() as cur:
|
|
42
56
|
cur.execute(
|
|
@@ -11,6 +11,7 @@ from .audit import AuditLogger
|
|
|
11
11
|
from .config import load_config
|
|
12
12
|
from .errors import SqlQueryMCPError
|
|
13
13
|
from .executor import QueryExecutor
|
|
14
|
+
from .exporter import QueryExporter
|
|
14
15
|
from .importer import TableFileImporter
|
|
15
16
|
from .introspection import MetadataService
|
|
16
17
|
from .registry import ConnectionRegistry
|
|
@@ -22,6 +23,7 @@ def create_app() -> FastMCP:
|
|
|
22
23
|
audit_logger = AuditLogger(app_config.settings.audit_log_path)
|
|
23
24
|
metadata = MetadataService(registry, app_config.settings, audit_logger)
|
|
24
25
|
executor = QueryExecutor(registry, app_config.settings, audit_logger)
|
|
26
|
+
exporter = QueryExporter(registry, app_config.settings, audit_logger)
|
|
25
27
|
importer = TableFileImporter(registry, app_config.settings, audit_logger)
|
|
26
28
|
async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
|
|
27
29
|
|
|
@@ -90,6 +92,32 @@ def create_app() -> FastMCP:
|
|
|
90
92
|
|
|
91
93
|
return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
|
|
92
94
|
|
|
95
|
+
# MCP tool entry point for query exports. The parameter list and defaults
# mirror QueryExporter.export_query_file, except that the SQL argument is
# called `sql` here.
@mcp.tool()
def export_query_file(
    connection_id: str,
    sql: str,
    output_path: str,
    format: str = "csv",
    limit: Optional[int] = 1000,
    export_all: bool = False,
    file_name: Optional[str] = None,
    overwrite: bool = False,
) -> dict:
    """Export a read-only query result to a local CSV or XLSX file."""

    # Arguments are forwarded positionally, so their order must stay in sync
    # with the exporter method's signature. The call is deferred in a lambda
    # and handed to `_run_tool` (defined outside this view -- presumably the
    # shared error-handling wrapper used by the other tools; confirm).
    return _run_tool(
        lambda: exporter.export_query_file(
            connection_id,
            sql,
            output_path,
            format,
            limit,
            export_all,
            file_name,
            overwrite,
        )
    )
|
|
120
|
+
|
|
93
121
|
@mcp.tool()
|
|
94
122
|
def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
|
|
95
123
|
"""Start an asynchronous read-only SELECT or CTE query."""
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""Controlled query result exports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, time as datetime_time, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, cast
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from openpyxl import Workbook
|
|
14
|
+
except ImportError: # pragma: no cover - runtime dependency
|
|
15
|
+
Workbook = None
|
|
16
|
+
|
|
17
|
+
from .audit import AuditLogger
|
|
18
|
+
from .config import ServerSettings
|
|
19
|
+
from .errors import QueryExecutionError, sanitize_error_message
|
|
20
|
+
from .validator import clamp_limit, summarize_sql, validate_select_sql
|
|
21
|
+
|
|
22
|
+
EXPORT_BATCH_SIZE = 1000
|
|
23
|
+
SUPPORTED_EXPORT_ENGINES = {"postgres", "mysql"}
|
|
24
|
+
SUPPORTED_EXPORT_FORMATS = {"csv", "xlsx"}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class QueryExporter:
    """Export validated read-only query results to local files."""

    def __init__(self, registry: Any, settings: ServerSettings, audit_logger: AuditLogger):
        # registry: supplies connection configs and live connections.
        # settings: read here for `max_limit` and `statement_timeout_ms`.
        # audit_logger: receives one record per export attempt.
        self._registry = registry
        self._settings = settings
        self._audit = audit_logger

    def export_query_file(
        self,
        connection_id: str,
        sql_text: str,
        output_path: str,
        format: str = "csv",
        limit: Optional[int] = 1000,
        export_all: bool = False,
        file_name: Optional[str] = None,
        overwrite: bool = False,
    ) -> Dict[str, object]:
        """Run a validated query and write its rows to a CSV or XLSX file.

        Args:
            connection_id: Identifier looked up in the registry.
            sql_text: Query text; passed through ``validate_select_sql``.
            output_path: Existing directory or target file path.
            format: ``"csv"`` or ``"xlsx"``; case and a leading dot are ignored.
            limit: Requested row cap, clamped via ``clamp_limit``; ignored
                when ``export_all`` is true.
            export_all: When true, the query runs without the LIMIT wrapper.
            file_name: Optional file name, used when ``output_path`` is a
                directory.
            overwrite: Passed to ``_resolve_output_file`` to decide whether an
                existing file may be reused as the target.

        Returns:
            A dict with connection id, engine, resolved file path, format,
            row count, duration in milliseconds, ``export_all`` and the
            applied limit (``None`` when exporting everything).

        Raises:
            QueryExecutionError: For any failure; the message is the
                sanitized text of the underlying exception.
        """
        started = time.perf_counter()
        # These are initialised before the try block so the failure audit
        # record can report whatever was resolved before the error occurred.
        config = None
        final_path: Optional[Path] = None
        row_count = 0
        applied_limit = None
        sql_summary = summarize_sql(sql_text)
        normalized_format = str(format).lower().lstrip(".")

        try:
            if normalized_format not in SUPPORTED_EXPORT_FORMATS:
                raise QueryExecutionError("导出格式仅支持 csv 和 xlsx。")
            config = self._registry.get_connection_config(connection_id)
            if config.engine not in SUPPORTED_EXPORT_ENGINES:
                raise QueryExecutionError("export_query_file 首版仅支持 PostgreSQL 和 MySQL。")
            cleaned_sql = validate_select_sql(sql_text, config.engine)
            # Re-summarise so the audit record reflects the cleaned statement.
            sql_summary = summarize_sql(cleaned_sql)
            # The output path is resolved before any connection is opened.
            final_path = _resolve_output_file(
                output_path,
                normalized_format,
                file_name=file_name,
                overwrite=overwrite,
            )
            query = cleaned_sql
            if not export_all:
                applied_limit = clamp_limit(limit, 1000, self._settings.max_limit)
                query = _build_exact_limited_query(cleaned_sql, applied_limit)

            with self._registry.connection_from_config(config) as (conn, adapter):
                _apply_statement_timeout(adapter, conn, self._settings.statement_timeout_ms)
                with _open_export_cursor(adapter, conn) as cur:
                    cur.execute(query)
                    columns = adapter.column_names(cur.description)
                    # `batches` is a generator: rows are fetched while the
                    # writer consumes them, so the cursor must stay open here.
                    batches = _iter_batches(cur, columns, adapter)
                    if normalized_format == "csv":
                        row_count = _write_csv(final_path, columns, batches)
                    else:
                        row_count = _write_xlsx(final_path, columns, batches)

            duration_ms = _elapsed_ms(started)
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=True,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                extra=_audit_extra(config, final_path, normalized_format, export_all, applied_limit),
            )
            return {
                "connection_id": connection_id,
                "engine": config.engine,
                "file_path": str(final_path),
                "format": normalized_format,
                "row_count": row_count,
                "duration_ms": duration_ms,
                "export_all": export_all,
                "applied_limit": applied_limit,
            }
        except Exception as exc:
            duration_ms = _elapsed_ms(started)
            sanitized = sanitize_error_message(str(exc))
            # `row_count` is only assigned when a writer returns, so a failure
            # in the middle of writing is logged with row_count == 0.
            self._audit.log(
                tool="export_query_file",
                connection_id=connection_id,
                success=False,
                duration_ms=duration_ms,
                row_count=row_count,
                sql_summary=sql_summary,
                error=sanitized,
                extra=_audit_extra(config, final_path, normalized_format, export_all, applied_limit),
            )
            raise QueryExecutionError(sanitized) from exc
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _iter_batches(cur: Any, columns: Sequence[str], adapter: Any) -> Iterable[List[object]]:
    """Yield row batches from ``cur`` until a fetch returns nothing.

    Each batch is fetched with ``fetchmany(EXPORT_BATCH_SIZE)``. When the
    adapter exposes ``normalize_rows``, the batch is passed through it
    together with the column names before being yielded.
    """
    chunk = cur.fetchmany(EXPORT_BATCH_SIZE)
    while chunk:
        if hasattr(adapter, "normalize_rows"):
            chunk = adapter.normalize_rows(chunk, list(columns))
        yield cast(List[object], chunk)
        chunk = cur.fetchmany(EXPORT_BATCH_SIZE)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_exact_limited_query(sql: str, row_limit: int) -> str:
    """Wrap ``sql`` in an outer SELECT that caps the result at ``row_limit`` rows.

    The limit is coerced with ``int`` before being placed into the statement.
    """
    capped = int(row_limit)
    return "SELECT * FROM ({}) AS pq_result LIMIT {}".format(sql, capped)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _write_csv(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header row plus every row of every batch to ``path`` as CSV.

    The file is opened for writing as UTF-8 with ``newline=""`` so the csv
    module controls line endings.

    Returns:
        The number of data rows written (the header is not counted).
    """
    written = 0
    with path.open("w", encoding="utf-8", newline="") as handle:
        emit = csv.writer(handle).writerow
        emit(columns)
        for batch in batches:
            for record in batch:
                emit(_row_values(record, columns))
                written += 1
    return written
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _write_xlsx(path: Path, columns: Sequence[str], batches: Iterable[Sequence[object]]) -> int:
    """Write a header row plus every row of every batch to an XLSX file.

    A write-only workbook with a single sheet named "Export" is filled row
    by row and saved to ``path`` once all batches are consumed. Cell values
    are passed through ``_normalize_xlsx_value`` first.

    Returns:
        The number of data rows written (the header is not counted).

    Raises:
        QueryExecutionError: If openpyxl is unavailable, or if appending a
            row fails (the message lists each column with its value type).
    """
    if Workbook is None:
        raise QueryExecutionError("缺少 openpyxl 依赖,请先安装项目依赖。")
    book = Workbook(write_only=True)
    sheet = book.create_sheet("Export")
    sheet.append(list(columns))
    written = 0
    for batch in batches:
        for record in batch:
            cells = _row_values(record, columns, normalize_value=_normalize_xlsx_value)
            try:
                sheet.append(cells)
            except Exception as exc:
                raise QueryExecutionError(_format_xlsx_error(columns, cells, exc)) from exc
            else:
                written += 1
    book.save(path)
    return written
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _row_values(
    row: object,
    columns: Sequence[str],
    normalize_value: Optional[Callable[[object], object]] = None,
) -> List[object]:
    """Turn one fetched row into a list of cell values.

    Dict rows are read in ``columns`` order (missing keys become ``None``),
    list/tuple rows are copied as-is, and any other object is treated as a
    single-value row. When ``normalize_value`` is given it is applied to
    every value.
    """
    if isinstance(row, dict):
        cells: List[object] = [row.get(name) for name in columns]
    elif isinstance(row, (list, tuple)):
        cells = list(row)
    else:
        cells = [row]
    if normalize_value is not None:
        return [normalize_value(cell) for cell in cells]
    return cells
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _normalize_xlsx_value(value: object) -> object:
    """Convert a cell value into a form suitable for the XLSX writer.

    UUIDs become their string form; timezone-aware ``datetime`` and ``time``
    values have their ``tzinfo`` removed (the clock reading is kept, not
    converted). Every other value is returned unchanged.
    """
    if isinstance(value, uuid.UUID):
        return str(value)
    is_temporal = isinstance(value, (datetime, datetime_time))
    if is_temporal and value.tzinfo is not None:
        return value.replace(tzinfo=None)
    return value
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _format_xlsx_error(columns: Sequence[str], values: Sequence[object], exc: Exception) -> str:
    """Build the error text for a row that could not be appended to a sheet.

    The text contains the exception message (or its class name when the
    message is empty) followed by each column paired with the type name of
    its value, which helps locate the unsupported cell type.
    """
    reason = str(exc) or exc.__class__.__name__
    column_types = [
        f"{column}={type(value).__name__}"
        for column, value in zip(columns, values)
    ]
    details = ", ".join(column_types)
    return f"XLSX 导出失败: {reason}; columns: {details}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _resolve_output_file(output_path: str, format: str, file_name: Optional[str], overwrite: bool) -> Path:
    """Work out the file an export should be written to.

    If ``output_path`` is an existing directory, the file is placed inside it
    using ``file_name`` or, failing that, a UTC-timestamped ``export_...``
    name. Otherwise ``output_path`` itself is the file path and ``file_name``
    must not be given. The ``.<format>`` extension is appended when missing.

    When the target already exists and ``overwrite`` is false, a numbered
    sibling path is returned instead.

    Raises:
        QueryExecutionError: If ``file_name`` is combined with a file path,
            or the target's parent directory does not exist.
    """
    extension = f".{format}"
    base = Path(output_path).expanduser()
    if not (base.exists() and base.is_dir()):
        if file_name:
            raise QueryExecutionError("output_path 为文件路径时不能同时传 file_name。")
        candidate = Path(_with_suffix(str(base), extension)).expanduser()
    else:
        chosen_name = file_name or f"export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
        candidate = base / _with_suffix(chosen_name, extension)
    if not candidate.parent.exists():
        raise QueryExecutionError("导出目录不存在。")
    if not overwrite and candidate.exists():
        return _next_available_path(candidate)
    return candidate
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _with_suffix(value: str, suffix: str) -> str:
    """Return ``value`` ending in ``suffix``, appending it only when absent.

    The check lower-cases ``value`` first, so an existing upper-case
    extension is kept as written.
    """
    if value.lower().endswith(suffix):
        return value
    return value + suffix
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _next_available_path(path: Path) -> Path:
    """Return the first ``"<stem> (N)<suffix>"`` sibling of ``path`` that is free.

    Numbering starts at 1 and increases until a name that does not yet exist
    in the same directory is found.
    """
    attempt = 0
    while True:
        attempt += 1
        candidate = path.parent / f"{path.stem} ({attempt}){path.suffix}"
        if not candidate.exists():
            return candidate
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _apply_statement_timeout(adapter: Any, conn: Any, timeout_ms: Optional[int]) -> None:
    """Apply the configured statement timeout to ``conn`` through the adapter.

    Nothing happens when ``timeout_ms`` is ``None``; a value of ``0`` is
    still forwarded to the adapter.
    """
    if timeout_ms is None:
        return
    # Plain attribute access replaces getattr() with a constant name; the
    # lookup and the AttributeError on a missing method are identical.
    adapter.set_statement_timeout(conn, timeout_ms)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _open_export_cursor(adapter: Any, conn: Any) -> Any:
    """Return the cursor to use for an export.

    Adapters that provide a callable ``export_cursor`` get to build the
    cursor themselves; otherwise the connection's default cursor is used.
    """
    factory = getattr(adapter, "export_cursor", None)
    return factory(conn) if callable(factory) else conn.cursor()
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _elapsed_ms(started: float) -> int:
    """Return whole milliseconds elapsed since ``started``.

    ``started`` is a ``time.perf_counter()`` reading; the result is
    truncated to an integer.
    """
    elapsed_seconds = time.perf_counter() - started
    return int(elapsed_seconds * 1000)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _audit_extra(
    config: Any,
    file_path: Optional[Path],
    format: str,
    export_all: bool,
    applied_limit: Optional[int],
) -> Dict[str, object]:
    """Assemble the export-specific fields attached to an audit record.

    ``file_path`` is stringified (or left as ``None`` when not yet resolved),
    and ``engine`` is included only when a connection config is available.
    """
    path_text = None if file_path is None else str(file_path)
    payload: Dict[str, object] = {
        "file_path": path_text,
        "format": format,
        "export_all": export_all,
        "applied_limit": applied_limit,
    }
    if config is None:
        return payload
    payload["engine"] = config.engine
    return payload
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-query-mcp
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
|
|
5
5
|
Author: Andy Wang
|
|
6
6
|
License-Expression: MIT
|
|
@@ -62,10 +62,11 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
62
62
|
## What AI can do with it
|
|
63
63
|
|
|
64
64
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
65
|
-
asynchronous read-only queries,
|
|
66
|
-
use it to help an AI assistant understand
|
|
67
|
-
runs a bounded query, starts a long-running
|
|
68
|
-
|
|
65
|
+
asynchronous read-only queries, batched query result exports, and one narrow
|
|
66
|
+
local file import path. You can use it to help an AI assistant understand
|
|
67
|
+
structure before it generates SQL, runs a bounded query, starts a long-running
|
|
68
|
+
read-only query, exports PostgreSQL or MySQL results to a local file, or imports
|
|
69
|
+
a prepared CSV/XLSX file into an existing table.
|
|
69
70
|
|
|
70
71
|
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
71
72
|
`EXPLAIN ANALYZE` for `explain_query`.
|
|
@@ -83,19 +84,22 @@ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
|
83
84
|
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
84
85
|
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
85
86
|
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
87
|
+
| `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
|
|
86
88
|
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
87
89
|
|
|
88
90
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
89
91
|
definitions, reviewing indexes, sampling records, running short read-only
|
|
90
92
|
queries with `run_select`, running long read-only queries with `start_query`,
|
|
91
93
|
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
92
|
-
|
|
94
|
+
exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
|
|
95
|
+
also import prepared local files. For full request and response details, see
|
|
93
96
|
`docs/api-reference.md` (Chinese).
|
|
94
97
|
|
|
95
98
|
## How boundaries are constrained
|
|
96
99
|
|
|
97
100
|
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
98
|
-
are available today. Query tools remain read-only, and
|
|
101
|
+
are available today. Query tools remain read-only, PostgreSQL and MySQL query
|
|
102
|
+
results can be exported to local files, and the only database write path is a
|
|
99
103
|
controlled local CSV/XLSX import into existing tables.
|
|
100
104
|
|
|
101
105
|
The service keeps those boundaries explicit in a few ways.
|
|
@@ -112,6 +116,11 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
112
116
|
queries.
|
|
113
117
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
114
118
|
multi-statement input, and records audit logs for each call.
|
|
119
|
+
- `export_query_file` writes files on the MCP server machine. It is synchronous
|
|
120
|
+
but reads database rows and writes CSV/XLSX files in batches. Large exports can
|
|
121
|
+
still hit your MCP client's tool timeout. For XLSX output, UUID values are
|
|
122
|
+
written as text and timezone-aware datetime values are written without the
|
|
123
|
+
timezone. Hive export is not supported yet.
|
|
115
124
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
116
125
|
headers exactly match existing table columns.
|
|
117
126
|
- Hive `import_table_file` is intended for small files only and rejects files
|
|
@@ -9,6 +9,7 @@ sql_query_mcp/audit.py
|
|
|
9
9
|
sql_query_mcp/config.py
|
|
10
10
|
sql_query_mcp/errors.py
|
|
11
11
|
sql_query_mcp/executor.py
|
|
12
|
+
sql_query_mcp/exporter.py
|
|
12
13
|
sql_query_mcp/importer.py
|
|
13
14
|
sql_query_mcp/introspection.py
|
|
14
15
|
sql_query_mcp/namespace.py
|
|
@@ -30,6 +31,7 @@ tests/test_async_queries.py
|
|
|
30
31
|
tests/test_audit.py
|
|
31
32
|
tests/test_config.py
|
|
32
33
|
tests/test_executor.py
|
|
34
|
+
tests/test_exporter.py
|
|
33
35
|
tests/test_importer.py
|
|
34
36
|
tests/test_metadata.py
|
|
35
37
|
tests/test_namespace.py
|
|
@@ -24,6 +24,13 @@ class AppTestCase(unittest.TestCase):
|
|
|
24
24
|
self.assertIn("get_query", tool_names)
|
|
25
25
|
self.assertIn("cancel_query", tool_names)
|
|
26
26
|
|
|
27
|
+
def test_create_app_registers_export_query_file_tool(self) -> None:
    """The application built by ``create_app`` lists ``export_query_file``."""
    registered = asyncio.run(create_app().list_tools())
    tool_names = {tool.name for tool in registered}

    self.assertIn("export_query_file", tool_names)
|
|
33
|
+
|
|
27
34
|
|
|
28
35
|
if __name__ == "__main__":
|
|
29
36
|
unittest.main()
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
import tempfile
|
|
6
|
+
import unittest
|
|
7
|
+
import uuid
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import cast
|
|
12
|
+
|
|
13
|
+
from openpyxl import load_workbook
|
|
14
|
+
|
|
15
|
+
from sql_query_mcp.audit import AuditLogger
|
|
16
|
+
from sql_query_mcp.config import ConnectionConfig, ServerSettings
|
|
17
|
+
from sql_query_mcp.errors import QueryExecutionError
|
|
18
|
+
from sql_query_mcp.exporter import QueryExporter
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _CursorStub:
    """In-memory cursor that replays prepared batches and records its usage."""

    def __init__(self, batches, description=("id", "name")) -> None:
        # Copy the batches so the caller's list is not consumed by fetchmany.
        self._batches = list(batches)
        self.description = description
        self.executed = []
        self.fetchmany_sizes = []

    def __enter__(self):
        """Support ``with cursor:`` by returning the stub itself."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Do nothing on exit; exceptions are not suppressed."""
        return None

    def execute(self, query) -> None:
        """Record the executed statement."""
        self.executed.append(query)

    def fetchmany(self, size: int):
        """Record the requested size and return the next batch, or ``[]``."""
        self.fetchmany_sizes.append(size)
        return self._batches.pop(0) if self._batches else []
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class _ConnectionStub:
|
|
45
|
+
def __init__(self, cursor: _CursorStub) -> None:
|
|
46
|
+
self._cursor = cursor
|
|
47
|
+
|
|
48
|
+
def cursor(self):
|
|
49
|
+
return self._cursor
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _AdapterStub:
|
|
53
|
+
def __init__(self) -> None:
|
|
54
|
+
self.set_statement_timeout_calls = []
|
|
55
|
+
|
|
56
|
+
def set_statement_timeout(self, conn: object, timeout_ms: int) -> None:
|
|
57
|
+
self.set_statement_timeout_calls.append(timeout_ms)
|
|
58
|
+
|
|
59
|
+
def column_names(self, description):
|
|
60
|
+
return list(description or [])
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class _StreamingAdapterStub(_AdapterStub):
|
|
64
|
+
def __init__(self, export_cursor) -> None:
|
|
65
|
+
super().__init__()
|
|
66
|
+
self._export_cursor = export_cursor
|
|
67
|
+
self.export_cursor_calls = 0
|
|
68
|
+
|
|
69
|
+
def export_cursor(self, conn: object):
|
|
70
|
+
self.export_cursor_calls += 1
|
|
71
|
+
return self._export_cursor
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class _RegistryStub:
|
|
75
|
+
def __init__(self, config: ConnectionConfig, adapter: object, conn: object) -> None:
|
|
76
|
+
self._config = config
|
|
77
|
+
self._adapter = adapter
|
|
78
|
+
self._conn = conn
|
|
79
|
+
self.connection_calls = 0
|
|
80
|
+
|
|
81
|
+
def get_connection_config(self, connection_id: str) -> ConnectionConfig:
|
|
82
|
+
if connection_id != self._config.connection_id:
|
|
83
|
+
raise AssertionError(connection_id)
|
|
84
|
+
return self._config
|
|
85
|
+
|
|
86
|
+
@contextmanager
|
|
87
|
+
def connection_from_config(self, config: ConnectionConfig):
|
|
88
|
+
if config != self._config:
|
|
89
|
+
raise AssertionError(config)
|
|
90
|
+
self.connection_calls += 1
|
|
91
|
+
yield self._conn, self._adapter
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _mysql_config() -> ConnectionConfig:
|
|
95
|
+
return ConnectionConfig(
|
|
96
|
+
connection_id="crm_mysql_prod_main_ro",
|
|
97
|
+
engine="mysql",
|
|
98
|
+
label="CRM MySQL",
|
|
99
|
+
env="prod",
|
|
100
|
+
tenant="main",
|
|
101
|
+
role="ro",
|
|
102
|
+
dsn_env="MYSQL_CONN",
|
|
103
|
+
enabled=True,
|
|
104
|
+
default_database="crm",
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _postgres_config() -> ConnectionConfig:
|
|
109
|
+
return ConnectionConfig(
|
|
110
|
+
connection_id="crm_pg_prod_main_ro",
|
|
111
|
+
engine="postgres",
|
|
112
|
+
label="CRM PostgreSQL",
|
|
113
|
+
env="prod",
|
|
114
|
+
tenant="main",
|
|
115
|
+
role="ro",
|
|
116
|
+
dsn_env="PG_CONN",
|
|
117
|
+
enabled=True,
|
|
118
|
+
default_schema="public",
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _hive_config() -> ConnectionConfig:
|
|
123
|
+
return ConnectionConfig(
|
|
124
|
+
connection_id="warehouse_hive_prod_main_ro",
|
|
125
|
+
engine="hive",
|
|
126
|
+
label="Warehouse Hive",
|
|
127
|
+
env="prod",
|
|
128
|
+
tenant="main",
|
|
129
|
+
role="ro",
|
|
130
|
+
dsn_env="HIVE_CONN",
|
|
131
|
+
enabled=True,
|
|
132
|
+
default_database="default",
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _build_exporter(
|
|
137
|
+
config: ConnectionConfig,
|
|
138
|
+
cursor: _CursorStub,
|
|
139
|
+
temp_dir: str,
|
|
140
|
+
adapter: _AdapterStub | None = None,
|
|
141
|
+
):
|
|
142
|
+
adapter = adapter or _AdapterStub()
|
|
143
|
+
registry = _RegistryStub(config, adapter, _ConnectionStub(cursor))
|
|
144
|
+
log_path = Path(temp_dir) / "audit.jsonl"
|
|
145
|
+
exporter = QueryExporter(
|
|
146
|
+
registry=registry,
|
|
147
|
+
settings=ServerSettings(
|
|
148
|
+
default_limit=200,
|
|
149
|
+
max_limit=1000,
|
|
150
|
+
statement_timeout_ms=2500,
|
|
151
|
+
audit_log_path=log_path,
|
|
152
|
+
),
|
|
153
|
+
audit_logger=AuditLogger(log_path),
|
|
154
|
+
)
|
|
155
|
+
return exporter, registry, adapter, log_path
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _read_audit_records(path: Path):
|
|
159
|
+
return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines()]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class QueryExporterTestCase(unittest.TestCase):
|
|
163
|
+
def test_export_csv_writes_multiple_fetchmany_batches(self) -> None:
|
|
164
|
+
cursor = _CursorStub(
|
|
165
|
+
batches=[
|
|
166
|
+
[(1, "Alice")],
|
|
167
|
+
[(2, "Bob")],
|
|
168
|
+
[],
|
|
169
|
+
]
|
|
170
|
+
)
|
|
171
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
172
|
+
exporter, _, adapter, log_path = _build_exporter(
|
|
173
|
+
_mysql_config(), cursor, temp_dir
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
result = exporter.export_query_file(
|
|
177
|
+
"crm_mysql_prod_main_ro",
|
|
178
|
+
"SELECT id, name FROM users",
|
|
179
|
+
temp_dir,
|
|
180
|
+
limit=2,
|
|
181
|
+
file_name="users",
|
|
182
|
+
)
|
|
183
|
+
file_path = cast(str, result["file_path"])
|
|
184
|
+
with Path(file_path).open("r", encoding="utf-8", newline="") as handle:
|
|
185
|
+
rows = list(csv.reader(handle))
|
|
186
|
+
records = _read_audit_records(log_path)
|
|
187
|
+
|
|
188
|
+
self.assertEqual("csv", result["format"])
|
|
189
|
+
self.assertEqual(2, result["row_count"])
|
|
190
|
+
self.assertEqual(2, result["applied_limit"])
|
|
191
|
+
self.assertEqual(["id", "name"], rows[0])
|
|
192
|
+
self.assertEqual([["1", "Alice"], ["2", "Bob"]], rows[1:])
|
|
193
|
+
self.assertIn("LIMIT 2", cursor.executed[0])
|
|
194
|
+
self.assertEqual([2500], adapter.set_statement_timeout_calls)
|
|
195
|
+
self.assertGreaterEqual(len(cursor.fetchmany_sizes), 2)
|
|
196
|
+
self.assertEqual("export_query_file", records[0]["tool"])
|
|
197
|
+
self.assertEqual(2, records[0]["row_count"])
|
|
198
|
+
|
|
199
|
+
def test_export_xlsx_writes_rows(self) -> None:
|
|
200
|
+
cursor = _CursorStub(batches=[[(1, "Alice")], []])
|
|
201
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
202
|
+
exporter, _, _, _ = _build_exporter(_postgres_config(), cursor, temp_dir)
|
|
203
|
+
|
|
204
|
+
result = exporter.export_query_file(
|
|
205
|
+
"crm_pg_prod_main_ro",
|
|
206
|
+
"SELECT id, name FROM users",
|
|
207
|
+
temp_dir,
|
|
208
|
+
format="xlsx",
|
|
209
|
+
file_name="users",
|
|
210
|
+
)
|
|
211
|
+
workbook = load_workbook(cast(str, result["file_path"]), read_only=True)
|
|
212
|
+
worksheet = workbook.active
|
|
213
|
+
if worksheet is None:
|
|
214
|
+
raise AssertionError("workbook has no active worksheet")
|
|
215
|
+
rows = list(worksheet.iter_rows(values_only=True))
|
|
216
|
+
workbook.close()
|
|
217
|
+
|
|
218
|
+
self.assertEqual("xlsx", result["format"])
|
|
219
|
+
self.assertEqual([("id", "name"), (1, "Alice")], rows)
|
|
220
|
+
|
|
221
|
+
def test_export_all_does_not_wrap_query_with_limit(self) -> None:
|
|
222
|
+
cursor = _CursorStub(batches=[[(1, "Alice")], []])
|
|
223
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
224
|
+
exporter, _, _, _ = _build_exporter(_mysql_config(), cursor, temp_dir)
|
|
225
|
+
result = exporter.export_query_file(
|
|
226
|
+
"crm_mysql_prod_main_ro",
|
|
227
|
+
"SELECT id, name FROM users",
|
|
228
|
+
temp_dir,
|
|
229
|
+
export_all=True,
|
|
230
|
+
file_name="users",
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
self.assertEqual("SELECT id, name FROM users", cursor.executed[0])
|
|
234
|
+
self.assertIsNone(result["applied_limit"])
|
|
235
|
+
|
|
236
|
+
def test_empty_result_creates_header_only_csv(self) -> None:
|
|
237
|
+
cursor = _CursorStub(batches=[[]])
|
|
238
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
239
|
+
exporter, _, _, _ = _build_exporter(_mysql_config(), cursor, temp_dir)
|
|
240
|
+
result = exporter.export_query_file(
|
|
241
|
+
"crm_mysql_prod_main_ro",
|
|
242
|
+
"SELECT id, name FROM users",
|
|
243
|
+
temp_dir,
|
|
244
|
+
file_name="empty",
|
|
245
|
+
)
|
|
246
|
+
file_path = cast(str, result["file_path"])
|
|
247
|
+
with Path(file_path).open("r", encoding="utf-8", newline="") as handle:
|
|
248
|
+
rows = list(csv.reader(handle))
|
|
249
|
+
|
|
250
|
+
self.assertEqual(0, result["row_count"])
|
|
251
|
+
self.assertEqual([["id", "name"]], rows)
|
|
252
|
+
|
|
253
|
+
def test_existing_file_gets_numbered_name_when_not_overwriting(self) -> None:
|
|
254
|
+
cursor = _CursorStub(batches=[[]])
|
|
255
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
256
|
+
existing = Path(temp_dir) / "users.csv"
|
|
257
|
+
existing.write_text("old", encoding="utf-8")
|
|
258
|
+
exporter, _, _, _ = _build_exporter(_mysql_config(), cursor, temp_dir)
|
|
259
|
+
result = exporter.export_query_file(
|
|
260
|
+
"crm_mysql_prod_main_ro",
|
|
261
|
+
"SELECT id, name FROM users",
|
|
262
|
+
temp_dir,
|
|
263
|
+
file_name="users",
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
self.assertTrue(cast(str, result["file_path"]).endswith("users (1).csv"))
|
|
267
|
+
|
|
268
|
+
def test_overwrite_replaces_existing_file(self) -> None:
|
|
269
|
+
cursor = _CursorStub(batches=[[]])
|
|
270
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
271
|
+
existing = Path(temp_dir) / "users.csv"
|
|
272
|
+
existing.write_text("old", encoding="utf-8")
|
|
273
|
+
exporter, _, _, _ = _build_exporter(_mysql_config(), cursor, temp_dir)
|
|
274
|
+
result = exporter.export_query_file(
|
|
275
|
+
"crm_mysql_prod_main_ro",
|
|
276
|
+
"SELECT id, name FROM users",
|
|
277
|
+
str(existing),
|
|
278
|
+
overwrite=True,
|
|
279
|
+
)
|
|
280
|
+
content = existing.read_text(encoding="utf-8")
|
|
281
|
+
|
|
282
|
+
self.assertEqual(str(existing), result["file_path"])
|
|
283
|
+
self.assertNotEqual("old", content)
|
|
284
|
+
|
|
285
|
+
def test_invalid_sql_rejected_before_connecting(self) -> None:
|
|
286
|
+
cursor = _CursorStub(batches=[])
|
|
287
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
288
|
+
exporter, registry, _, _ = _build_exporter(_mysql_config(), cursor, temp_dir)
|
|
289
|
+
with self.assertRaises(QueryExecutionError):
|
|
290
|
+
exporter.export_query_file(
|
|
291
|
+
"crm_mysql_prod_main_ro",
|
|
292
|
+
"DELETE FROM users",
|
|
293
|
+
temp_dir,
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
self.assertEqual(0, registry.connection_calls)
|
|
297
|
+
|
|
298
|
+
def test_hive_rejected_before_connecting(self) -> None:
|
|
299
|
+
cursor = _CursorStub(batches=[])
|
|
300
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
301
|
+
exporter, registry, _, _ = _build_exporter(_hive_config(), cursor, temp_dir)
|
|
302
|
+
with self.assertRaises(QueryExecutionError):
|
|
303
|
+
exporter.export_query_file(
|
|
304
|
+
"warehouse_hive_prod_main_ro",
|
|
305
|
+
"SELECT id, name FROM users",
|
|
306
|
+
temp_dir,
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
self.assertEqual(0, registry.connection_calls)
|
|
310
|
+
|
|
311
|
+
def test_export_uses_adapter_export_cursor_when_available(self) -> None:
|
|
312
|
+
default_cursor = _CursorStub(batches=[])
|
|
313
|
+
export_cursor = _CursorStub(batches=[[(1, "Alice")], []])
|
|
314
|
+
adapter = _StreamingAdapterStub(export_cursor=export_cursor)
|
|
315
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
316
|
+
exporter, _, _, _ = _build_exporter(
|
|
317
|
+
_mysql_config(), default_cursor, temp_dir, adapter=adapter
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
result = exporter.export_query_file(
|
|
321
|
+
"crm_mysql_prod_main_ro",
|
|
322
|
+
"SELECT id, name FROM users",
|
|
323
|
+
temp_dir,
|
|
324
|
+
file_name="users",
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
self.assertEqual(1, adapter.export_cursor_calls)
|
|
328
|
+
self.assertEqual(1, result["row_count"])
|
|
329
|
+
self.assertEqual([], default_cursor.executed)
|
|
330
|
+
self.assertEqual(1, len(export_cursor.executed))
|
|
331
|
+
|
|
332
|
+
def test_export_xlsx_converts_uuid_values_to_text(self) -> None:
|
|
333
|
+
row_id = uuid.uuid4()
|
|
334
|
+
cursor = _CursorStub(batches=[[(row_id, "Alice")], []])
|
|
335
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
336
|
+
exporter, _, _, _ = _build_exporter(_postgres_config(), cursor, temp_dir)
|
|
337
|
+
|
|
338
|
+
result = exporter.export_query_file(
|
|
339
|
+
"crm_pg_prod_main_ro",
|
|
340
|
+
"SELECT id, name FROM users",
|
|
341
|
+
temp_dir,
|
|
342
|
+
format="xlsx",
|
|
343
|
+
file_name="users",
|
|
344
|
+
)
|
|
345
|
+
workbook = load_workbook(cast(str, result["file_path"]), read_only=True)
|
|
346
|
+
worksheet = workbook.active
|
|
347
|
+
if worksheet is None:
|
|
348
|
+
raise AssertionError("workbook has no active worksheet")
|
|
349
|
+
rows = list(worksheet.iter_rows(values_only=True))
|
|
350
|
+
workbook.close()
|
|
351
|
+
|
|
352
|
+
self.assertEqual(str(row_id), rows[1][0])
|
|
353
|
+
|
|
354
|
+
def test_export_xlsx_removes_datetime_timezone(self) -> None:
|
|
355
|
+
value = datetime(2026, 1, 1, 8, 30, tzinfo=timezone.utc)
|
|
356
|
+
cursor = _CursorStub(batches=[[(value, "Alice")], []], description=("update_time", "name"))
|
|
357
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
358
|
+
exporter, _, _, _ = _build_exporter(_postgres_config(), cursor, temp_dir)
|
|
359
|
+
|
|
360
|
+
result = exporter.export_query_file(
|
|
361
|
+
"crm_pg_prod_main_ro",
|
|
362
|
+
"SELECT update_time, name FROM users",
|
|
363
|
+
temp_dir,
|
|
364
|
+
format="xlsx",
|
|
365
|
+
file_name="users",
|
|
366
|
+
)
|
|
367
|
+
workbook = load_workbook(cast(str, result["file_path"]), read_only=True)
|
|
368
|
+
worksheet = workbook.active
|
|
369
|
+
if worksheet is None:
|
|
370
|
+
raise AssertionError("workbook has no active worksheet")
|
|
371
|
+
rows = list(worksheet.iter_rows(values_only=True))
|
|
372
|
+
workbook.close()
|
|
373
|
+
|
|
374
|
+
self.assertEqual(value.replace(tzinfo=None), rows[1][0])
|
|
375
|
+
|
|
376
|
+
def test_export_xlsx_error_includes_column_and_type(self) -> None:
|
|
377
|
+
cursor = _CursorStub(batches=[[(object(), "Alice")], []])
|
|
378
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
379
|
+
exporter, _, _, _ = _build_exporter(_postgres_config(), cursor, temp_dir)
|
|
380
|
+
|
|
381
|
+
with self.assertRaises(QueryExecutionError) as caught:
|
|
382
|
+
exporter.export_query_file(
|
|
383
|
+
"crm_pg_prod_main_ro",
|
|
384
|
+
"SELECT id, name FROM users",
|
|
385
|
+
temp_dir,
|
|
386
|
+
format="xlsx",
|
|
387
|
+
file_name="users",
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
self.assertIn("id", str(caught.exception))
|
|
391
|
+
self.assertIn("object", str(caught.exception))
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
if __name__ == "__main__":
|
|
395
|
+
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|