sql-query-mcp 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {sql_query_mcp-0.2.0/sql_query_mcp.egg-info → sql_query_mcp-0.4.0}/PKG-INFO +71 -31
  2. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/README.md +68 -29
  3. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/pyproject.toml +3 -2
  4. sql_query_mcp-0.4.0/sql_query_mcp/adapters/__init__.py +7 -0
  5. sql_query_mcp-0.4.0/sql_query_mcp/adapters/hive.py +147 -0
  6. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/mysql.py +5 -0
  7. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/adapters/postgres.py +14 -0
  8. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/app.py +50 -2
  9. sql_query_mcp-0.4.0/sql_query_mcp/async_queries.py +388 -0
  10. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/config.py +4 -4
  11. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/executor.py +7 -1
  12. sql_query_mcp-0.4.0/sql_query_mcp/exporter.py +264 -0
  13. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/importer.py +12 -0
  14. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/introspection.py +3 -2
  15. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/namespace.py +8 -0
  16. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/registry.py +2 -1
  17. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/validator.py +6 -3
  18. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0/sql_query_mcp.egg-info}/PKG-INFO +71 -31
  19. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/SOURCES.txt +5 -0
  20. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/requires.txt +1 -0
  21. sql_query_mcp-0.4.0/tests/test_app.py +36 -0
  22. sql_query_mcp-0.4.0/tests/test_async_queries.py +367 -0
  23. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_config.py +46 -0
  24. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_executor.py +100 -0
  25. sql_query_mcp-0.4.0/tests/test_exporter.py +395 -0
  26. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_importer.py +121 -0
  27. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_metadata.py +23 -0
  28. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_namespace.py +39 -0
  29. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_registry.py +20 -0
  30. sql_query_mcp-0.4.0/tests/test_validator.py +327 -0
  31. sql_query_mcp-0.2.0/sql_query_mcp/adapters/__init__.py +0 -15
  32. sql_query_mcp-0.2.0/tests/test_app.py +0 -19
  33. sql_query_mcp-0.2.0/tests/test_validator.py +0 -141
  34. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/LICENSE +0 -0
  35. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/setup.cfg +0 -0
  36. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/__init__.py +0 -0
  37. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/__main__.py +0 -0
  38. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/audit.py +0 -0
  39. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/errors.py +0 -0
  40. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp/release_metadata.py +0 -0
  41. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
  42. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/entry_points.txt +0 -0
  43. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/sql_query_mcp.egg-info/top_level.txt +0 -0
  44. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_audit.py +0 -0
  45. {sql_query_mcp-0.2.0 → sql_query_mcp-0.4.0}/tests/test_release_metadata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-query-mcp
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
5
5
  Author: Andy Wang
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/andyWang1688/sql-query-mcp
8
8
  Project-URL: Repository, https://github.com/andyWang1688/sql-query-mcp
9
9
  Project-URL: Documentation, https://github.com/andyWang1688/sql-query-mcp/blob/main/README.md
10
10
  Project-URL: Issues, https://github.com/andyWang1688/sql-query-mcp/issues
11
- Keywords: mcp,mcp-server,sql,database,postgresql,mysql,cli,codex,chatgpt
11
+ Keywords: mcp,mcp-server,sql,database,postgresql,mysql,hive,cli,codex,chatgpt
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3 :: Only
14
14
  Classifier: Programming Language :: Python :: 3.10
@@ -22,6 +22,7 @@ Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: mcp>=1.12.4
24
24
  Requires-Dist: openpyxl>=3.1
25
+ Requires-Dist: PyHive[hive_pure_sasl]>=0.7
25
26
  Requires-Dist: PyMySQL>=1.1
26
27
  Requires-Dist: psycopg[binary]>=3.2
27
28
  Requires-Dist: psycopg-pool>=3.2
@@ -44,6 +45,7 @@ clear boundaries.
44
45
  | --- | --- | --- |
45
46
  | PostgreSQL | Supported | Available today |
46
47
  | MySQL | Supported | Available today |
48
+ | Hive | Supported | Available today |
47
49
  | SQLite | Candidate | Not supported yet |
48
50
  | SQL Server | Candidate | Not supported yet |
49
51
  | ClickHouse | Candidate | Not supported yet |
@@ -60,53 +62,74 @@ without exposing raw connection strings or flattening engine-specific concepts.
60
62
  ## What AI can do with it
61
63
 
62
64
  The current tool set focuses on database discovery, controlled query workflows,
63
- and one narrow local file import path. You can use it to help an AI assistant
64
- understand structure before it generates SQL or imports a prepared CSV/XLSX file
65
- into an existing table.
66
-
67
- MySQL supports `explain_query`, but not `explain_query(..., analyze=True)` in
68
- the current implementation.
69
-
70
- | Tool | PostgreSQL | MySQL | Purpose |
71
- | --- | --- | --- | --- |
72
- | `list_connections()` | Yes | Yes | List configured connections |
73
- | `list_schemas(connection_id)` | Yes | No | List visible PostgreSQL schemas |
74
- | `list_databases(connection_id)` | No | Yes | List visible MySQL databases |
75
- | `list_tables(connection_id, schema?, database?)` | Yes | Yes | List tables and views |
76
- | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Inspect columns, keys, and indexes |
77
- | `run_select(connection_id, sql, limit?)` | Yes | Yes | Run read-only queries |
78
- | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Inspect query plans |
79
- | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Fetch small table samples |
80
- | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Import local CSV/XLSX files |
65
+ asynchronous read-only queries, batched query result exports, and one narrow
66
+ local file import path. You can use it to help an AI assistant understand
67
+ structure before it generates SQL, runs a bounded query, starts a long-running
68
+ read-only query, exports PostgreSQL or MySQL results to a local file, or imports
69
+ a prepared CSV/XLSX file into an existing table.
70
+
71
+ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
72
+ `EXPLAIN ANALYZE` for `explain_query`.
73
+
74
+ | Tool | PostgreSQL | MySQL | Hive | Purpose |
75
+ | --- | --- | --- | --- | --- |
76
+ | `list_connections()` | Yes | Yes | Yes | List configured connections |
77
+ | `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
78
+ | `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
79
+ | `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
80
+ | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
81
+ | `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
82
+ | `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
83
+ | `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
84
+ | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
85
+ | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
86
+ | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
87
+ | `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
88
+ | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
81
89
 
82
90
  These tools are useful for tasks such as listing namespaces, inspecting table
83
- definitions, reviewing indexes, sampling records, analyzing read-only queries
84
- with `EXPLAIN`, and importing prepared local files. For full request and
85
- response details, see `docs/api-reference.md` (Chinese).
91
+ definitions, reviewing indexes, sampling records, running short read-only
92
+ queries with `run_select`, running long read-only queries with `start_query`,
93
+ `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
94
+ exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
95
+ also import prepared local files. For full request and response details, see
96
+ `docs/api-reference.md` (Chinese).
86
97
 
87
98
  ## How boundaries are constrained
88
99
 
89
- The product boundary is intentionally narrow today. Only PostgreSQL and MySQL
90
- are available today. Query tools remain read-only, and the only write path is a
100
+ The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
101
+ are available today. Query tools remain read-only, PostgreSQL and MySQL query
102
+ results can be exported to local files, and the only database write path is a
91
103
  controlled local CSV/XLSX import into existing tables.
92
104
 
93
105
  The service keeps those boundaries explicit in a few ways.
94
106
 
95
107
  - Connections declare `engine` explicitly, so the server never guesses from
96
108
  `connection_id`.
97
- - PostgreSQL uses `schema`, and MySQL uses `database`, without collapsing both
98
- into one vague namespace field.
109
+ - PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
110
+ collapsing both into one vague namespace field.
99
111
  - Real DSNs stay in environment variables, while config files store only the
100
112
  environment variable names.
101
113
  - Query execution passes through `sqlglot` validation before reaching the
102
- database.
114
+ database. Use `run_select` for short bounded read-only queries, and use
115
+ `start_query`, `get_query`, and `cancel_query` for long-running read-only
116
+ queries.
103
117
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
104
118
  multi-statement input, and records audit logs for each call.
119
+ - `export_query_file` writes files on the MCP server machine. It is synchronous
120
+ but reads database rows and writes CSV/XLSX files in batches. Large exports can
121
+ still hit your MCP client's tool timeout. For XLSX output, UUID values are
122
+ written as text and timezone-aware datetime values are written without the
123
+ timezone. Hive export is not supported yet.
105
124
  - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
106
125
  headers exactly match existing table columns.
126
+ - Hive `import_table_file` is intended for small files only and rejects files
127
+ with more than 1000 data rows. Hive imports write rows one by one, so they
128
+ can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
129
+ use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
130
+ pipeline.
107
131
 
108
- For MySQL, `explain_query(..., analyze=True)` is not available in the current
109
- implementation.
132
+ For Hive, `explain_query` runs `EXPLAIN` by default and `EXPLAIN ANALYZE` when `analyze=True`.
110
133
 
111
134
  ## Quick start
112
135
 
@@ -197,11 +220,28 @@ The example config looks like this.
197
220
  "dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
198
221
  "enabled": true,
199
222
  "default_database": "crm"
223
+ },
224
+ {
225
+ "connection_id": "warehouse_hive_prod_main_ro",
226
+ "engine": "hive",
227
+ "label": "Warehouse Hive production / Main / read-only",
228
+ "env": "prod",
229
+ "tenant": "main",
230
+ "role": "ro",
231
+ "dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
232
+ "enabled": true,
233
+ "default_database": "default"
200
234
  }
201
235
  ]
202
236
  }
203
237
  ```
204
238
 
239
+ Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
240
+
241
+ ```bash
242
+ export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
243
+ ```
244
+
205
245
  ## Documentation
206
246
 
207
247
  If you want implementation details, setup guidance, or internal structure, use
@@ -234,7 +274,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
234
274
  - `sql_query_mcp/validator.py`: read-only SQL validation
235
275
  - `sql_query_mcp/introspection.py`: metadata inspection
236
276
  - `sql_query_mcp/executor.py`: query execution and limits
237
- - `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
277
+ - `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
238
278
 
239
279
  ## Contributing
240
280
 
@@ -13,6 +13,7 @@ clear boundaries.
13
13
  | --- | --- | --- |
14
14
  | PostgreSQL | Supported | Available today |
15
15
  | MySQL | Supported | Available today |
16
+ | Hive | Supported | Available today |
16
17
  | SQLite | Candidate | Not supported yet |
17
18
  | SQL Server | Candidate | Not supported yet |
18
19
  | ClickHouse | Candidate | Not supported yet |
@@ -29,53 +30,74 @@ without exposing raw connection strings or flattening engine-specific concepts.
29
30
  ## What AI can do with it
30
31
 
31
32
  The current tool set focuses on database discovery, controlled query workflows,
32
- and one narrow local file import path. You can use it to help an AI assistant
33
- understand structure before it generates SQL or imports a prepared CSV/XLSX file
34
- into an existing table.
35
-
36
- MySQL supports `explain_query`, but not `explain_query(..., analyze=True)` in
37
- the current implementation.
38
-
39
- | Tool | PostgreSQL | MySQL | Purpose |
40
- | --- | --- | --- | --- |
41
- | `list_connections()` | Yes | Yes | List configured connections |
42
- | `list_schemas(connection_id)` | Yes | No | List visible PostgreSQL schemas |
43
- | `list_databases(connection_id)` | No | Yes | List visible MySQL databases |
44
- | `list_tables(connection_id, schema?, database?)` | Yes | Yes | List tables and views |
45
- | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Inspect columns, keys, and indexes |
46
- | `run_select(connection_id, sql, limit?)` | Yes | Yes | Run read-only queries |
47
- | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Inspect query plans |
48
- | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Fetch small table samples |
49
- | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Import local CSV/XLSX files |
33
+ asynchronous read-only queries, batched query result exports, and one narrow
34
+ local file import path. You can use it to help an AI assistant understand
35
+ structure before it generates SQL, runs a bounded query, starts a long-running
36
+ read-only query, exports PostgreSQL or MySQL results to a local file, or imports
37
+ a prepared CSV/XLSX file into an existing table.
38
+
39
+ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
40
+ `EXPLAIN ANALYZE` for `explain_query`.
41
+
42
+ | Tool | PostgreSQL | MySQL | Hive | Purpose |
43
+ | --- | --- | --- | --- | --- |
44
+ | `list_connections()` | Yes | Yes | Yes | List configured connections |
45
+ | `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
46
+ | `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
47
+ | `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
48
+ | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
49
+ | `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
50
+ | `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
51
+ | `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
52
+ | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
53
+ | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
54
+ | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
55
+ | `export_query_file(connection_id, sql, output_path, format?, limit?, export_all?, file_name?, overwrite?)` | Yes | Yes | No | Export query results to local CSV/XLSX files |
56
+ | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
50
57
 
51
58
  These tools are useful for tasks such as listing namespaces, inspecting table
52
- definitions, reviewing indexes, sampling records, analyzing read-only queries
53
- with `EXPLAIN`, and importing prepared local files. For full request and
54
- response details, see `docs/api-reference.md` (Chinese).
59
+ definitions, reviewing indexes, sampling records, running short read-only
60
+ queries with `run_select`, running long read-only queries with `start_query`,
61
+ `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
62
+ exporting PostgreSQL or MySQL query results to local CSV/XLSX files. You can
63
+ also import prepared local files. For full request and response details, see
64
+ `docs/api-reference.md` (Chinese).
55
65
 
56
66
  ## How boundaries are constrained
57
67
 
58
- The product boundary is intentionally narrow today. Only PostgreSQL and MySQL
59
- are available today. Query tools remain read-only, and the only write path is a
68
+ The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
69
+ are available today. Query tools remain read-only, PostgreSQL and MySQL query
70
+ results can be exported to local files, and the only database write path is a
60
71
  controlled local CSV/XLSX import into existing tables.
61
72
 
62
73
  The service keeps those boundaries explicit in a few ways.
63
74
 
64
75
  - Connections declare `engine` explicitly, so the server never guesses from
65
76
  `connection_id`.
66
- - PostgreSQL uses `schema`, and MySQL uses `database`, without collapsing both
67
- into one vague namespace field.
77
+ - PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
78
+ collapsing both into one vague namespace field.
68
79
  - Real DSNs stay in environment variables, while config files store only the
69
80
  environment variable names.
70
81
  - Query execution passes through `sqlglot` validation before reaching the
71
- database.
82
+ database. Use `run_select` for short bounded read-only queries, and use
83
+ `start_query`, `get_query`, and `cancel_query` for long-running read-only
84
+ queries.
72
85
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
73
86
  multi-statement input, and records audit logs for each call.
87
+ - `export_query_file` writes files on the MCP server machine. It is synchronous
88
+ but reads database rows and writes CSV/XLSX files in batches. Large exports can
89
+ still hit your MCP client's tool timeout. For XLSX output, UUID values are
90
+ written as text and timezone-aware datetime values are written without the
91
+ timezone. Hive export is not supported yet.
74
92
  - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
75
93
  headers exactly match existing table columns.
94
+ - Hive `import_table_file` is intended for small files only and rejects files
95
+ with more than 1000 data rows. Hive imports write rows one by one, so they
96
+ can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
97
+ use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
98
+ pipeline.
76
99
 
77
- For MySQL, `explain_query(..., analyze=True)` is not available in the current
78
- implementation.
100
+ For Hive, `explain_query` runs `EXPLAIN` by default and `EXPLAIN ANALYZE` when `analyze=True`.
79
101
 
80
102
  ## Quick start
81
103
 
@@ -166,11 +188,28 @@ The example config looks like this.
166
188
  "dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
167
189
  "enabled": true,
168
190
  "default_database": "crm"
191
+ },
192
+ {
193
+ "connection_id": "warehouse_hive_prod_main_ro",
194
+ "engine": "hive",
195
+ "label": "Warehouse Hive production / Main / read-only",
196
+ "env": "prod",
197
+ "tenant": "main",
198
+ "role": "ro",
199
+ "dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
200
+ "enabled": true,
201
+ "default_database": "default"
169
202
  }
170
203
  ]
171
204
  }
172
205
  ```
173
206
 
207
+ Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
208
+
209
+ ```bash
210
+ export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
211
+ ```
212
+
174
213
  ## Documentation
175
214
 
176
215
  If you want implementation details, setup guidance, or internal structure, use
@@ -203,7 +242,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
203
242
  - `sql_query_mcp/validator.py`: read-only SQL validation
204
243
  - `sql_query_mcp/introspection.py`: metadata inspection
205
244
  - `sql_query_mcp/executor.py`: query execution and limits
206
- - `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
245
+ - `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
207
246
 
208
247
  ## Contributing
209
248
 
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sql-query-mcp"
7
- version = "0.2.0"
7
+ version = "0.4.0"
8
8
  description = "Read-only SQL MCP server for PostgreSQL and MySQL."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = "MIT"
12
12
  license-files = ["LICENSE"]
13
13
  authors = [{ name = "Andy Wang" }]
14
- keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "cli", "codex", "chatgpt"]
14
+ keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "hive", "cli", "codex", "chatgpt"]
15
15
  classifiers = [
16
16
  "Programming Language :: Python :: 3",
17
17
  "Programming Language :: Python :: 3 :: Only",
@@ -25,6 +25,7 @@ classifiers = [
25
25
  dependencies = [
26
26
  "mcp>=1.12.4",
27
27
  "openpyxl>=3.1",
28
+ "PyHive[hive_pure_sasl]>=0.7",
28
29
  "PyMySQL>=1.1",
29
30
  "psycopg[binary]>=3.2",
30
31
  "psycopg-pool>=3.2",
@@ -0,0 +1,7 @@
1
+ """Database adapters."""
2
+
3
+ from .hive import HiveAdapter
4
+ from .mysql import MySQLAdapter
5
+ from .postgres import PostgresAdapter
6
+
7
+ __all__ = ["HiveAdapter", "MySQLAdapter", "PostgresAdapter"]
@@ -0,0 +1,147 @@
1
+ """Hive adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextlib import contextmanager
6
+ from typing import Iterator, List
7
+ from urllib.parse import parse_qs, unquote, urlparse
8
+
9
+ try:
10
+ from pyhive import hive
11
+ except ImportError: # pragma: no cover - runtime dependency
12
+ hive = None
13
+
14
+ from ..errors import ConfigurationError
15
+
16
+
17
class HiveAdapter:
    """Hive adapter built on PyHive.

    Opens one ``hive.Connection`` per ``connection()`` call, builds Hive SQL
    text with backtick-quoted identifiers, and reads metadata through
    ``SHOW DATABASES``, ``SHOW TABLES`` and ``DESCRIBE``.
    """

    engine = "hive"

    @contextmanager
    def connection(self, connection_id: str, dsn: str) -> Iterator[object]:
        """Yield a Hive connection for ``dsn`` and close it on exit.

        ``connection_id`` is accepted but not used by this adapter.

        Raises:
            ConfigurationError: If PyHive could not be imported, or if
                ``_parse_dsn`` rejects the DSN.
        """
        if hive is None:
            raise ConfigurationError("缺少 PyHive 依赖,请先安装项目依赖。")

        conn = hive.Connection(**self._parse_dsn(dsn))
        try:
            yield conn
        finally:
            conn.close()

    def close(self) -> None:
        """Do nothing; connections are closed by ``connection()`` itself."""
        return None

    def set_statement_timeout(self, conn: object, timeout_ms: int) -> None:
        """Do nothing; this adapter does not apply a statement timeout."""
        return None

    def build_sample_query(self, database: str, table_name: str, sentinel_limit: int) -> str:
        """Return ``SELECT * ... LIMIT n`` for the quoted ``database.table``."""
        return f"SELECT * FROM {self._qualified_table(database, table_name)} LIMIT {int(sentinel_limit)}"

    def build_insert_query(self, database: str, table_name: str, columns: List[str]) -> str:
        """Return an ``INSERT`` statement with one ``%s`` placeholder per column."""
        quoted_columns = ", ".join(self._quote_identifier(column) for column in columns)
        placeholders = ", ".join(["%s"] * len(columns))
        return f"INSERT INTO {self._qualified_table(database, table_name)} ({quoted_columns}) VALUES ({placeholders})"

    def build_explain_query(self, sql_text: str, analyze: bool = False) -> str:
        """Prefix ``sql_text`` with ``EXPLAIN`` or, when ``analyze``, ``EXPLAIN ANALYZE``."""
        prefix = "EXPLAIN ANALYZE" if analyze else "EXPLAIN"
        return f"{prefix} {sql_text}"

    def extract_plan(self, rows):
        """Return the first value of every plan row."""
        return [self._first_value(row) for row in rows]

    def column_names(self, description) -> List[str]:
        """Return the first item of each cursor description entry.

        A missing (``None``) description yields an empty list.
        """
        return [column[0] for column in (description or [])]

    def normalize_rows(self, rows, columns: List[str]) -> List[dict]:
        """Pair each row's values with ``columns`` and return one dict per row."""
        return [dict(zip(columns, row)) for row in rows]

    def list_databases(self, conn: object) -> List[str]:
        """Run ``SHOW DATABASES`` and return the first value of each row."""
        with conn.cursor() as cur:
            cur.execute("SHOW DATABASES")
            return [self._first_value(row) for row in cur.fetchall()]

    def list_tables(self, conn: object, database: str):
        """Run ``SHOW TABLES IN <database>`` and return one dict per table.

        ``table_type`` is always ``None`` because this statement does not
        report it.
        """
        with conn.cursor() as cur:
            cur.execute(f"SHOW TABLES IN {self._quote_identifier(database)}")
            return [
                {
                    "database_name": database,
                    "table_name": self._first_value(row),
                    "table_type": None,
                }
                for row in cur.fetchall()
            ]

    def describe_table(self, conn: object, database: str, table_name: str):
        """Run ``DESCRIBE`` and return ``{"columns": [...], "indexes": []}``.

        Rows whose first value is empty or starts with ``#`` are treated as
        layout rows and skipped. Rows after the ``# Partition Information``
        header mark partition columns. Hive lists those columns a second time
        in that section, so a repeated name flags the existing entry as a
        partition key instead of adding a duplicate column.

        Returns:
            The column description dict, or ``None`` when no column rows
            were found.
        """
        with conn.cursor() as cur:
            cur.execute(f"DESCRIBE {self._qualified_table(database, table_name)}")
            rows = cur.fetchall()

        columns: List[dict] = []
        seen: dict = {}
        in_partitions = False
        for row in rows:
            name = self._first_value(row)
            if isinstance(name, str):
                # Padding around names would defeat the header checks below.
                name = name.strip()
            if not name:
                continue
            label = str(name)
            if label.startswith("# Partition Information"):
                in_partitions = True
                continue
            if label.startswith("#"):
                continue

            known = seen.get(name)
            if known is not None:
                # Second listing of the same column: only update the flag.
                known["partition_key"] = known["partition_key"] or in_partitions
                continue

            values = self._row_values(row)
            entry = {
                "column_name": name,
                "data_type": values[1] if len(values) > 1 else None,
                "udt_name": None,
                "nullable": True,
                "default": None,
                "primary_key": False,
                "extra": values[2] if len(values) > 2 else None,
                "partition_key": in_partitions,
            }
            seen[name] = entry
            columns.append(entry)

        if not columns:
            return None
        return {"columns": columns, "indexes": []}

    def _parse_dsn(self, dsn: str) -> dict:
        """Translate a ``hive://`` or ``hive+pyhive://`` DSN into connection kwargs.

        Host defaults to ``localhost`` and port to ``10000``. Only the query
        keys ``auth``, ``kerberos_service_name`` and ``password`` are accepted;
        a password in the userinfo part wins over the query parameter. Keys
        whose value is ``None`` are left out of the result.

        Raises:
            ConfigurationError: For another scheme, an unsupported query key,
                or a port that is not a valid integer.
        """
        parsed = urlparse(dsn)
        if parsed.scheme not in {"hive", "hive+pyhive"}:
            raise ConfigurationError(f"Hive DSN 必须使用 hive:// 或 hive+pyhive://,当前为 {parsed.scheme}")

        supported_query_keys = {"auth", "kerberos_service_name", "password"}
        query_params = {key: values[-1] for key, values in parse_qs(parsed.query).items()}
        unsupported = sorted(set(query_params) - supported_query_keys)
        if unsupported:
            raise ConfigurationError(f"Hive DSN 包含暂不支持的参数: {unsupported}")

        try:
            # urlparse validates the port lazily and raises ValueError here.
            port = parsed.port
        except ValueError as exc:
            raise ConfigurationError(f"Hive DSN 端口无效: {exc}") from exc

        connect_args = {
            "host": parsed.hostname or "localhost",
            "port": port or 10000,
            "username": unquote(parsed.username) if parsed.username else None,
            "password": unquote(parsed.password) if parsed.password else query_params.get("password"),
            "database": parsed.path.lstrip("/") or None,
            "auth": query_params.get("auth"),
            "kerberos_service_name": query_params.get("kerberos_service_name"),
        }
        return {key: value for key, value in connect_args.items() if value is not None}

    def _quote_identifier(self, value: str) -> str:
        """Wrap ``value`` in backticks, doubling any backtick it contains."""
        return "`" + value.replace("`", "``") + "`"

    def _qualified_table(self, database: str, table_name: str) -> str:
        """Return the quoted ``database.table`` reference."""
        return f"{self._quote_identifier(database)}.{self._quote_identifier(table_name)}"

    def _first_value(self, row):
        """Return the first value of a dict row or of a sequence row."""
        if isinstance(row, dict):
            return next(iter(row.values()))
        return row[0]

    def _row_values(self, row):
        """Return all values of a dict row or of a sequence row as a list."""
        if isinstance(row, dict):
            return list(row.values())
        return list(row)
@@ -42,6 +42,11 @@ class MySQLAdapter:
42
42
  with conn.cursor() as cur:
43
43
  cur.execute("SET SESSION max_execution_time = %s", (int(timeout_ms),))
44
44
 
45
def export_cursor(self, conn: object):
    """Open an export cursor on ``conn`` using ``pymysql.cursors.SSDictCursor``.

    Raises:
        ConfigurationError: If the module-level ``pymysql`` name is ``None``,
            i.e. the PyMySQL dependency is not available.
    """
    if pymysql is not None:
        return conn.cursor(pymysql.cursors.SSDictCursor)
    raise ConfigurationError("缺少 PyMySQL 依赖,请先安装项目依赖。")
49
+
45
50
  def list_databases(self, conn: object) -> List[str]:
46
51
  with conn.cursor() as cur:
47
52
  cur.execute(
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from contextlib import contextmanager
6
+ from uuid import uuid4
6
7
  from typing import Iterator, List
7
8
 
8
9
  try:
@@ -37,6 +38,19 @@ class PostgresAdapter:
37
38
  with conn.cursor() as cur:
38
39
  cur.execute("SELECT set_config('statement_timeout', %s, false)", (str(timeout_ms),))
39
40
 
41
@contextmanager
def export_cursor(self, conn: object) -> Iterator[object]:
    """Yield a uniquely named cursor on ``conn`` for exporting rows.

    When the connection is in autocommit mode, autocommit is switched off
    for the lifetime of the cursor. On exit the connection is rolled back
    and autocommit is switched on again. Connections that are not in
    autocommit mode are left untouched.

    Autocommit is restored even if the rollback fails, so a failed export
    does not leave the connection in a different mode than it started in.
    """
    restore_autocommit = getattr(conn, "autocommit", None) is True
    if restore_autocommit:
        conn.autocommit = False
    try:
        with conn.cursor(name=f"sql_query_mcp_export_{uuid4().hex}") as cur:
            yield cur
    finally:
        if restore_autocommit:
            try:
                conn.rollback()
            finally:
                # Runs even when rollback raises; the rollback error still propagates.
                conn.autocommit = True
53
+
40
54
  def list_schemas(self, conn: object) -> List[str]:
41
55
  with conn.cursor() as cur:
42
56
  cur.execute(
@@ -6,10 +6,12 @@ from typing import Optional
6
6
 
7
7
  from mcp.server.fastmcp import FastMCP
8
8
 
9
+ from .async_queries import AsyncQueryService
9
10
  from .audit import AuditLogger
10
11
  from .config import load_config
11
12
  from .errors import SqlQueryMCPError
12
13
  from .executor import QueryExecutor
14
+ from .exporter import QueryExporter
13
15
  from .importer import TableFileImporter
14
16
  from .introspection import MetadataService
15
17
  from .registry import ConnectionRegistry
@@ -21,7 +23,9 @@ def create_app() -> FastMCP:
21
23
  audit_logger = AuditLogger(app_config.settings.audit_log_path)
22
24
  metadata = MetadataService(registry, app_config.settings, audit_logger)
23
25
  executor = QueryExecutor(registry, app_config.settings, audit_logger)
26
+ exporter = QueryExporter(registry, app_config.settings, audit_logger)
24
27
  importer = TableFileImporter(registry, app_config.settings, audit_logger)
28
+ async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
25
29
 
26
30
  mcp = FastMCP("sql-query-mcp", json_response=True)
27
31
 
@@ -39,7 +43,7 @@ def create_app() -> FastMCP:
39
43
 
40
44
  @mcp.tool()
41
45
  def list_databases(connection_id: str) -> dict:
42
- """List visible databases for a MySQL connection."""
46
+ """List visible databases for a MySQL or Hive connection."""
43
47
 
44
48
  return _run_tool(lambda: metadata.list_databases(connection_id))
45
49
 
@@ -49,7 +53,7 @@ def create_app() -> FastMCP:
49
53
  schema: Optional[str] = None,
50
54
  database: Optional[str] = None,
51
55
  ) -> dict:
52
- """List tables and views for a resolved PostgreSQL schema or MySQL database."""
56
+ """List tables and views for a resolved schema or database."""
53
57
 
54
58
  return _run_tool(lambda: metadata.list_tables(connection_id, schema, database))
55
59
 
@@ -88,6 +92,50 @@ def create_app() -> FastMCP:
88
92
 
89
93
  return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
90
94
 
95
@mcp.tool()
def export_query_file(
    connection_id: str,
    sql: str,
    output_path: str,
    format: str = "csv",
    limit: Optional[int] = 1000,
    export_all: bool = False,
    file_name: Optional[str] = None,
    overwrite: bool = False,
) -> dict:
    """Export a read-only query result to a local CSV or XLSX file."""

    # Positional order must match the exporter call: connection, SQL, target
    # path, format, limit, export_all, file name, overwrite.
    export_args = (
        connection_id,
        sql,
        output_path,
        format,
        limit,
        export_all,
        file_name,
        overwrite,
    )
    # The call is deferred so that _run_tool decides when it runs.
    return _run_tool(lambda: exporter.export_query_file(*export_args))
120
+
121
@mcp.tool()
def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
    """Start an asynchronous read-only SELECT or CTE query."""

    # The service call is deferred so that _run_tool decides when it runs.
    def _start():
        return async_queries.start_query(connection_id, sql, limit)

    return _run_tool(_start)
126
+
127
@mcp.tool()
def get_query(query_id: str, offset: int = 0, limit: Optional[int] = None) -> dict:
    """Get asynchronous query status and paginated results when complete."""

    # The service call is deferred so that _run_tool decides when it runs.
    def _fetch():
        return async_queries.get_query(query_id, offset, limit)

    return _run_tool(_fetch)
132
+
133
@mcp.tool()
def cancel_query(query_id: str) -> dict:
    """Cancel a running asynchronous query."""

    # The service call is deferred so that _run_tool decides when it runs.
    def _cancel():
        return async_queries.cancel_query(query_id)

    return _run_tool(_cancel)
138
+
91
139
  @mcp.tool()
92
140
  def import_table_file(
93
141
  connection_id: str,