sql-query-mcp 0.1.4__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {sql_query_mcp-0.1.4/sql_query_mcp.egg-info → sql_query_mcp-0.3.0}/PKG-INFO +68 -29
  2. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/README.md +64 -27
  3. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/pyproject.toml +4 -2
  4. sql_query_mcp-0.3.0/sql_query_mcp/adapters/__init__.py +7 -0
  5. sql_query_mcp-0.3.0/sql_query_mcp/adapters/hive.py +147 -0
  6. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/adapters/mysql.py +8 -0
  7. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/adapters/postgres.py +10 -0
  8. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/app.py +46 -2
  9. sql_query_mcp-0.3.0/sql_query_mcp/async_queries.py +388 -0
  10. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/config.py +4 -4
  11. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/executor.py +7 -1
  12. sql_query_mcp-0.3.0/sql_query_mcp/importer.py +235 -0
  13. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/introspection.py +3 -2
  14. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/namespace.py +8 -0
  15. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/registry.py +2 -1
  16. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/validator.py +6 -3
  17. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0/sql_query_mcp.egg-info}/PKG-INFO +68 -29
  18. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/SOURCES.txt +6 -0
  19. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/requires.txt +2 -0
  20. sql_query_mcp-0.3.0/tests/test_app.py +29 -0
  21. sql_query_mcp-0.3.0/tests/test_async_queries.py +367 -0
  22. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_config.py +46 -0
  23. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_executor.py +100 -0
  24. sql_query_mcp-0.3.0/tests/test_importer.py +451 -0
  25. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_metadata.py +23 -0
  26. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_namespace.py +39 -0
  27. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_registry.py +20 -0
  28. sql_query_mcp-0.3.0/tests/test_validator.py +327 -0
  29. sql_query_mcp-0.1.4/sql_query_mcp/adapters/__init__.py +0 -15
  30. sql_query_mcp-0.1.4/tests/test_validator.py +0 -121
  31. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/LICENSE +0 -0
  32. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/setup.cfg +0 -0
  33. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/__init__.py +0 -0
  34. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/__main__.py +0 -0
  35. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/audit.py +0 -0
  36. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/errors.py +0 -0
  37. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp/release_metadata.py +0 -0
  38. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
  39. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/entry_points.txt +0 -0
  40. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/top_level.txt +0 -0
  41. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_audit.py +0 -0
  42. {sql_query_mcp-0.1.4 → sql_query_mcp-0.3.0}/tests/test_release_metadata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-query-mcp
3
- Version: 0.1.4
3
+ Version: 0.3.0
4
4
  Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
5
5
  Author: Andy Wang
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/andyWang1688/sql-query-mcp
8
8
  Project-URL: Repository, https://github.com/andyWang1688/sql-query-mcp
9
9
  Project-URL: Documentation, https://github.com/andyWang1688/sql-query-mcp/blob/main/README.md
10
10
  Project-URL: Issues, https://github.com/andyWang1688/sql-query-mcp/issues
11
- Keywords: mcp,mcp-server,sql,database,postgresql,mysql,cli,codex,chatgpt
11
+ Keywords: mcp,mcp-server,sql,database,postgresql,mysql,hive,cli,codex,chatgpt
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3 :: Only
14
14
  Classifier: Programming Language :: Python :: 3.10
@@ -21,6 +21,8 @@ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: mcp>=1.12.4
24
+ Requires-Dist: openpyxl>=3.1
25
+ Requires-Dist: PyHive[hive_pure_sasl]>=0.7
24
26
  Requires-Dist: PyMySQL>=1.1
25
27
  Requires-Dist: psycopg[binary]>=3.2
26
28
  Requires-Dist: psycopg-pool>=3.2
@@ -35,12 +37,15 @@ Dynamic: license-file
35
37
  A general-purpose MCP server that lets AI work with multiple databases within
36
38
  clear boundaries.
37
39
 
40
+ [![sql-query-mcp MCP server](https://glama.ai/mcp/servers/andyWang1688/sql-query-mcp/badges/card.svg)](https://glama.ai/mcp/servers/andyWang1688/sql-query-mcp)
41
+
38
42
  ## Current database support
39
43
 
40
44
  | Database | Status | Current availability |
41
45
  | --- | --- | --- |
42
46
  | PostgreSQL | Supported | Available today |
43
47
  | MySQL | Supported | Available today |
48
+ | Hive | Supported | Available today |
44
49
  | SQLite | Candidate | Not supported yet |
45
50
  | SQL Server | Candidate | Not supported yet |
46
51
  | ClickHouse | Candidate | Not supported yet |
@@ -56,49 +61,66 @@ without exposing raw connection strings or flattening engine-specific concepts.
56
61
 
57
62
  ## What AI can do with it
58
63
 
59
- The current tool set focuses on database discovery and controlled query
60
- workflows. You can use it to help an AI assistant understand structure before
61
- it generates or refines SQL.
62
-
63
- MySQL supports `explain_query`, but not `explain_query(..., analyze=True)` in
64
- the current implementation.
65
-
66
- | Tool | PostgreSQL | MySQL | Purpose |
67
- | --- | --- | --- | --- |
68
- | `list_connections()` | Yes | Yes | List configured connections |
69
- | `list_schemas(connection_id)` | Yes | No | List visible PostgreSQL schemas |
70
- | `list_databases(connection_id)` | No | Yes | List visible MySQL databases |
71
- | `list_tables(connection_id, schema?, database?)` | Yes | Yes | List tables and views |
72
- | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Inspect columns, keys, and indexes |
73
- | `run_select(connection_id, sql, limit?)` | Yes | Yes | Run read-only queries |
74
- | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Inspect query plans |
75
- | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Fetch small table samples |
64
+ The current tool set focuses on database discovery, controlled query workflows,
65
+ asynchronous read-only queries, and one narrow local file import path. You can
66
+ use it to help an AI assistant understand structure before it generates SQL,
67
+ runs a bounded query, starts a long-running read-only query, or imports a
68
+ prepared CSV/XLSX file into an existing table.
69
+
70
+ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
71
+ `EXPLAIN ANALYZE` for `explain_query`. MySQL still rejects `explain_query(..., analyze=True)`.
72
+
73
+ | Tool | PostgreSQL | MySQL | Hive | Purpose |
74
+ | --- | --- | --- | --- | --- |
75
+ | `list_connections()` | Yes | Yes | Yes | List configured connections |
76
+ | `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
77
+ | `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
78
+ | `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
79
+ | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
80
+ | `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
81
+ | `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
82
+ | `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
83
+ | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
84
+ | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
85
+ | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
86
+ | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
76
87
 
77
88
  These tools are useful for tasks such as listing namespaces, inspecting table
78
- definitions, reviewing indexes, sampling records, and analyzing read-only
79
- queries with `EXPLAIN`. For full request and response details, see
89
+ definitions, reviewing indexes, sampling records, running short read-only
90
+ queries with `run_select`, running long read-only queries with `start_query`,
91
+ `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
92
+ importing prepared local files. For full request and response details, see
80
93
  `docs/api-reference.md` (Chinese).
81
94
 
82
95
  ## How boundaries are constrained
83
96
 
84
- The product boundary is intentionally narrow today. Only PostgreSQL and MySQL
85
- are available today, and the current tool set is fully read-only.
97
+ The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
98
+ are available today. Query tools remain read-only, and the only write path is a
99
+ controlled local CSV/XLSX import into existing tables.
86
100
 
87
101
  The service keeps those boundaries explicit in a few ways.
88
102
 
89
103
  - Connections declare `engine` explicitly, so the server never guesses from
90
104
  `connection_id`.
91
- - PostgreSQL uses `schema`, and MySQL uses `database`, without collapsing both
92
- into one vague namespace field.
105
+ - PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
106
+ collapsing both into one vague namespace field.
93
107
  - Real DSNs stay in environment variables, while config files store only the
94
108
  environment variable names.
95
109
  - Query execution passes through `sqlglot` validation before reaching the
96
- database.
110
+ database. Use `run_select` for short bounded read-only queries, and use
111
+ `start_query`, `get_query`, and `cancel_query` for long-running read-only
112
+ queries.
97
113
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
98
114
  multi-statement input, and records audit logs for each call.
115
+ - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
116
+ headers exactly match existing table columns.
117
+ - Hive `import_table_file` is intended for small files only and rejects files
118
+ with more than 1000 data rows. Hive imports write rows one by one, so they
119
+ can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
120
+ use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
121
+ pipeline.
99
122
 
100
- For MySQL, `explain_query(..., analyze=True)` is not available in the current
101
- implementation.
123
+ For Hive, `explain_query` uses `EXPLAIN` and `EXPLAIN ANALYZE`.
102
124
 
103
125
  ## Quick start
104
126
 
@@ -189,11 +211,28 @@ The example config looks like this.
189
211
  "dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
190
212
  "enabled": true,
191
213
  "default_database": "crm"
214
+ },
215
+ {
216
+ "connection_id": "warehouse_hive_prod_main_ro",
217
+ "engine": "hive",
218
+ "label": "Warehouse Hive production / Main / read-only",
219
+ "env": "prod",
220
+ "tenant": "main",
221
+ "role": "ro",
222
+ "dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
223
+ "enabled": true,
224
+ "default_database": "default"
192
225
  }
193
226
  ]
194
227
  }
195
228
  ```
196
229
 
230
+ Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
231
+
232
+ ```bash
233
+ export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
234
+ ```
235
+
197
236
  ## Documentation
198
237
 
199
238
  If you want implementation details, setup guidance, or internal structure, use
@@ -226,7 +265,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
226
265
  - `sql_query_mcp/validator.py`: read-only SQL validation
227
266
  - `sql_query_mcp/introspection.py`: metadata inspection
228
267
  - `sql_query_mcp/executor.py`: query execution and limits
229
- - `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
268
+ - `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
230
269
 
231
270
  ## Contributing
232
271
 
@@ -5,12 +5,15 @@
5
5
  A general-purpose MCP server that lets AI work with multiple databases within
6
6
  clear boundaries.
7
7
 
8
+ [![sql-query-mcp MCP server](https://glama.ai/mcp/servers/andyWang1688/sql-query-mcp/badges/card.svg)](https://glama.ai/mcp/servers/andyWang1688/sql-query-mcp)
9
+
8
10
  ## Current database support
9
11
 
10
12
  | Database | Status | Current availability |
11
13
  | --- | --- | --- |
12
14
  | PostgreSQL | Supported | Available today |
13
15
  | MySQL | Supported | Available today |
16
+ | Hive | Supported | Available today |
14
17
  | SQLite | Candidate | Not supported yet |
15
18
  | SQL Server | Candidate | Not supported yet |
16
19
  | ClickHouse | Candidate | Not supported yet |
@@ -26,49 +29,66 @@ without exposing raw connection strings or flattening engine-specific concepts.
26
29
 
27
30
  ## What AI can do with it
28
31
 
29
- The current tool set focuses on database discovery and controlled query
30
- workflows. You can use it to help an AI assistant understand structure before
31
- it generates or refines SQL.
32
-
33
- MySQL supports `explain_query`, but not `explain_query(..., analyze=True)` in
34
- the current implementation.
35
-
36
- | Tool | PostgreSQL | MySQL | Purpose |
37
- | --- | --- | --- | --- |
38
- | `list_connections()` | Yes | Yes | List configured connections |
39
- | `list_schemas(connection_id)` | Yes | No | List visible PostgreSQL schemas |
40
- | `list_databases(connection_id)` | No | Yes | List visible MySQL databases |
41
- | `list_tables(connection_id, schema?, database?)` | Yes | Yes | List tables and views |
42
- | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Inspect columns, keys, and indexes |
43
- | `run_select(connection_id, sql, limit?)` | Yes | Yes | Run read-only queries |
44
- | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Inspect query plans |
45
- | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Fetch small table samples |
32
+ The current tool set focuses on database discovery, controlled query workflows,
33
+ asynchronous read-only queries, and one narrow local file import path. You can
34
+ use it to help an AI assistant understand structure before it generates SQL,
35
+ runs a bounded query, starts a long-running read-only query, or imports a
36
+ prepared CSV/XLSX file into an existing table.
37
+
38
+ MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
39
+ `EXPLAIN ANALYZE` for `explain_query`. MySQL still rejects `explain_query(..., analyze=True)`.
40
+
41
+ | Tool | PostgreSQL | MySQL | Hive | Purpose |
42
+ | --- | --- | --- | --- | --- |
43
+ | `list_connections()` | Yes | Yes | Yes | List configured connections |
44
+ | `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
45
+ | `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
46
+ | `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
47
+ | `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
48
+ | `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
49
+ | `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
50
+ | `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
51
+ | `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
52
+ | `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
53
+ | `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
54
+ | `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
46
55
 
47
56
  These tools are useful for tasks such as listing namespaces, inspecting table
48
- definitions, reviewing indexes, sampling records, and analyzing read-only
49
- queries with `EXPLAIN`. For full request and response details, see
57
+ definitions, reviewing indexes, sampling records, running short read-only
58
+ queries with `run_select`, running long read-only queries with `start_query`,
59
+ `get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
60
+ importing prepared local files. For full request and response details, see
50
61
  `docs/api-reference.md` (Chinese).
51
62
 
52
63
  ## How boundaries are constrained
53
64
 
54
- The product boundary is intentionally narrow today. Only PostgreSQL and MySQL
55
- are available today, and the current tool set is fully read-only.
65
+ The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
66
+ are available today. Query tools remain read-only, and the only write path is a
67
+ controlled local CSV/XLSX import into existing tables.
56
68
 
57
69
  The service keeps those boundaries explicit in a few ways.
58
70
 
59
71
  - Connections declare `engine` explicitly, so the server never guesses from
60
72
  `connection_id`.
61
- - PostgreSQL uses `schema`, and MySQL uses `database`, without collapsing both
62
- into one vague namespace field.
73
+ - PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
74
+ collapsing both into one vague namespace field.
63
75
  - Real DSNs stay in environment variables, while config files store only the
64
76
  environment variable names.
65
77
  - Query execution passes through `sqlglot` validation before reaching the
66
- database.
78
+ database. Use `run_select` for short bounded read-only queries, and use
79
+ `start_query`, `get_query`, and `cancel_query` for long-running read-only
80
+ queries.
67
81
  - The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
68
82
  multi-statement input, and records audit logs for each call.
83
+ - `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
84
+ headers exactly match existing table columns.
85
+ - Hive `import_table_file` is intended for small files only and rejects files
86
+ with more than 1000 data rows. Hive imports write rows one by one, so they
87
+ can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
88
+ use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
89
+ pipeline.
69
90
 
70
- For MySQL, `explain_query(..., analyze=True)` is not available in the current
71
- implementation.
91
+ For Hive, `explain_query` uses `EXPLAIN` and `EXPLAIN ANALYZE`.
72
92
 
73
93
  ## Quick start
74
94
 
@@ -159,11 +179,28 @@ The example config looks like this.
159
179
  "dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
160
180
  "enabled": true,
161
181
  "default_database": "crm"
182
+ },
183
+ {
184
+ "connection_id": "warehouse_hive_prod_main_ro",
185
+ "engine": "hive",
186
+ "label": "Warehouse Hive production / Main / read-only",
187
+ "env": "prod",
188
+ "tenant": "main",
189
+ "role": "ro",
190
+ "dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
191
+ "enabled": true,
192
+ "default_database": "default"
162
193
  }
163
194
  ]
164
195
  }
165
196
  ```
166
197
 
198
+ Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
199
+
200
+ ```bash
201
+ export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
202
+ ```
203
+
167
204
  ## Documentation
168
205
 
169
206
  If you want implementation details, setup guidance, or internal structure, use
@@ -196,7 +233,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
196
233
  - `sql_query_mcp/validator.py`: read-only SQL validation
197
234
  - `sql_query_mcp/introspection.py`: metadata inspection
198
235
  - `sql_query_mcp/executor.py`: query execution and limits
199
- - `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
236
+ - `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
200
237
 
201
238
  ## Contributing
202
239
 
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "sql-query-mcp"
7
- version = "0.1.4"
7
+ version = "0.3.0"
8
8
  description = "Read-only SQL MCP server for PostgreSQL and MySQL."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = "MIT"
12
12
  license-files = ["LICENSE"]
13
13
  authors = [{ name = "Andy Wang" }]
14
- keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "cli", "codex", "chatgpt"]
14
+ keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "hive", "cli", "codex", "chatgpt"]
15
15
  classifiers = [
16
16
  "Programming Language :: Python :: 3",
17
17
  "Programming Language :: Python :: 3 :: Only",
@@ -24,6 +24,8 @@ classifiers = [
24
24
  ]
25
25
  dependencies = [
26
26
  "mcp>=1.12.4",
27
+ "openpyxl>=3.1",
28
+ "PyHive[hive_pure_sasl]>=0.7",
27
29
  "PyMySQL>=1.1",
28
30
  "psycopg[binary]>=3.2",
29
31
  "psycopg-pool>=3.2",
@@ -0,0 +1,7 @@
1
+ """Database adapters."""
2
+
3
+ from .hive import HiveAdapter
4
+ from .mysql import MySQLAdapter
5
+ from .postgres import PostgresAdapter
6
+
7
+ __all__ = ["HiveAdapter", "MySQLAdapter", "PostgresAdapter"]
@@ -0,0 +1,147 @@
1
+ """Hive adapter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextlib import contextmanager
6
+ from typing import Iterator, List
7
+ from urllib.parse import parse_qs, unquote, urlparse
8
+
9
+ try:
10
+ from pyhive import hive
11
+ except ImportError: # pragma: no cover - runtime dependency
12
+ hive = None
13
+
14
+ from ..errors import ConfigurationError
15
+
16
+
17
class HiveAdapter:
    """Hive adapter built on PyHive.

    Opens one PyHive connection per ``connection()`` call, builds the SQL text
    used for sampling, inserting and explaining, and reads metadata through
    ``SHOW DATABASES``, ``SHOW TABLES`` and ``DESCRIBE``.
    """

    engine = "hive"

    @contextmanager
    def connection(self, connection_id: str, dsn: str) -> Iterator[object]:
        """Yield a PyHive connection for ``dsn`` and close it on exit.

        ``connection_id`` is accepted but not used by this adapter.

        Raises:
            ConfigurationError: PyHive is not importable, or the DSN is invalid.
        """
        if hive is None:
            raise ConfigurationError("缺少 PyHive 依赖,请先安装项目依赖。")

        conn = hive.Connection(**self._parse_dsn(dsn))
        try:
            yield conn
        finally:
            conn.close()

    def close(self) -> None:
        """Do nothing; connections are closed by ``connection()`` itself."""
        return None

    def set_statement_timeout(self, conn: object, timeout_ms: int) -> None:
        """Do nothing; this adapter does not apply a statement timeout."""
        return None

    def build_sample_query(self, database: str, table_name: str, sentinel_limit: int) -> str:
        """Return ``SELECT * ... LIMIT n`` for the backtick-quoted table."""
        return f"SELECT * FROM {self._qualified_table(database, table_name)} LIMIT {int(sentinel_limit)}"

    def build_insert_query(self, database: str, table_name: str, columns: List[str]) -> str:
        """Return a parameterized ``INSERT`` with one ``%s`` per column."""
        quoted_columns = ", ".join(self._quote_identifier(column) for column in columns)
        placeholders = ", ".join(["%s"] * len(columns))
        return f"INSERT INTO {self._qualified_table(database, table_name)} ({quoted_columns}) VALUES ({placeholders})"

    def build_explain_query(self, sql_text: str, analyze: bool = False) -> str:
        """Prefix ``sql_text`` with ``EXPLAIN`` or ``EXPLAIN ANALYZE``."""
        prefix = "EXPLAIN ANALYZE" if analyze else "EXPLAIN"
        return f"{prefix} {sql_text}"

    def extract_plan(self, rows):
        """Return the first value of every plan row."""
        return [self._first_value(row) for row in rows]

    def column_names(self, description) -> List[str]:
        """Return the column names from a DB-API cursor description."""
        return [column[0] for column in (description or [])]

    def normalize_rows(self, rows, columns: List[str]) -> List[dict]:
        """Pair every row positionally with ``columns`` to build dicts."""
        return [dict(zip(columns, row)) for row in rows]

    def list_databases(self, conn: object) -> List[str]:
        """Return the names reported by ``SHOW DATABASES``."""
        with conn.cursor() as cur:
            cur.execute("SHOW DATABASES")
            return [self._first_value(row) for row in cur.fetchall()]

    def list_tables(self, conn: object, database: str):
        """Return one dict per name reported by ``SHOW TABLES IN <database>``.

        ``table_type`` is always ``None`` because this statement does not
        report it.
        """
        with conn.cursor() as cur:
            cur.execute(f"SHOW TABLES IN {self._quote_identifier(database)}")
            return [
                {
                    "database_name": database,
                    "table_name": self._first_value(row),
                    "table_type": None,
                }
                for row in cur.fetchall()
            ]

    def describe_table(self, conn: object, database: str, table_name: str):
        """Describe a table's columns from the output of ``DESCRIBE``.

        Returns:
            ``{"columns": [...], "indexes": []}``, or ``None`` when no column
            rows were found.
        """
        with conn.cursor() as cur:
            cur.execute(f"DESCRIBE {self._qualified_table(database, table_name)}")
            rows = cur.fetchall()

        columns = []
        seen = {}
        in_partitions = False
        for row in rows:
            raw_name = self._first_value(row)
            # Strip so padded cells and padded "#" markers are recognised.
            name = "" if raw_name is None else str(raw_name).strip()
            if not name:
                continue
            if name.startswith("# Partition Information"):
                in_partitions = True
                continue
            if name.startswith("#"):
                continue

            # Hive repeats partition columns under "# Partition Information";
            # flag the entry already collected instead of adding a duplicate.
            known = seen.get(name)
            if known is not None:
                if in_partitions:
                    known["partition_key"] = True
                continue

            values = self._row_values(row)
            data_type = values[1] if len(values) > 1 else None
            comment = values[2] if len(values) > 2 else None
            entry = {
                "column_name": name,
                "data_type": data_type,
                "udt_name": None,
                "nullable": True,
                "default": None,
                "primary_key": False,
                "extra": comment,
                "partition_key": in_partitions,
            }
            seen[name] = entry
            columns.append(entry)

        if not columns:
            return None
        return {"columns": columns, "indexes": []}

    def _parse_dsn(self, dsn: str) -> dict:
        """Translate a ``hive://`` DSN into ``hive.Connection`` keyword args.

        Only the ``auth``, ``kerberos_service_name`` and ``password`` query
        parameters are accepted. Keys whose value is ``None`` are omitted.

        Raises:
            ConfigurationError: wrong scheme, unsupported query parameter, or
                an invalid port.
        """
        parsed = urlparse(dsn)
        if parsed.scheme not in {"hive", "hive+pyhive"}:
            raise ConfigurationError(f"Hive DSN 必须使用 hive:// 或 hive+pyhive://,当前为 {parsed.scheme}")

        supported_query_keys = {"auth", "kerberos_service_name", "password"}
        # parse_qs returns a list per key; the last occurrence wins.
        query_params = {key: values[-1] for key, values in parse_qs(parsed.query).items()}
        unsupported = sorted(set(query_params) - supported_query_keys)
        if unsupported:
            raise ConfigurationError(f"Hive DSN 包含暂不支持的参数: {unsupported}")

        try:
            # Accessing .port raises ValueError for a non-numeric or
            # out-of-range port; report it like other DSN problems.
            port = parsed.port
        except ValueError as exc:
            raise ConfigurationError(f"Hive DSN 端口无效: {exc}") from exc

        connect_args = {
            "host": parsed.hostname or "localhost",
            "port": port or 10000,
            "username": unquote(parsed.username) if parsed.username else None,
            "password": unquote(parsed.password) if parsed.password else query_params.get("password"),
            "database": parsed.path.lstrip("/") or None,
            "auth": query_params.get("auth"),
            "kerberos_service_name": query_params.get("kerberos_service_name"),
        }
        return {key: value for key, value in connect_args.items() if value is not None}

    def _quote_identifier(self, value: str) -> str:
        """Wrap ``value`` in backticks, doubling any embedded backtick."""
        return "`" + value.replace("`", "``") + "`"

    def _qualified_table(self, database: str, table_name: str) -> str:
        """Return the quoted ``database.table`` reference."""
        return f"{self._quote_identifier(database)}.{self._quote_identifier(table_name)}"

    def _first_value(self, row):
        """Return the first value of a dict row or a sequence row."""
        if isinstance(row, dict):
            return next(iter(row.values()))
        return row[0]

    def _row_values(self, row):
        """Return all values of a dict row or a sequence row as a list."""
        if isinstance(row, dict):
            return list(row.values())
        return list(row)
@@ -115,6 +115,14 @@ class MySQLAdapter:
115
115
  f"{self._quote_identifier(table_name)} LIMIT {int(sentinel_limit)}"
116
116
  )
117
117
 
118
def build_insert_query(self, database: str, table_name: str, columns: List[str]) -> str:
    """Return a parameterized INSERT for ``database.table_name``.

    Every identifier goes through ``self._quote_identifier`` and each column
    gets one ``%s`` placeholder.
    """
    quote = self._quote_identifier
    column_list = ", ".join(quote(name) for name in columns)
    value_slots = ", ".join("%s" for _ in columns)
    target = f"{quote(database)}.{quote(table_name)}"
    return f"INSERT INTO {target} ({column_list}) VALUES ({value_slots})"
125
+
118
126
  def build_explain_query(self, sql_text: str, analyze: bool = False) -> str:
119
127
  if analyze:
120
128
  raise SecurityError("MySQL 首版不支持 analyze=True。")
@@ -155,6 +155,16 @@ class PostgresAdapter:
155
155
  sql.Literal(sentinel_limit),
156
156
  )
157
157
 
158
def build_insert_query(self, schema: str, table_name: str, columns: List[str]):
    """Compose a psycopg ``sql`` INSERT for ``schema.table_name``.

    Column names become ``sql.Identifier`` objects and each column gets one
    ``sql.Placeholder``. Raises ConfigurationError when psycopg's ``sql``
    module is unavailable.
    """
    if sql is None:
        raise ConfigurationError("缺少 psycopg 依赖,请先安装项目依赖。")

    column_idents = [sql.Identifier(name) for name in columns]
    value_slots = [sql.Placeholder() for _ in columns]
    separator = sql.SQL(", ")
    template = sql.SQL("INSERT INTO {}.{} ({}) VALUES ({})")
    return template.format(
        sql.Identifier(schema),
        sql.Identifier(table_name),
        separator.join(column_idents),
        separator.join(value_slots),
    )
167
+
158
168
  def build_explain_query(self, sql_text: str, analyze: bool = False) -> str:
159
169
  return f"EXPLAIN (FORMAT JSON, ANALYZE {'TRUE' if analyze else 'FALSE'}) {sql_text}"
160
170
 
@@ -6,10 +6,12 @@ from typing import Optional
6
6
 
7
7
  from mcp.server.fastmcp import FastMCP
8
8
 
9
+ from .async_queries import AsyncQueryService
9
10
  from .audit import AuditLogger
10
11
  from .config import load_config
11
12
  from .errors import SqlQueryMCPError
12
13
  from .executor import QueryExecutor
14
+ from .importer import TableFileImporter
13
15
  from .introspection import MetadataService
14
16
  from .registry import ConnectionRegistry
15
17
 
@@ -20,6 +22,8 @@ def create_app() -> FastMCP:
20
22
  audit_logger = AuditLogger(app_config.settings.audit_log_path)
21
23
  metadata = MetadataService(registry, app_config.settings, audit_logger)
22
24
  executor = QueryExecutor(registry, app_config.settings, audit_logger)
25
+ importer = TableFileImporter(registry, app_config.settings, audit_logger)
26
+ async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
23
27
 
24
28
  mcp = FastMCP("sql-query-mcp", json_response=True)
25
29
 
@@ -37,7 +41,7 @@ def create_app() -> FastMCP:
37
41
 
38
42
  @mcp.tool()
39
43
  def list_databases(connection_id: str) -> dict:
40
- """List visible databases for a MySQL connection."""
44
+ """List visible databases for a MySQL or Hive connection."""
41
45
 
42
46
  return _run_tool(lambda: metadata.list_databases(connection_id))
43
47
 
@@ -47,7 +51,7 @@ def create_app() -> FastMCP:
47
51
  schema: Optional[str] = None,
48
52
  database: Optional[str] = None,
49
53
  ) -> dict:
50
- """List tables and views for a resolved PostgreSQL schema or MySQL database."""
54
+ """List tables and views for a resolved schema or database."""
51
55
 
52
56
  return _run_tool(lambda: metadata.list_tables(connection_id, schema, database))
53
57
 
@@ -86,6 +90,46 @@ def create_app() -> FastMCP:
86
90
 
87
91
  return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
88
92
 
93
@mcp.tool()
def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
    """Start an asynchronous read-only SELECT or CTE query."""

    # The docstring above is the tool description; keep it unchanged.
    def _invoke():
        return async_queries.start_query(connection_id, sql, limit)

    return _run_tool(_invoke)
98
+
99
@mcp.tool()
def get_query(query_id: str, offset: int = 0, limit: Optional[int] = None) -> dict:
    """Get asynchronous query status and paginated results when complete."""

    # The docstring above is the tool description; keep it unchanged.
    def _invoke():
        return async_queries.get_query(query_id, offset, limit)

    return _run_tool(_invoke)
104
+
105
@mcp.tool()
def cancel_query(query_id: str) -> dict:
    """Cancel a running asynchronous query."""

    # The docstring above is the tool description; keep it unchanged.
    def _invoke():
        return async_queries.cancel_query(query_id)

    return _run_tool(_invoke)
110
+
111
@mcp.tool()
def import_table_file(
    connection_id: str,
    table_name: str,
    file_path: str,
    schema: Optional[str] = None,
    database: Optional[str] = None,
    sheet_name: Optional[str] = None,
) -> dict:
    """Import a local CSV or XLSX file into an existing table."""

    # The docstring above is the tool description; keep it unchanged.
    # Arguments are forwarded positionally, in the same order as before.
    def _invoke():
        return importer.import_table_file(
            connection_id, table_name, file_path, schema, database, sheet_name
        )

    return _run_tool(_invoke)
132
+
89
133
  return mcp
90
134
 
91
135