sql-query-mcp 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_query_mcp-0.2.0/sql_query_mcp.egg-info → sql_query_mcp-0.3.0}/PKG-INFO +61 -30
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/README.md +58 -28
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/pyproject.toml +3 -2
- sql_query_mcp-0.3.0/sql_query_mcp/adapters/__init__.py +7 -0
- sql_query_mcp-0.3.0/sql_query_mcp/adapters/hive.py +147 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/app.py +22 -2
- sql_query_mcp-0.3.0/sql_query_mcp/async_queries.py +388 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/config.py +4 -4
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/executor.py +7 -1
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/importer.py +12 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/introspection.py +3 -2
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/namespace.py +8 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/registry.py +2 -1
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/validator.py +6 -3
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0/sql_query_mcp.egg-info}/PKG-INFO +61 -30
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/SOURCES.txt +3 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/requires.txt +1 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_app.py +10 -0
- sql_query_mcp-0.3.0/tests/test_async_queries.py +367 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_config.py +46 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_executor.py +100 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_importer.py +121 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_metadata.py +23 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_namespace.py +39 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_registry.py +20 -0
- sql_query_mcp-0.3.0/tests/test_validator.py +327 -0
- sql_query_mcp-0.2.0/sql_query_mcp/adapters/__init__.py +0 -15
- sql_query_mcp-0.2.0/tests/test_validator.py +0 -141
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/LICENSE +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/setup.cfg +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/__init__.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/__main__.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/adapters/mysql.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/adapters/postgres.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/audit.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/errors.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp/release_metadata.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/dependency_links.txt +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/entry_points.txt +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/sql_query_mcp.egg-info/top_level.txt +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_audit.py +0 -0
- {sql_query_mcp-0.2.0 → sql_query_mcp-0.3.0}/tests/test_release_metadata.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-query-mcp
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Read-only SQL MCP server for PostgreSQL and MySQL.
|
|
5
5
|
Author: Andy Wang
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,7 +8,7 @@ Project-URL: Homepage, https://github.com/andyWang1688/sql-query-mcp
|
|
|
8
8
|
Project-URL: Repository, https://github.com/andyWang1688/sql-query-mcp
|
|
9
9
|
Project-URL: Documentation, https://github.com/andyWang1688/sql-query-mcp/blob/main/README.md
|
|
10
10
|
Project-URL: Issues, https://github.com/andyWang1688/sql-query-mcp/issues
|
|
11
|
-
Keywords: mcp,mcp-server,sql,database,postgresql,mysql,cli,codex,chatgpt
|
|
11
|
+
Keywords: mcp,mcp-server,sql,database,postgresql,mysql,hive,cli,codex,chatgpt
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -22,6 +22,7 @@ Description-Content-Type: text/markdown
|
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
Requires-Dist: mcp>=1.12.4
|
|
24
24
|
Requires-Dist: openpyxl>=3.1
|
|
25
|
+
Requires-Dist: PyHive[hive_pure_sasl]>=0.7
|
|
25
26
|
Requires-Dist: PyMySQL>=1.1
|
|
26
27
|
Requires-Dist: psycopg[binary]>=3.2
|
|
27
28
|
Requires-Dist: psycopg-pool>=3.2
|
|
@@ -44,6 +45,7 @@ clear boundaries.
|
|
|
44
45
|
| --- | --- | --- |
|
|
45
46
|
| PostgreSQL | Supported | Available today |
|
|
46
47
|
| MySQL | Supported | Available today |
|
|
48
|
+
| Hive | Supported | Available today |
|
|
47
49
|
| SQLite | Candidate | Not supported yet |
|
|
48
50
|
| SQL Server | Candidate | Not supported yet |
|
|
49
51
|
| ClickHouse | Candidate | Not supported yet |
|
|
@@ -60,33 +62,39 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
60
62
|
## What AI can do with it
|
|
61
63
|
|
|
62
64
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
63
|
-
and one narrow local file import path. You can
|
|
64
|
-
understand structure before it generates SQL
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
|
72
|
-
|
|
|
73
|
-
| `
|
|
74
|
-
| `
|
|
75
|
-
| `
|
|
76
|
-
| `
|
|
77
|
-
| `
|
|
78
|
-
| `
|
|
79
|
-
| `
|
|
80
|
-
| `
|
|
65
|
+
asynchronous read-only queries, and one narrow local file import path. You can
|
|
66
|
+
use it to help an AI assistant understand structure before it generates SQL,
|
|
67
|
+
runs a bounded query, starts a long-running read-only query, or imports a
|
|
68
|
+
prepared CSV/XLSX file into an existing table.
|
|
69
|
+
|
|
70
|
+
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
71
|
+
`EXPLAIN ANALYZE` for `explain_query`.
|
|
72
|
+
|
|
73
|
+
| Tool | PostgreSQL | MySQL | Hive | Purpose |
|
|
74
|
+
| --- | --- | --- | --- | --- |
|
|
75
|
+
| `list_connections()` | Yes | Yes | Yes | List configured connections |
|
|
76
|
+
| `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
|
|
77
|
+
| `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
|
|
78
|
+
| `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
|
|
79
|
+
| `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
|
|
80
|
+
| `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
|
|
81
|
+
| `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
|
|
82
|
+
| `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
|
|
83
|
+
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
84
|
+
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
85
|
+
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
86
|
+
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
81
87
|
|
|
82
88
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
83
|
-
definitions, reviewing indexes, sampling records,
|
|
84
|
-
with `
|
|
85
|
-
|
|
89
|
+
definitions, reviewing indexes, sampling records, running short read-only
|
|
90
|
+
queries with `run_select`, running long read-only queries with `start_query`,
|
|
91
|
+
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
92
|
+
importing prepared local files. For full request and response details, see
|
|
93
|
+
`docs/api-reference.md` (Chinese).
|
|
86
94
|
|
|
87
95
|
## How boundaries are constrained
|
|
88
96
|
|
|
89
|
-
The product boundary is intentionally narrow today. PostgreSQL and MySQL
|
|
97
|
+
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
90
98
|
are available today. Query tools remain read-only, and the only write path is a
|
|
91
99
|
controlled local CSV/XLSX import into existing tables.
|
|
92
100
|
|
|
@@ -94,19 +102,25 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
94
102
|
|
|
95
103
|
- Connections declare `engine` explicitly, so the server never guesses from
|
|
96
104
|
`connection_id`.
|
|
97
|
-
- PostgreSQL uses `schema`,
|
|
98
|
-
into one vague namespace field.
|
|
105
|
+
- PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
|
|
106
|
+
collapsing both into one vague namespace field.
|
|
99
107
|
- Real DSNs stay in environment variables, while config files store only the
|
|
100
108
|
environment variable names.
|
|
101
109
|
- Query execution passes through `sqlglot` validation before reaching the
|
|
102
|
-
database.
|
|
110
|
+
database. Use `run_select` for short bounded read-only queries, and use
|
|
111
|
+
`start_query`, `get_query`, and `cancel_query` for long-running read-only
|
|
112
|
+
queries.
|
|
103
113
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
104
114
|
multi-statement input, and records audit logs for each call.
|
|
105
115
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
106
116
|
headers exactly match existing table columns.
|
|
117
|
+
- Hive `import_table_file` is intended for small files only and rejects files
|
|
118
|
+
with more than 1000 data rows. Hive imports write rows one by one, so they
|
|
119
|
+
can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
|
|
120
|
+
use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
|
|
121
|
+
pipeline.
|
|
107
122
|
|
|
108
|
-
For
|
|
109
|
-
implementation.
|
|
123
|
+
For Hive, `explain_query` uses `EXPLAIN` and `EXPLAIN ANALYZE`.
|
|
110
124
|
|
|
111
125
|
## Quick start
|
|
112
126
|
|
|
@@ -197,11 +211,28 @@ The example config looks like this.
|
|
|
197
211
|
"dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
|
|
198
212
|
"enabled": true,
|
|
199
213
|
"default_database": "crm"
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
"connection_id": "warehouse_hive_prod_main_ro",
|
|
217
|
+
"engine": "hive",
|
|
218
|
+
"label": "Warehouse Hive production / Main / read-only",
|
|
219
|
+
"env": "prod",
|
|
220
|
+
"tenant": "main",
|
|
221
|
+
"role": "ro",
|
|
222
|
+
"dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
|
|
223
|
+
"enabled": true,
|
|
224
|
+
"default_database": "default"
|
|
200
225
|
}
|
|
201
226
|
]
|
|
202
227
|
}
|
|
203
228
|
```
|
|
204
229
|
|
|
230
|
+
Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
|
|
234
|
+
```
|
|
235
|
+
|
|
205
236
|
## Documentation
|
|
206
237
|
|
|
207
238
|
If you want implementation details, setup guidance, or internal structure, use
|
|
@@ -234,7 +265,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
|
|
|
234
265
|
- `sql_query_mcp/validator.py`: read-only SQL validation
|
|
235
266
|
- `sql_query_mcp/introspection.py`: metadata inspection
|
|
236
267
|
- `sql_query_mcp/executor.py`: query execution and limits
|
|
237
|
-
- `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
|
|
268
|
+
- `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
|
|
238
269
|
|
|
239
270
|
## Contributing
|
|
240
271
|
|
|
@@ -13,6 +13,7 @@ clear boundaries.
|
|
|
13
13
|
| --- | --- | --- |
|
|
14
14
|
| PostgreSQL | Supported | Available today |
|
|
15
15
|
| MySQL | Supported | Available today |
|
|
16
|
+
| Hive | Supported | Available today |
|
|
16
17
|
| SQLite | Candidate | Not supported yet |
|
|
17
18
|
| SQL Server | Candidate | Not supported yet |
|
|
18
19
|
| ClickHouse | Candidate | Not supported yet |
|
|
@@ -29,33 +30,39 @@ without exposing raw connection strings or flattening engine-specific concepts.
|
|
|
29
30
|
## What AI can do with it
|
|
30
31
|
|
|
31
32
|
The current tool set focuses on database discovery, controlled query workflows,
|
|
32
|
-
and one narrow local file import path. You can
|
|
33
|
-
understand structure before it generates SQL
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
|
41
|
-
|
|
|
42
|
-
| `
|
|
43
|
-
| `
|
|
44
|
-
| `
|
|
45
|
-
| `
|
|
46
|
-
| `
|
|
47
|
-
| `
|
|
48
|
-
| `
|
|
49
|
-
| `
|
|
33
|
+
asynchronous read-only queries, and one narrow local file import path. You can
|
|
34
|
+
use it to help an AI assistant understand structure before it generates SQL,
|
|
35
|
+
runs a bounded query, starts a long-running read-only query, or imports a
|
|
36
|
+
prepared CSV/XLSX file into an existing table.
|
|
37
|
+
|
|
38
|
+
MySQL and Hive support `explain_query`. Hive uses `EXPLAIN` and
|
|
39
|
+
`EXPLAIN ANALYZE` for `explain_query`.
|
|
40
|
+
|
|
41
|
+
| Tool | PostgreSQL | MySQL | Hive | Purpose |
|
|
42
|
+
| --- | --- | --- | --- | --- |
|
|
43
|
+
| `list_connections()` | Yes | Yes | Yes | List configured connections |
|
|
44
|
+
| `list_schemas(connection_id)` | Yes | No | No | List visible PostgreSQL schemas |
|
|
45
|
+
| `list_databases(connection_id)` | No | Yes | Yes | List visible MySQL or Hive databases |
|
|
46
|
+
| `list_tables(connection_id, schema?, database?)` | Yes | Yes | Yes | List tables and views |
|
|
47
|
+
| `describe_table(connection_id, table_name, schema?, database?)` | Yes | Yes | Yes | Inspect columns, keys, and indexes |
|
|
48
|
+
| `run_select(connection_id, sql, limit?)` | Yes | Yes | Yes | Run short bounded read-only queries |
|
|
49
|
+
| `start_query(connection_id, sql, limit?)` | Yes | Yes | Yes | Start long-running read-only queries |
|
|
50
|
+
| `get_query(query_id, offset?, limit?)` | Yes | Yes | Yes | Fetch async query status and paginated results |
|
|
51
|
+
| `cancel_query(query_id)` | Yes | Yes | Yes | Cancel running async queries |
|
|
52
|
+
| `explain_query(connection_id, sql, analyze?)` | Yes | Yes | Yes | Inspect query plans |
|
|
53
|
+
| `get_table_sample(connection_id, table_name, schema?, database?, limit?)` | Yes | Yes | Yes | Fetch small table samples |
|
|
54
|
+
| `import_table_file(connection_id, table_name, file_path, schema?, database?, sheet_name?)` | Yes | Yes | Yes | Import local CSV/XLSX files |
|
|
50
55
|
|
|
51
56
|
These tools are useful for tasks such as listing namespaces, inspecting table
|
|
52
|
-
definitions, reviewing indexes, sampling records,
|
|
53
|
-
with `
|
|
54
|
-
|
|
57
|
+
definitions, reviewing indexes, sampling records, running short read-only
|
|
58
|
+
queries with `run_select`, running long read-only queries with `start_query`,
|
|
59
|
+
`get_query`, and `cancel_query`, analyzing read-only queries with `EXPLAIN`, and
|
|
60
|
+
importing prepared local files. For full request and response details, see
|
|
61
|
+
`docs/api-reference.md` (Chinese).
|
|
55
62
|
|
|
56
63
|
## How boundaries are constrained
|
|
57
64
|
|
|
58
|
-
The product boundary is intentionally narrow today. PostgreSQL and MySQL
|
|
65
|
+
The product boundary is intentionally narrow today. PostgreSQL, MySQL, and Hive
|
|
59
66
|
are available today. Query tools remain read-only, and the only write path is a
|
|
60
67
|
controlled local CSV/XLSX import into existing tables.
|
|
61
68
|
|
|
@@ -63,19 +70,25 @@ The service keeps those boundaries explicit in a few ways.
|
|
|
63
70
|
|
|
64
71
|
- Connections declare `engine` explicitly, so the server never guesses from
|
|
65
72
|
`connection_id`.
|
|
66
|
-
- PostgreSQL uses `schema`,
|
|
67
|
-
into one vague namespace field.
|
|
73
|
+
- PostgreSQL uses `schema`, while MySQL and Hive use `database`, without
|
|
74
|
+
collapsing both into one vague namespace field.
|
|
68
75
|
- Real DSNs stay in environment variables, while config files store only the
|
|
69
76
|
environment variable names.
|
|
70
77
|
- Query execution passes through `sqlglot` validation before reaching the
|
|
71
|
-
database.
|
|
78
|
+
database. Use `run_select` for short bounded read-only queries, and use
|
|
79
|
+
`start_query`, `get_query`, and `cancel_query` for long-running read-only
|
|
80
|
+
queries.
|
|
72
81
|
- The server accepts only `SELECT` and `WITH ... SELECT`, rejects comments and
|
|
73
82
|
multi-statement input, and records audit logs for each call.
|
|
74
83
|
- `import_table_file` doesn't accept raw SQL. It inserts only file columns whose
|
|
75
84
|
headers exactly match existing table columns.
|
|
85
|
+
- Hive `import_table_file` is intended for small files only and rejects files
|
|
86
|
+
with more than 1000 data rows. Hive imports write rows one by one, so they
|
|
87
|
+
can be slow and can hit your MCP client's tool timeout. For bulk Hive loads,
|
|
88
|
+
use Hive-native `LOAD DATA`, external tables, or your existing data ingestion
|
|
89
|
+
pipeline.
|
|
76
90
|
|
|
77
|
-
For
|
|
78
|
-
implementation.
|
|
91
|
+
For Hive, `explain_query` uses `EXPLAIN` and `EXPLAIN ANALYZE`.
|
|
79
92
|
|
|
80
93
|
## Quick start
|
|
81
94
|
|
|
@@ -166,11 +179,28 @@ The example config looks like this.
|
|
|
166
179
|
"dsn_env": "MYSQL_CONN_CRM_PROD_MAIN_RO",
|
|
167
180
|
"enabled": true,
|
|
168
181
|
"default_database": "crm"
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"connection_id": "warehouse_hive_prod_main_ro",
|
|
185
|
+
"engine": "hive",
|
|
186
|
+
"label": "Warehouse Hive production / Main / read-only",
|
|
187
|
+
"env": "prod",
|
|
188
|
+
"tenant": "main",
|
|
189
|
+
"role": "ro",
|
|
190
|
+
"dsn_env": "HIVE_CONN_WAREHOUSE_PROD_MAIN_RO",
|
|
191
|
+
"enabled": true,
|
|
192
|
+
"default_database": "default"
|
|
169
193
|
}
|
|
170
194
|
]
|
|
171
195
|
}
|
|
172
196
|
```
|
|
173
197
|
|
|
198
|
+
Set DSNs in the MCP client environment. For Hive, use a Hive DSN such as:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
export HIVE_CONN_WAREHOUSE_PROD_MAIN_RO='hive://user:password@hive.example.com:10000/default?auth=CUSTOM'
|
|
202
|
+
```
|
|
203
|
+
|
|
174
204
|
## Documentation
|
|
175
205
|
|
|
176
206
|
If you want implementation details, setup guidance, or internal structure, use
|
|
@@ -203,7 +233,7 @@ The main entry point is `sql_query_mcp/app.py`. Core modules include:
|
|
|
203
233
|
- `sql_query_mcp/validator.py`: read-only SQL validation
|
|
204
234
|
- `sql_query_mcp/introspection.py`: metadata inspection
|
|
205
235
|
- `sql_query_mcp/executor.py`: query execution and limits
|
|
206
|
-
- `sql_query_mcp/adapters/`: PostgreSQL and MySQL adapters
|
|
236
|
+
- `sql_query_mcp/adapters/`: PostgreSQL, MySQL, and Hive adapters
|
|
207
237
|
|
|
208
238
|
## Contributing
|
|
209
239
|
|
|
@@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sql-query-mcp"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "Read-only SQL MCP server for PostgreSQL and MySQL."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
11
11
|
license = "MIT"
|
|
12
12
|
license-files = ["LICENSE"]
|
|
13
13
|
authors = [{ name = "Andy Wang" }]
|
|
14
|
-
keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "cli", "codex", "chatgpt"]
|
|
14
|
+
keywords = ["mcp", "mcp-server", "sql", "database", "postgresql", "mysql", "hive", "cli", "codex", "chatgpt"]
|
|
15
15
|
classifiers = [
|
|
16
16
|
"Programming Language :: Python :: 3",
|
|
17
17
|
"Programming Language :: Python :: 3 :: Only",
|
|
@@ -25,6 +25,7 @@ classifiers = [
|
|
|
25
25
|
dependencies = [
|
|
26
26
|
"mcp>=1.12.4",
|
|
27
27
|
"openpyxl>=3.1",
|
|
28
|
+
"PyHive[hive_pure_sasl]>=0.7",
|
|
28
29
|
"PyMySQL>=1.1",
|
|
29
30
|
"psycopg[binary]>=3.2",
|
|
30
31
|
"psycopg-pool>=3.2",
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Hive adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from typing import Iterator, List
|
|
7
|
+
from urllib.parse import parse_qs, unquote, urlparse
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from pyhive import hive
|
|
11
|
+
except ImportError: # pragma: no cover - runtime dependency
|
|
12
|
+
hive = None
|
|
13
|
+
|
|
14
|
+
from ..errors import ConfigurationError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HiveAdapter:
    """Hive engine adapter: connection handling, SQL builders, and metadata.

    Connections are opened per use through :meth:`connection`; the adapter
    itself holds no state. Identifiers are always backtick-quoted before
    being interpolated into SQL text.
    """

    engine = "hive"

    @contextmanager
    def connection(self, connection_id: str, dsn: str) -> Iterator[object]:
        """Yield a PyHive connection built from ``dsn`` and close it on exit.

        ``connection_id`` is accepted but not used by this adapter.

        Raises:
            ConfigurationError: if PyHive is not installed or the DSN is
                rejected by :meth:`_parse_dsn`.
        """
        if hive is None:
            raise ConfigurationError("缺少 PyHive 依赖,请先安装项目依赖。")

        conn = hive.Connection(**self._parse_dsn(dsn))
        try:
            yield conn
        finally:
            conn.close()

    def close(self) -> None:
        """Do nothing; connections are closed by :meth:`connection` itself."""
        return None

    def set_statement_timeout(self, conn: object, timeout_ms: int) -> None:
        """Do nothing; this adapter does not apply a statement timeout."""
        return None

    def build_sample_query(self, database: str, table_name: str, sentinel_limit: int) -> str:
        """Return ``SELECT * ... LIMIT n`` for the quoted ``database.table``."""
        return f"SELECT * FROM {self._qualified_table(database, table_name)} LIMIT {int(sentinel_limit)}"

    def build_insert_query(self, database: str, table_name: str, columns: List[str]) -> str:
        """Return a parameterized ``INSERT`` with one ``%s`` per column."""
        quoted_columns = ", ".join(self._quote_identifier(column) for column in columns)
        placeholders = ", ".join(["%s"] * len(columns))
        return f"INSERT INTO {self._qualified_table(database, table_name)} ({quoted_columns}) VALUES ({placeholders})"

    def build_explain_query(self, sql_text: str, analyze: bool = False) -> str:
        """Prefix ``sql_text`` with ``EXPLAIN`` or ``EXPLAIN ANALYZE``."""
        prefix = "EXPLAIN ANALYZE" if analyze else "EXPLAIN"
        return f"{prefix} {sql_text}"

    def extract_plan(self, rows):
        """Return the first value of every row as the list of plan lines."""
        return [self._first_value(row) for row in rows]

    def column_names(self, description) -> List[str]:
        """Return the first item of each entry in a cursor ``description``.

        A missing (``None``) description yields an empty list.
        """
        return [column[0] for column in (description or [])]

    def normalize_rows(self, rows, columns: List[str]) -> List[dict]:
        """Zip each row with ``columns`` into a dict."""
        return [dict(zip(columns, row)) for row in rows]

    def list_databases(self, conn: object) -> List[str]:
        """Run ``SHOW DATABASES`` and return the first value of each row."""
        with conn.cursor() as cur:
            cur.execute("SHOW DATABASES")
            return [self._first_value(row) for row in cur.fetchall()]

    def list_tables(self, conn: object, database: str):
        """Run ``SHOW TABLES IN <database>`` and return one dict per table.

        ``table_type`` is always ``None`` because ``SHOW TABLES`` output is
        read only for the table name.
        """
        with conn.cursor() as cur:
            cur.execute(f"SHOW TABLES IN {self._quote_identifier(database)}")
            return [
                {
                    "database_name": database,
                    "table_name": self._first_value(row),
                    "table_type": None,
                }
                for row in cur.fetchall()
            ]

    def describe_table(self, conn: object, database: str, table_name: str):
        """Run ``DESCRIBE`` and return ``{"columns": [...], "indexes": []}``.

        Rows with an empty first value and ``#`` header rows are skipped.
        Rows after ``# Partition Information`` describe partition columns.
        Hive repeats those columns there after already listing them in the
        main column list, so a repeated name flags the existing entry as a
        partition key instead of adding a duplicate column.

        Returns:
            The description dict, or ``None`` when no column rows were found.
        """
        with conn.cursor() as cur:
            cur.execute(f"DESCRIBE {self._qualified_table(database, table_name)}")
            rows = cur.fetchall()

        columns = []
        seen = {}
        in_partitions = False
        for row in rows:
            name = self._first_value(row)
            if not name:
                continue
            if str(name).startswith("# Partition Information"):
                in_partitions = True
                continue
            if str(name).startswith("#"):
                continue
            if in_partitions and name in seen:
                # Already reported from the main column list; just flag it.
                seen[name]["partition_key"] = True
                continue
            values = self._row_values(row)
            data_type = values[1] if len(values) > 1 else None
            comment = values[2] if len(values) > 2 else None
            entry = {
                "column_name": name,
                "data_type": data_type,
                "udt_name": None,
                "nullable": True,
                "default": None,
                "primary_key": False,
                "extra": comment,
                "partition_key": in_partitions,
            }
            seen.setdefault(name, entry)
            columns.append(entry)

        if not columns:
            return None
        return {"columns": columns, "indexes": []}

    def _parse_dsn(self, dsn: str) -> dict:
        """Translate a ``hive://`` DSN into ``hive.Connection`` keyword args.

        Host defaults to ``localhost`` and port to ``10000``. Username,
        password and database are percent-decoded. The password may come
        from the userinfo part or, failing that, the ``password`` query
        parameter. Keys whose value is ``None`` are omitted from the result.

        Raises:
            ConfigurationError: for an unsupported scheme, unsupported query
                parameters, or a port that is not a valid integer.
        """
        parsed = urlparse(dsn)
        if parsed.scheme not in {"hive", "hive+pyhive"}:
            raise ConfigurationError(f"Hive DSN 必须使用 hive:// 或 hive+pyhive://,当前为 {parsed.scheme}")

        supported_query_keys = {"auth", "kerberos_service_name", "password"}
        # parse_qs returns lists; the last occurrence of a repeated key wins.
        query_params = {key: values[-1] for key, values in parse_qs(parsed.query).items()}
        unsupported = sorted(set(query_params) - supported_query_keys)
        if unsupported:
            raise ConfigurationError(f"Hive DSN 包含暂不支持的参数: {unsupported}")

        try:
            # urlparse validates the port lazily and raises ValueError here.
            port = parsed.port
        except ValueError as exc:
            raise ConfigurationError(f"Hive DSN 端口无效: {exc}") from exc

        connect_args = {
            "host": parsed.hostname or "localhost",
            "port": port or 10000,
            "username": unquote(parsed.username) if parsed.username else None,
            "password": unquote(parsed.password) if parsed.password else query_params.get("password"),
            "database": unquote(parsed.path.lstrip("/")) or None,
            "auth": query_params.get("auth"),
            "kerberos_service_name": query_params.get("kerberos_service_name"),
        }
        return {key: value for key, value in connect_args.items() if value is not None}

    def _quote_identifier(self, value: str) -> str:
        """Wrap ``value`` in backticks, doubling any embedded backtick."""
        return "`" + value.replace("`", "``") + "`"

    def _qualified_table(self, database: str, table_name: str) -> str:
        """Return the quoted ``database.table`` reference."""
        return f"{self._quote_identifier(database)}.{self._quote_identifier(table_name)}"

    def _first_value(self, row):
        """Return the first value of a dict row or sequence row.

        An empty dict row yields ``None``.
        """
        if isinstance(row, dict):
            return next(iter(row.values()), None)
        return row[0]

    def _row_values(self, row):
        """Return the values of a dict row or sequence row as a list."""
        if isinstance(row, dict):
            return list(row.values())
        return list(row)
|
|
@@ -6,6 +6,7 @@ from typing import Optional
|
|
|
6
6
|
|
|
7
7
|
from mcp.server.fastmcp import FastMCP
|
|
8
8
|
|
|
9
|
+
from .async_queries import AsyncQueryService
|
|
9
10
|
from .audit import AuditLogger
|
|
10
11
|
from .config import load_config
|
|
11
12
|
from .errors import SqlQueryMCPError
|
|
@@ -22,6 +23,7 @@ def create_app() -> FastMCP:
|
|
|
22
23
|
metadata = MetadataService(registry, app_config.settings, audit_logger)
|
|
23
24
|
executor = QueryExecutor(registry, app_config.settings, audit_logger)
|
|
24
25
|
importer = TableFileImporter(registry, app_config.settings, audit_logger)
|
|
26
|
+
async_queries = AsyncQueryService(registry, app_config.settings, audit_logger)
|
|
25
27
|
|
|
26
28
|
mcp = FastMCP("sql-query-mcp", json_response=True)
|
|
27
29
|
|
|
@@ -39,7 +41,7 @@ def create_app() -> FastMCP:
|
|
|
39
41
|
|
|
40
42
|
@mcp.tool()
|
|
41
43
|
def list_databases(connection_id: str) -> dict:
|
|
42
|
-
"""List visible databases for a MySQL connection."""
|
|
44
|
+
"""List visible databases for a MySQL or Hive connection."""
|
|
43
45
|
|
|
44
46
|
return _run_tool(lambda: metadata.list_databases(connection_id))
|
|
45
47
|
|
|
@@ -49,7 +51,7 @@ def create_app() -> FastMCP:
|
|
|
49
51
|
schema: Optional[str] = None,
|
|
50
52
|
database: Optional[str] = None,
|
|
51
53
|
) -> dict:
|
|
52
|
-
"""List tables and views for a resolved
|
|
54
|
+
"""List tables and views for a resolved schema or database."""
|
|
53
55
|
|
|
54
56
|
return _run_tool(lambda: metadata.list_tables(connection_id, schema, database))
|
|
55
57
|
|
|
@@ -88,6 +90,24 @@ def create_app() -> FastMCP:
|
|
|
88
90
|
|
|
89
91
|
return _run_tool(lambda: executor.get_table_sample(connection_id, table_name, schema, database, limit))
|
|
90
92
|
|
|
93
|
+
@mcp.tool()
|
|
94
|
+
def start_query(connection_id: str, sql: str, limit: Optional[int] = None) -> dict:
|
|
95
|
+
"""Start an asynchronous read-only SELECT or CTE query."""
|
|
96
|
+
|
|
97
|
+
return _run_tool(lambda: async_queries.start_query(connection_id, sql, limit))
|
|
98
|
+
|
|
99
|
+
@mcp.tool()
|
|
100
|
+
def get_query(query_id: str, offset: int = 0, limit: Optional[int] = None) -> dict:
|
|
101
|
+
"""Get asynchronous query status and paginated results when complete."""
|
|
102
|
+
|
|
103
|
+
return _run_tool(lambda: async_queries.get_query(query_id, offset, limit))
|
|
104
|
+
|
|
105
|
+
@mcp.tool()
|
|
106
|
+
def cancel_query(query_id: str) -> dict:
|
|
107
|
+
"""Cancel a running asynchronous query."""
|
|
108
|
+
|
|
109
|
+
return _run_tool(lambda: async_queries.cancel_query(query_id))
|
|
110
|
+
|
|
91
111
|
@mcp.tool()
|
|
92
112
|
def import_table_file(
|
|
93
113
|
connection_id: str,
|