sqlserver-semantic-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. sqlserver_semantic_mcp/__init__.py +1 -0
  2. sqlserver_semantic_mcp/config.py +78 -0
  3. sqlserver_semantic_mcp/domain/__init__.py +0 -0
  4. sqlserver_semantic_mcp/domain/enums.py +48 -0
  5. sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
  6. sqlserver_semantic_mcp/domain/models/column.py +14 -0
  7. sqlserver_semantic_mcp/domain/models/object.py +13 -0
  8. sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
  9. sqlserver_semantic_mcp/domain/models/table.py +29 -0
  10. sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
  11. sqlserver_semantic_mcp/infrastructure/background.py +59 -0
  12. sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
  13. sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
  14. sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
  15. sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
  16. sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
  17. sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
  18. sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
  19. sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
  20. sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
  21. sqlserver_semantic_mcp/main.py +90 -0
  22. sqlserver_semantic_mcp/policy/__init__.py +0 -0
  23. sqlserver_semantic_mcp/policy/analyzer.py +194 -0
  24. sqlserver_semantic_mcp/policy/enforcer.py +104 -0
  25. sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
  26. sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
  27. sqlserver_semantic_mcp/policy/intents/base.py +17 -0
  28. sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
  29. sqlserver_semantic_mcp/policy/intents/router.py +21 -0
  30. sqlserver_semantic_mcp/policy/loader.py +90 -0
  31. sqlserver_semantic_mcp/policy/models.py +43 -0
  32. sqlserver_semantic_mcp/server/__init__.py +0 -0
  33. sqlserver_semantic_mcp/server/app.py +125 -0
  34. sqlserver_semantic_mcp/server/compact.py +74 -0
  35. sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
  36. sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
  37. sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
  38. sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
  39. sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
  40. sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
  41. sqlserver_semantic_mcp/server/resources/schema.py +144 -0
  42. sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
  43. sqlserver_semantic_mcp/server/tools/cache.py +24 -0
  44. sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
  45. sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
  46. sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
  47. sqlserver_semantic_mcp/server/tools/policy.py +48 -0
  48. sqlserver_semantic_mcp/server/tools/query.py +159 -0
  49. sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
  50. sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
  51. sqlserver_semantic_mcp/server/tools/shape.py +204 -0
  52. sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
  53. sqlserver_semantic_mcp/services/__init__.py +0 -0
  54. sqlserver_semantic_mcp/services/metadata_service.py +173 -0
  55. sqlserver_semantic_mcp/services/metrics_service.py +124 -0
  56. sqlserver_semantic_mcp/services/object_service.py +187 -0
  57. sqlserver_semantic_mcp/services/policy_service.py +59 -0
  58. sqlserver_semantic_mcp/services/query_service.py +321 -0
  59. sqlserver_semantic_mcp/services/relationship_service.py +160 -0
  60. sqlserver_semantic_mcp/services/semantic_service.py +277 -0
  61. sqlserver_semantic_mcp/workflows/__init__.py +26 -0
  62. sqlserver_semantic_mcp/workflows/bundle.py +157 -0
  63. sqlserver_semantic_mcp/workflows/contracts.py +64 -0
  64. sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
  65. sqlserver_semantic_mcp/workflows/facade.py +117 -0
  66. sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
  67. sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
  68. sqlserver_semantic_mcp/workflows/router.py +59 -0
  69. sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
  70. sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
  71. sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
  72. sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
  73. sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
  74. sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,679 @@
1
+ Metadata-Version: 2.4
2
+ Name: sqlserver-semantic-mcp
3
+ Version: 0.5.0
4
+ Summary: Semantic intelligence layer for SQL Server databases, exposed via MCP. A database understanding engine for AI agents (Claude Code CLI, Codex CLI, Claude Desktop, etc.).
5
+ Author-email: Luke <popo555155@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 lukedev999-boom
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/lukedev999-boom/sqlserver-semantic-mcp
29
+ Project-URL: Repository, https://github.com/lukedev999-boom/sqlserver-semantic-mcp
30
+ Project-URL: Issues, https://github.com/lukedev999-boom/sqlserver-semantic-mcp/issues
31
+ Project-URL: Documentation, https://github.com/lukedev999-boom/sqlserver-semantic-mcp#readme
32
+ Keywords: mcp,model-context-protocol,sql-server,mssql,claude,claude-code,codex,ai-agent,semantic,database
33
+ Classifier: Development Status :: 4 - Beta
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Operating System :: OS Independent
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Database
42
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
43
+ Classifier: Typing :: Typed
44
+ Requires-Python: >=3.11
45
+ Description-Content-Type: text/markdown
46
+ License-File: LICENSE
47
+ Requires-Dist: mcp>=1.0.0
48
+ Requires-Dist: pymssql>=2.3.0
49
+ Requires-Dist: pydantic>=2.0.0
50
+ Requires-Dist: pydantic-settings>=2.0.0
51
+ Requires-Dist: aiosqlite>=0.20.0
52
+ Provides-Extra: dev
53
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
54
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
55
+ Requires-Dist: pytest-mock>=3.0.0; extra == "dev"
56
+ Dynamic: license-file
57
+
58
+ # sqlserver-semantic-mcp
59
+
60
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
61
+ [![Python](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/)
62
+ [![MCP](https://img.shields.io/badge/MCP-1.0%2B-purple.svg)](https://modelcontextprotocol.io)
63
+ [![Version](https://img.shields.io/badge/version-0.5.0-green.svg)](pyproject.toml)
64
+ [![繁體中文](https://img.shields.io/badge/lang-繁體中文-red.svg)](README.zh-TW.md)
65
+
66
+ > **Semantic intelligence layer for SQL Server databases, exposed via MCP.**
67
+ > Not a SQL executor — a database understanding engine for AI agents.
68
+
69
+ AI agents don't need raw `execute_sql`. They need to understand schema structure, relationships, object dependencies, and — most importantly — to operate inside a safety boundary that an operator can define.
70
+
71
+ `sqlserver-semantic-mcp` provides all of this through 29 MCP tools, 1 concrete MCP resource, and 5 MCP resource templates, backed by a two-tier SQLite cache for speed and a JSON-based policy system for safety.
72
+
73
+ ---
74
+
75
+ ## Quick Start
76
+
77
+ Pick the path that matches your client. All paths use [`uvx`](https://docs.astral.sh/uv/) — no `git clone`, no virtualenv, no manual install. `uvx` downloads and runs the package on demand and caches it for next time.
78
+
79
+ > **Prerequisite:** Install [uv](https://docs.astral.sh/uv/getting-started/installation/) once (`curl -LsSf https://astral.sh/uv/install.sh | sh`). Python 3.11+ is fetched automatically by `uv` if needed.
80
+
81
+ > **Replace** `localhost` / `YourDatabase` / `sa` / `YourPassword` in every example with your real SQL Server credentials.
82
+
83
+ ### 🤖 Claude Code CLI
84
+
85
+ One command registers the server. `uvx` resolves and caches `sqlserver-semantic-mcp` on first use:
86
+
87
+ ```bash
88
+ claude mcp add sqlserver-semantic \
89
+ -e SEMANTIC_MCP_MSSQL_SERVER=localhost \
90
+ -e SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase \
91
+ -e SEMANTIC_MCP_MSSQL_USER=sa \
92
+ -e SEMANTIC_MCP_MSSQL_PASSWORD=YourPassword -- uvx sqlserver-semantic-mcp
93
+ ```
94
+
95
+ Or commit the config to your repo as `.mcp.json` for the whole team to share:
96
+
97
+ ```json
98
+ {
99
+ "mcpServers": {
100
+ "sqlserver-semantic": {
101
+ "command": "uvx",
102
+ "args": ["sqlserver-semantic-mcp"],
103
+ "env": {
104
+ "SEMANTIC_MCP_MSSQL_SERVER": "localhost",
105
+ "SEMANTIC_MCP_MSSQL_DATABASE": "YourDatabase",
106
+ "SEMANTIC_MCP_MSSQL_USER": "sa",
107
+ "SEMANTIC_MCP_MSSQL_PASSWORD": "YourPassword"
108
+ }
109
+ }
110
+ }
111
+ }
112
+ ```
113
+
114
+ Verify with `claude mcp list`. The server speaks MCP over stdio and will be ready as soon as Claude Code launches a session.
115
+
116
+ ### 🛠 Codex CLI
117
+
118
+ Add this block to `~/.codex/config.toml`:
119
+
120
+ ```toml
121
+ [mcp_servers.sqlserver-semantic]
122
+ command = "uvx"
123
+ args = ["sqlserver-semantic-mcp"]
124
+ env = { SEMANTIC_MCP_MSSQL_SERVER = "localhost", SEMANTIC_MCP_MSSQL_DATABASE = "YourDatabase", SEMANTIC_MCP_MSSQL_USER = "sa", SEMANTIC_MCP_MSSQL_PASSWORD = "YourPassword" }
125
+ ```
126
+
127
+ Then run `codex` — the server will appear in your MCP tool list.
128
+
129
+ ### 🖥 Claude Desktop
130
+
131
+ Edit your config file:
132
+
133
+ - **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
134
+ - **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
135
+
136
+ ```json
137
+ {
138
+ "mcpServers": {
139
+ "sqlserver-semantic": {
140
+ "command": "uvx",
141
+ "args": ["sqlserver-semantic-mcp"],
142
+ "env": {
143
+ "SEMANTIC_MCP_MSSQL_SERVER": "localhost",
144
+ "SEMANTIC_MCP_MSSQL_DATABASE": "YourDatabase",
145
+ "SEMANTIC_MCP_MSSQL_USER": "sa",
146
+ "SEMANTIC_MCP_MSSQL_PASSWORD": "YourPassword"
147
+ }
148
+ }
149
+ }
150
+ }
151
+ ```
152
+
153
+ Restart Claude Desktop after saving.
154
+
155
+ ### 🧪 Smoke test (optional, all clients)
156
+
157
+ Confirm the package can run before wiring it into a host:
158
+
159
+ ```bash
160
+ SEMANTIC_MCP_MSSQL_SERVER=localhost \
161
+ SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase \
162
+ SEMANTIC_MCP_MSSQL_USER=sa \
163
+ SEMANTIC_MCP_MSSQL_PASSWORD=YourPassword \
164
+ uvx sqlserver-semantic-mcp
165
+ ```
166
+
167
+ You should see startup log lines confirming cache initialisation and tool registration. Press `Ctrl+C` to stop.
168
+
169
+ ### 🧰 Local development (contributors only)
170
+
171
+ Skip this section if you just want to use the server.
172
+
173
+ ```bash
174
+ git clone https://github.com/lukedev999-boom/sqlserver-semantic-mcp.git
175
+ cd sqlserver-semantic-mcp
176
+ cp .env.example .env # then fill in MSSQL credentials
177
+ uv sync --dev # creates .venv with dev deps
178
+ uv run python -m sqlserver_semantic_mcp.main
179
+ ```
180
+
181
+ For an editable install with pip instead:
182
+
183
+ ```bash
184
+ pip install -e ".[dev]"
185
+ sqlserver-semantic-mcp
186
+ ```
187
+
188
+ When pointing an MCP client at a local checkout, replace `uvx sqlserver-semantic-mcp` with:
189
+
190
+ ```json
191
+ "command": "uv",
192
+ "args": ["run", "--project", "/absolute/path/to/sqlserver-semantic-mcp",
193
+ "python", "-m", "sqlserver_semantic_mcp.main"]
194
+ ```
195
+
196
+ See the full env-var matrix in [Configuration](#configuration).
197
+
198
+ ---
199
+
200
+ ## Features
201
+
202
+ - **29 MCP tools** across 9 capability groups (metadata, relationship, semantic, object, query, policy, cache, metrics, workflow)
203
+ - **Two-tier SQLite cache** — Structural Cache (warm on startup) + Semantic Cache (lazy + background fill)
204
+ - **Cache-first startup** — reuse existing structural cache by default and avoid mandatory full warmup on every process start
205
+ - **3-hash schema versioning** — detect when structural / object / comment changes invalidate cached analysis
206
+ - **Policy-gated execution** — SELECT/INSERT/UPDATE/DELETE/… permissions, WHERE-clause requirements, row caps, schema/table allowlists
207
+ - **Semantic classification** — automatic detection of fact / dimension / lookup / bridge / audit tables
208
+ - **Join path discovery** — BFS over the FK graph to find how two tables relate
209
+ - **Object inspection** — view / procedure / function definitions with dependency tracing plus read/write split
210
+ - **Workflow shortcuts** — discovery, risk estimation, context bundling, and direct execution fast-path tools
211
+ - **Payload metrics** — built-in measurement for per-tool response size
212
+ - **Graceful degradation** — missing or malformed policy file falls back to read-only; unreachable DB doesn't corrupt cache
213
+
214
+ ---
215
+
216
+ ## Architecture
217
+
218
+ Five-layer architecture with strict one-way dependencies:
219
+
220
+ ```
221
+ MCP Interface (server/) ← tool / resource registration
222
+
223
+ Application (services/) ← 7 services orchestrate cache + policy + DB
224
+
225
+ Policy / Domain (policy/, domain/) ← models, SQL intent analysis, enforcement
226
+
227
+ Infrastructure (infrastructure/) ← pymssql + SQLite + background task
228
+
229
+ SQL Server + SQLite
230
+ ```
231
+
232
+ ### Cache Model
233
+
234
+ | Layer | Contents | Strategy | Invalidation |
235
+ |---|---|---|---|
236
+ | **Structural Cache** | tables, columns, PK/FK, indexes, objects list, comments | warm on startup, SQLite persisted | `structural_hash` / `object_hash` / `comment_hash` mismatch |
237
+ | **Semantic Cache** | table classification, column semantics, object definitions, dependencies | lazy + background incremental fill | hash change → rows marked `dirty` → recomputed |
238
+
239
+ ---
240
+
241
+ ## Installation
242
+
243
+ > Most users should follow [Quick Start](#quick-start) instead — it uses `uvx` and needs no install step. This section is for contributors and offline / air-gapped setups.
244
+
245
+ Requires Python 3.11+.
246
+
247
+ **One-shot run via uvx** (no install, recommended for end users):
248
+
249
+ ```bash
250
+ uvx sqlserver-semantic-mcp
251
+ ```
252
+
253
+ **Install globally as a CLI tool:**
254
+
255
+ ```bash
256
+ uv tool install sqlserver-semantic-mcp
257
+ # or:
258
+ pipx install sqlserver-semantic-mcp
259
+ ```
260
+
261
+ **Editable install from source with pip** (registers the `sqlserver-semantic-mcp` console script on your PATH):
262
+
263
+ ```bash
264
+ pip install -e ".[dev]"
265
+ ```
266
+
267
+ **Install with uv from source:**
268
+
269
+ ```bash
270
+ uv sync
271
+ # include dev dependencies:
272
+ uv sync --dev
273
+ ```
274
+
275
+ **Run without installing** (uv project mode):
276
+
277
+ ```bash
278
+ uv run python -m sqlserver_semantic_mcp.main
279
+ ```
280
+
281
+ Installed dependencies:
282
+
283
+ | Package | Role |
284
+ |---|---|
285
+ | `mcp` | MCP SDK (stdio transport) |
286
+ | `pymssql` | SQL Server wire driver (wraps FreeTDS) |
287
+ | `pydantic` + `pydantic-settings` | Config validation, env var loading |
288
+ | `aiosqlite` | Async SQLite for the two-tier cache |
289
+
290
+ Dev-only dependencies: `pytest`, `pytest-asyncio`, `pytest-mock`.
291
+
292
+ > **Linux note:** `pymssql` links against FreeTDS. If `pip install` fails with a compiler error, install system headers first — see [Troubleshooting](#troubleshooting).
293
+
294
+ ---
295
+
296
+ ## Configuration
297
+
298
+ All configuration is via environment variables with the `SEMANTIC_MCP_` prefix. A `.env` file in the working directory is also loaded automatically. Start from `.env.example`.
299
+
300
+ ### Required
301
+
302
+ | Variable | Description |
303
+ |---|---|
304
+ | `SEMANTIC_MCP_MSSQL_SERVER` | SQL Server host (supports `(localdb)\Instance` and `*.database.windows.net`) |
305
+ | `SEMANTIC_MCP_MSSQL_DATABASE` | Target database name |
306
+ | `SEMANTIC_MCP_MSSQL_USER` | SQL auth user (not required when `SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true`) |
307
+ | `SEMANTIC_MCP_MSSQL_PASSWORD` | SQL auth password (not required when `SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true`) |
308
+
309
+ ### Optional
310
+
311
+ | Variable | Default | Description |
312
+ |---|---|---|
313
+ | `SEMANTIC_MCP_MSSQL_PORT` | `1433` | TCP port |
314
+ | `SEMANTIC_MCP_MSSQL_WINDOWS_AUTH` | `false` | Use Windows Authentication |
315
+ | `SEMANTIC_MCP_MSSQL_ENCRYPT` | `false` | Force TLS (auto-enabled for Azure SQL) |
316
+ | `SEMANTIC_MCP_CACHE_PATH` | `./cache/semantic_mcp.db` | SQLite cache file location |
317
+ | `SEMANTIC_MCP_CACHE_ENABLED` | `true` | Disable to skip startup warmup |
318
+ | `SEMANTIC_MCP_STARTUP_MODE` | `cache_first` | `cache_first` reuses existing cache on restart; `full` always refreshes from SQL Server before serving |
319
+ | `SEMANTIC_MCP_BACKGROUND_BATCH_SIZE` | `5` | Tables processed per background batch |
320
+ | `SEMANTIC_MCP_BACKGROUND_INTERVAL_MS` | `500` | Delay between batches |
321
+ | `SEMANTIC_MCP_POLICY_FILE` | *(builtin readonly)* | Path to policy JSON |
322
+ | `SEMANTIC_MCP_POLICY_PROFILE` | *(file's active_profile)* | Override which profile is active |
323
+ | `SEMANTIC_MCP_MAX_ROWS_RETURNED` | `1000` | Override SELECT row cap |
324
+ | `SEMANTIC_MCP_MAX_ROWS_AFFECTED` | `100` | Override DML affected-row cap |
325
+ | `SEMANTIC_MCP_QUERY_TIMEOUT` | `30` | Query timeout in seconds |
326
+ | `SEMANTIC_MCP_TOOL_PROFILE` | `all` | Comma-separated tool groups: metadata, relationship, semantic, object, query, policy, cache, metrics, workflow |
327
+ | `SEMANTIC_MCP_WORKFLOW_TOOLS_ENABLED` | `true` | Disable workflow-layer shortcut tools |
328
+ | `SEMANTIC_MCP_METRICS_ENABLED` | `true` | Enable per-tool response size metrics |
329
+ | `SEMANTIC_MCP_DEFAULT_DETAIL` | `brief` | Default detail tier for agent-facing tools |
330
+ | `SEMANTIC_MCP_DEFAULT_RESPONSE_MODE` | `summary` | Default query execution response shape |
331
+ | `SEMANTIC_MCP_DEFAULT_TOKEN_BUDGET_HINT` | `low` | Default sampling budget for query payloads |
332
+ | `SEMANTIC_MCP_DIRECT_EXECUTE_ENABLED` | `true` | Allow workflow fast-path direct execution when policy approves |
333
+ | `SEMANTIC_MCP_STRICT_ROWS_AFFECTED_CAP` | `true` | Roll back writes that exceed affected-row cap by default |
334
+ | `SEMANTIC_MCP_INTENT_ANALYZER` | `regex` | SQL intent analyzer backend (`regex` or `ast`) |
335
+
336
+ ---
337
+
338
+ ## Connection Scenarios
339
+
340
+ Copy the relevant block into your `.env` file (or into the `env` map in your MCP client config).
341
+
342
+ ### SQL Authentication (default)
343
+
344
+ The most common setup. SQL Server Authentication must be enabled on the instance.
345
+
346
+ ```env
347
+ SEMANTIC_MCP_MSSQL_SERVER=localhost
348
+ SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase
349
+ SEMANTIC_MCP_MSSQL_USER=sa
350
+ SEMANTIC_MCP_MSSQL_PASSWORD=YourPassword
351
+ ```
352
+
353
+ ### Windows Authentication
354
+
355
+ Omit `MSSQL_USER` and `MSSQL_PASSWORD`; the process must run under a Windows account that has SQL Server access. Use double-backslash for named instances in `.env` files.
356
+
357
+ ```env
358
+ SEMANTIC_MCP_MSSQL_SERVER=MY-PC\\SQLEXPRESS
359
+ SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase
360
+ SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true
361
+ ```
362
+
363
+ > Windows Authentication is only available on Windows. pymssql does not support it on Linux or macOS — use SQL Authentication on those platforms.
364
+
365
+ ### Azure SQL Database
366
+
367
+ TLS is automatically enabled when the server name ends in `.database.windows.net`; you do not need to set `MSSQL_ENCRYPT` explicitly.
368
+
369
+ ```env
370
+ SEMANTIC_MCP_MSSQL_SERVER=yourserver.database.windows.net
371
+ SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase
372
+ SEMANTIC_MCP_MSSQL_USER=youradmin@yourserver
373
+ SEMANTIC_MCP_MSSQL_PASSWORD=YourPassword
374
+ ```
375
+
376
+ ### LocalDB (Windows only)
377
+
378
+ LocalDB communicates over a named pipe — no TCP port is required. Windows Authentication is used by default.
379
+
380
+ ```env
381
+ SEMANTIC_MCP_MSSQL_SERVER=(localdb)\MSSQLLocalDB
382
+ SEMANTIC_MCP_MSSQL_DATABASE=YourDatabase
383
+ SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true
384
+ ```
385
+
386
+ ### Custom Policy File
387
+
388
+ Point `SEMANTIC_MCP_POLICY_FILE` at a JSON policy file you control. Without this setting the server operates in built-in read-only mode.
389
+
390
+ ```env
391
+ SEMANTIC_MCP_POLICY_FILE=./config/policy.example.json
392
+ SEMANTIC_MCP_POLICY_PROFILE=read_write_safe
393
+ ```
394
+
395
+ See [Policy System](#policy-system) for the full policy file format and available profiles.
396
+
397
+ ### Custom Cache Location
398
+
399
+ Useful when running multiple server instances against different databases, or when the default `./cache/` directory is not writable.
400
+
401
+ ```env
402
+ SEMANTIC_MCP_CACHE_PATH=/var/lib/sqlserver-mcp/mydb.db
403
+ ```
404
+
405
+ ---
406
+
407
+ ## Policy System
408
+
409
+ **Default security posture:** when no policy file is configured, the server runs in built-in **read-only** mode — no configuration is needed to enforce it. In this mode only `SELECT` statements are permitted, results are capped at 1000 rows, multi-statement queries are rejected, and every query still passes through the policy enforcer before reaching `cursor.execute()`. No unrestricted SQL execution path exists.
410
+
411
+ To enable writes or change any constraint, create a policy JSON file and point `SEMANTIC_MCP_POLICY_FILE` at it (see `config/policy.example.json`):
412
+
413
+ ```json
414
+ {
415
+ "active_profile": "read_write_safe",
416
+ "profiles": {
417
+ "readonly": { "operations": { "select": true } },
418
+ "read_write_safe": {
419
+ "operations": { "select": true, "insert": true, "update": true },
420
+ "constraints": {
421
+ "require_where_for_update": true,
422
+ "max_rows_affected": 100
423
+ }
424
+ },
425
+ "admin": {
426
+ "operations": { "select": true, "insert": true, "update": true, "delete": true },
427
+ "constraints": { "allow_multi_statement": true }
428
+ }
429
+ }
430
+ }
431
+ ```
432
+
433
+ ### Policy fields
434
+
435
+ **Operations** — 10 flags (select / insert / update / delete / truncate / create / alter / drop / execute / merge)
436
+
437
+ **Constraints** — `require_where_for_update`, `require_where_for_delete`, `require_top_for_select`, `max_rows_returned`, `max_rows_affected`, `allow_multi_statement`, `query_timeout_seconds`
438
+
439
+ **Scope** — `allowed_databases`, `allowed_schemas`, `allowed_tables`, `denied_tables`
440
+
441
+ ### Profile quick reference
442
+
443
+ | Profile | SELECT | INSERT | UPDATE | DELETE | WHERE required | Row cap |
444
+ |---|---|---|---|---|---|---|
445
+ | `readonly` (builtin default) | Yes | No | No | No | N/A | 1000 returned |
446
+ | `read_write_safe` | Yes | Yes | Yes | No | UPDATE requires WHERE | 100 affected |
447
+ | `admin` | Yes | Yes | Yes | Yes | No | 10 000 affected |
448
+
449
+ > **Safety note:** when `allowed_schemas` is set, queries that reference a table without a schema prefix (e.g. `SELECT * FROM Users` instead of `dbo.Users`) are rejected — you cannot bypass schema-level access control with implicit defaults.
450
+
451
+ ### Failure behavior
452
+
453
+ | Condition | Behavior |
454
+ |---|---|
455
+ | Policy file path unset | Builtin readonly, log warning |
456
+ | Policy file missing | Builtin readonly, log warning |
457
+ | Policy file unreadable | Builtin readonly, log error |
458
+ | Policy file has invalid JSON | Builtin readonly, log error |
459
+ | Policy file fails schema validation | Builtin readonly, log error |
460
+ | `active_profile` / override points to a missing profile | Server refuses to start (misconfiguration surfaced) |
461
+
462
+ ---
463
+
464
+ ## MCP Tools
465
+
466
+ Current tool groups:
467
+
468
+ - `metadata` (3): `get_tables`, `describe_table`, `get_columns`
469
+ - `relationship` (3): `get_table_relationships`, `find_join_path`, `get_dependency_chain`
470
+ - `semantic` (3): `classify_table`, `analyze_columns`, `detect_lookup_tables`
471
+ - `object` (3): `describe_view`, `describe_procedure`, `trace_object_dependencies`
472
+ - `query` (5): `validate_query`, `run_safe_query`, `plan_or_execute_query`, `preview_safe_query`, `estimate_execution_risk`
473
+ - `policy` (3): `get_execution_policy`, `validate_sql_against_policy`, `refresh_policy`
474
+ - `cache` (1): `refresh_schema_cache`
475
+ - `metrics` (2): `get_tool_metrics`, `reset_tool_metrics`
476
+ - `workflow` (6): `discover_relevant_tables`, `suggest_next_tool`, `bundle_context_for_next_step`, `score_join_candidate`, `summarize_table_for_joining`, `summarize_object_for_impact`
477
+
478
+ For smaller prompts and faster discovery, prefer the workflow tools plus `detail="brief"` and filtered metadata calls.
479
+
480
+ ---
481
+
482
+ ## MCP Resources
483
+
484
+ Auto-listed concrete resources:
485
+
486
+ - `semantic://summary/database`
487
+
488
+ Auto-listed resource templates:
489
+
490
+ - `semantic://schema/tables/{qualified}`
491
+ - `semantic://analysis/classification/{qualified}`
492
+ - `semantic://summary/table/{qualified}`
493
+ - `semantic://summary/object/{type}/{qualified}`
494
+ - `semantic://bundle/joining/{qualified}`
495
+
496
+ Backward-compatible direct reads are also supported for:
497
+
498
+ - `semantic://schema/tables`
499
+ - `semantic://analysis/dependencies/{type}/{schema}.{name}`
500
+
501
+ ---
502
+
503
+ ## Running the Server
504
+
505
+ **Via the installed console script** (after `pip install -e .`):
506
+
507
+ ```bash
508
+ sqlserver-semantic-mcp
509
+ ```
510
+
511
+ **Via uv (without installing):**
512
+
513
+ ```bash
514
+ uv run python -m sqlserver_semantic_mcp.main
515
+ ```
516
+
517
+ **Via Python directly** (when the package is already on `sys.path`):
518
+
519
+ ```bash
520
+ python -m sqlserver_semantic_mcp.main
521
+ ```
522
+
523
+ The server speaks MCP over stdio. On startup it:
524
+
525
+ 1. Opens (or creates) the SQLite cache
526
+ 2. Reuses the existing Structural cache when `SEMANTIC_MCP_STARTUP_MODE=cache_first`, otherwise refreshes from SQL Server
527
+ 3. Enqueues all tables for Semantic analysis
528
+ 4. Launches the background fill task
529
+ 5. Accepts MCP tool/resource calls
530
+
531
+ Background fill uses exponential backoff (2ⁿ seconds, capped at 60s) on persistent errors to avoid log spam or CPU burn.
532
+
533
+ ---
534
+
535
+ ## Development
536
+
537
+ ### Running tests
538
+
539
+ ```bash
540
+ uv run --extra dev pytest tests/unit
541
+ uv run --extra dev pytest tests/integration -m integration
542
+ ```
543
+
544
+ ### Publishing a release (maintainers)
545
+
546
+ The package is distributed on PyPI so end users can run `uvx sqlserver-semantic-mcp` without cloning. Release flow:
547
+
548
+ 1. Bump `version` in `pyproject.toml` (semantic versioning).
549
+ 2. Update the changelog in `docs/` and the version badge at the top of this README.
550
+ 3. Commit and tag:
551
+ ```bash
552
+ git commit -am "chore: bump to vX.Y.Z"
553
+ git tag vX.Y.Z
554
+ git push origin main --tags
555
+ ```
556
+ 4. Build and verify the artifacts locally:
557
+ ```bash
558
+ uv build # produces dist/*.whl + dist/*.tar.gz
559
+ uvx --from twine twine check dist/*
560
+ ```
561
+ 5. (Recommended) Smoke test the wheel against TestPyPI first:
562
+ ```bash
563
+ uvx --from twine twine upload --repository testpypi dist/*
564
+ uvx --index-url https://test.pypi.org/simple/ sqlserver-semantic-mcp
565
+ ```
566
+ 6. Publish to PyPI:
567
+ ```bash
568
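+ uv publish # uses UV_PUBLISH_TOKEN (or --token)
569
+ ```
570
+
571
+ > Configure a PyPI API token once: `export UV_PUBLISH_TOKEN=pypi-…` (or pass `--token`). If your credentials are stored under `[pypi]` in `~/.pypirc`, confirm that your `uv` version reads that file; otherwise upload with `uvx --from twine twine upload dist/*`, which does.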
572
+
573
+ ### Project structure
574
+
575
+ ```
576
+ sqlserver_semantic_mcp/
577
+ ├── config.py — env-backed Pydantic settings
578
+ ├── main.py — stdio server + startup + background task
579
+ ├── domain/
580
+ │ ├── enums.py — TableType, ObjectType, CacheStatus, RiskLevel, SqlOperation
581
+ │ └── models/ — Column, Table, ForeignKey, Index, Relationship, DbObject
582
+ ├── policy/
583
+ │ ├── models.py — PolicyProfile / PolicyOperations / PolicyConstraints / PolicyScope
584
+ │ ├── loader.py — JSON loading with graceful fallback
585
+ │ ├── analyzer.py — regex-based SQL intent extraction
586
+ │ └── enforcer.py — policy decision (allow/reject + reason)
587
+ ├── infrastructure/
588
+ │ ├── connection.py — pymssql connection + helpers
589
+ │ ├── background.py — background semantic fill loop with backoff
590
+ │ ├── cache/
591
+ │ │ ├── store.py — SQLite DDL + init
592
+ │ │ ├── structural.py — hashing + warmup + snapshot persistence
593
+ │ │ └── semantic.py — analysis/definition I/O + pending queue
594
+ │ └── queries/ — SQL Server queries (metadata / comments / objects)
595
+ ├── services/ — 7 services (metadata / relationship / semantic / object / policy / query / metrics)
596
+ └── server/
597
+ ├── app.py — MCP Server, tool registry, JSON envelope
598
+ ├── tools/ — 9 tool modules (one per capability group) plus `shape.py`
599
+ └── resources/ — schema / analysis / summary URIs
600
+ ```
601
+
602
+ ### Testing conventions
603
+
604
+ - **Unit tests** use in-memory or tmp-dir SQLite and mock pymssql.
605
+ - **Integration tests** are marked `@pytest.mark.integration` and skip unless `SEMANTIC_MCP_MSSQL_SERVER` is set.
606
+ - Pydantic models are exercised directly; infrastructure layers are tested with mocked connections.
607
+
608
+ ---
609
+
610
+ ## Troubleshooting
611
+
612
+ ### pymssql / FreeTDS installation fails on Linux
613
+
614
+ `pymssql` links against FreeTDS. On Debian/Ubuntu, install the required system libraries before running `pip install`:
615
+
616
+ ```bash
617
+ sudo apt-get install -y libssl-dev libkrb5-dev freetds-dev
618
+ pip install pymssql
619
+ ```
620
+
621
+ On Alpine / Docker: `apk add freetds-dev openssl-dev krb5-dev`.
622
+
623
+ ### "Cannot open server" or connection refused
624
+
625
+ - Confirm the server name and port are correct (`SEMANTIC_MCP_MSSQL_SERVER`, `SEMANTIC_MCP_MSSQL_PORT`).
626
+ - Check that TCP/IP is enabled in SQL Server Configuration Manager.
627
+ - If using a named instance (e.g. `MY-PC\SQLEXPRESS`), confirm SQL Server Browser is running so the port can be resolved dynamically.
628
+ - If using a non-default port, set `SEMANTIC_MCP_MSSQL_PORT` explicitly — SQL Server Browser is not needed when the port is fixed.
629
+ - Check firewall rules: port 1433 (or your custom port) must be reachable from the machine running the MCP server.
630
+
631
+ ### "Login failed for user"
632
+
633
+ - Confirm `SEMANTIC_MCP_MSSQL_USER` and `SEMANTIC_MCP_MSSQL_PASSWORD` are correct.
634
+ - Verify that SQL Server Authentication is enabled on the instance (Server Properties → Security → SQL Server and Windows Authentication mode).
635
+ - For Azure SQL, the user may need to be in the format `user@servername` depending on your driver version.
636
+
637
+ ### Windows Authentication not working on Linux or macOS
638
+
639
+ pymssql does not support Windows Authentication (Kerberos/NTLM) on non-Windows platforms. Use SQL Authentication (`MSSQL_USER` + `MSSQL_PASSWORD`) instead. `SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true` is only effective on Windows.
640
+
641
+ ### LocalDB not connecting
642
+
643
+ - LocalDB is Windows-only and communicates over a named pipe, not TCP.
644
+ - Use the exact format `(localdb)\MSSQLLocalDB` (or your instance name) for `SEMANTIC_MCP_MSSQL_SERVER`.
645
+ - Set `SEMANTIC_MCP_MSSQL_WINDOWS_AUTH=true`; SQL auth is not supported by LocalDB by default.
646
+ - Run `sqllocaldb info` in a Windows terminal to list available instances and confirm they are running.
647
+
648
+ ### Policy file not found or ignored
649
+
650
+ When the policy file cannot be used (missing, unreadable, invalid JSON, or failing schema validation), the server falls back to built-in read-only mode and logs a warning (file missing) or an error (file unreadable or invalid). Check the startup logs for lines containing `policy`. If `SEMANTIC_MCP_POLICY_FILE` is set to a relative path, it is resolved from the process working directory — use an absolute path to avoid ambiguity.
651
+
652
+ ### Server starts but tools return empty results
653
+
654
+ The Structural Cache may not have been populated yet. Check the startup logs for warmup progress. You can force a full refresh with the `refresh_schema_cache` MCP tool. Also verify that the connected database user has `VIEW DEFINITION` permission; without it, object definitions and comments will be absent from the cache.
655
+
656
+ ---
657
+
658
+ ## Security Design
659
+
660
+ - **Default read-only**: if no policy is configured, only `SELECT` is allowed.
661
+ - **SQL validation required**: every query passes through the intent analyzer and policy enforcer before reaching `cursor.execute()`.
662
+ - **Denied dangerous statements**: `DROP` / `TRUNCATE` are classified as `CRITICAL` risk level; blocked unless explicitly allowed.
663
+ - **Schema-aware access control**: `allowed_schemas` rejects implicit-schema queries to prevent schema-default bypass.
664
+ - **Policy hardening**: malformed policy files fall back to read-only rather than crashing the server.
665
+
666
+ ---
667
+
668
+ ## Limitations / Future Work
669
+
670
+ - The default SQL intent analyzer (`SEMANTIC_MCP_INTENT_ANALYZER=regex`) is regex-based, not a full T-SQL parser — CTE-defined names may appear as tables. Use `validate_sql_against_policy` first when in doubt.
671
+ - `STRING_AGG` used in the index query requires SQL Server 2017+. Older versions will need an alternative query.
672
+ - `sys.extended_properties` reads require `VIEW DEFINITION` permission; comments on restricted objects won't appear in the cache.
673
+ - Background fill is single-worker; on very large schemas the Semantic Cache may take time to converge (use `refresh_schema_cache` to force a structural refresh; semantic classification still fills lazily).
674
+
675
+ ---
676
+
677
+ ## License
678
+
679
+ Licensed under the MIT License — see `LICENSE` for details.