sqlserver-semantic-mcp 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlserver_semantic_mcp/__init__.py +1 -0
- sqlserver_semantic_mcp/config.py +78 -0
- sqlserver_semantic_mcp/domain/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/enums.py +48 -0
- sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/models/column.py +14 -0
- sqlserver_semantic_mcp/domain/models/object.py +13 -0
- sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
- sqlserver_semantic_mcp/domain/models/table.py +29 -0
- sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/background.py +59 -0
- sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
- sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
- sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
- sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
- sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
- sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
- sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
- sqlserver_semantic_mcp/main.py +90 -0
- sqlserver_semantic_mcp/policy/__init__.py +0 -0
- sqlserver_semantic_mcp/policy/analyzer.py +194 -0
- sqlserver_semantic_mcp/policy/enforcer.py +104 -0
- sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
- sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
- sqlserver_semantic_mcp/policy/intents/base.py +17 -0
- sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
- sqlserver_semantic_mcp/policy/intents/router.py +21 -0
- sqlserver_semantic_mcp/policy/loader.py +90 -0
- sqlserver_semantic_mcp/policy/models.py +43 -0
- sqlserver_semantic_mcp/server/__init__.py +0 -0
- sqlserver_semantic_mcp/server/app.py +125 -0
- sqlserver_semantic_mcp/server/compact.py +74 -0
- sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
- sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
- sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
- sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
- sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
- sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
- sqlserver_semantic_mcp/server/resources/schema.py +144 -0
- sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
- sqlserver_semantic_mcp/server/tools/cache.py +24 -0
- sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
- sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
- sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
- sqlserver_semantic_mcp/server/tools/policy.py +48 -0
- sqlserver_semantic_mcp/server/tools/query.py +159 -0
- sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
- sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
- sqlserver_semantic_mcp/server/tools/shape.py +204 -0
- sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
- sqlserver_semantic_mcp/services/__init__.py +0 -0
- sqlserver_semantic_mcp/services/metadata_service.py +173 -0
- sqlserver_semantic_mcp/services/metrics_service.py +124 -0
- sqlserver_semantic_mcp/services/object_service.py +187 -0
- sqlserver_semantic_mcp/services/policy_service.py +59 -0
- sqlserver_semantic_mcp/services/query_service.py +321 -0
- sqlserver_semantic_mcp/services/relationship_service.py +160 -0
- sqlserver_semantic_mcp/services/semantic_service.py +277 -0
- sqlserver_semantic_mcp/workflows/__init__.py +26 -0
- sqlserver_semantic_mcp/workflows/bundle.py +157 -0
- sqlserver_semantic_mcp/workflows/contracts.py +64 -0
- sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
- sqlserver_semantic_mcp/workflows/facade.py +117 -0
- sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
- sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
- sqlserver_semantic_mcp/workflows/router.py +59 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
8
|
+
import aiosqlite
|
|
9
|
+
|
|
10
|
+
from ...config import Config
|
|
11
|
+
from ..connection import open_connection
|
|
12
|
+
from ..queries.metadata_queries import (
|
|
13
|
+
GET_TABLES, GET_COLUMNS, GET_PRIMARY_KEYS,
|
|
14
|
+
GET_FOREIGN_KEYS, GET_INDEXES, GET_OBJECTS,
|
|
15
|
+
)
|
|
16
|
+
from ..queries.comment_queries import GET_COMMENTS
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class StructuralSnapshot:
    """One full read of a SQL Server database's structural metadata.

    Each field is the raw row list from the corresponding query in
    queries/metadata_queries.py (comments from comment_queries.py);
    tuple layouts are documented inline.
    """

    tables: list[tuple]  # (schema, table)
    columns: list[tuple]  # (schema, table, col, type, maxlen, nullable, default, ordinal)
    primary_keys: list[tuple]  # (schema, table, col)
    foreign_keys: list[tuple]  # (schema, table, col, ref_schema, ref_table, ref_col)
    indexes: list[tuple]  # (schema, table, index_name, is_unique, is_pk, cols)
    objects: list[tuple]  # (schema, name, type)
    comments: list[tuple]  # (schema, object, column, description)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _sha256(obj: Any) -> str:
    """Hex SHA-256 of *obj* rendered as canonical (sorted-key) JSON."""
    blob = json.dumps(obj, sort_keys=True, default=str).encode("utf-8")
    return hashlib.sha256(blob).hexdigest()


def _normalized(rows) -> list[list]:
    """Rows coerced to sorted lists so the hash ignores row order."""
    return sorted(list(row) for row in rows)


def compute_structural_hash(
    tables, columns, primary_keys, foreign_keys, indexes,
) -> str:
    """Fingerprint of the table/column/key/index structure."""
    return _sha256({
        "tables": _normalized(tables),
        "columns": _normalized(columns),
        "primary_keys": _normalized(primary_keys),
        "foreign_keys": _normalized(foreign_keys),
        "indexes": _normalized(indexes),
    })


def compute_object_hash(objects) -> str:
    """Fingerprint of the programmable-object list (routines and views)."""
    return _sha256({"objects": _normalized(objects)})


def compute_comment_hash(comments) -> str:
    """Fingerprint of the extended-property description rows."""
    return _sha256({"comments": _normalized(comments)})
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
async def read_schema_version(db_path: str, database: str) -> Optional[dict]:
    """Return the cached schema_version row for *database*, or None if absent."""
    query = "SELECT * FROM schema_version WHERE database_name = ?"
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(query, (database,))
        record = await cursor.fetchone()
    return dict(record) if record else None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
async def write_structural_snapshot(
    db_path: str, database: str, snap: StructuralSnapshot,
) -> dict:
    """Atomically replace the cached structural snapshot for *database*.

    Computes the three change-detection hashes, rewrites every sc_* table
    inside a single SQLite transaction, records the hashes in
    schema_version, and marks semantic rows whose stored hash no longer
    matches as 'dirty'. Returns the hashes plus the capture timestamp.
    """
    structural_hash = compute_structural_hash(
        snap.tables, snap.columns, snap.primary_keys,
        snap.foreign_keys, snap.indexes,
    )
    object_hash = compute_object_hash(snap.objects)
    comment_hash = compute_comment_hash(snap.comments)
    # Timezone-aware UTC timestamp, stored as ISO-8601 text.
    captured_at = datetime.now(timezone.utc).isoformat()

    async with aiosqlite.connect(db_path) as db:
        # Explicit transaction: the deletes + inserts must be all-or-nothing.
        await db.execute("BEGIN")
        try:
            # Clear the previous snapshot rows for this database only.
            # The f-string is safe: table names come from this fixed list.
            for tbl in [
                "sc_tables", "sc_columns", "sc_primary_keys",
                "sc_foreign_keys", "sc_indexes", "sc_objects", "sc_comments",
            ]:
                await db.execute(
                    f"DELETE FROM {tbl} WHERE database_name = ?", (database,),
                )

            await db.executemany(
                "INSERT INTO sc_tables (database_name, schema_name, table_name) "
                "VALUES (?,?,?)",
                [(database, s, t) for (s, t) in snap.tables],
            )
            await db.executemany(
                "INSERT INTO sc_columns "
                "(database_name, schema_name, table_name, column_name, data_type, "
                "max_length, is_nullable, column_default, ordinal_position) "
                "VALUES (?,?,?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.columns],
            )
            await db.executemany(
                "INSERT INTO sc_primary_keys "
                "(database_name, schema_name, table_name, column_name) "
                "VALUES (?,?,?,?)",
                [(database, *row) for row in snap.primary_keys],
            )
            await db.executemany(
                "INSERT INTO sc_foreign_keys "
                "(database_name, schema_name, table_name, column_name, "
                "ref_schema, ref_table, ref_column) VALUES (?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.foreign_keys],
            )
            await db.executemany(
                "INSERT INTO sc_indexes "
                "(database_name, schema_name, table_name, index_name, "
                "is_unique, is_primary_key, columns) VALUES (?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.indexes],
            )
            await db.executemany(
                "INSERT INTO sc_objects "
                "(database_name, schema_name, object_name, object_type) "
                "VALUES (?,?,?,?)",
                [(database, *row) for row in snap.objects],
            )
            await db.executemany(
                "INSERT INTO sc_comments "
                "(database_name, schema_name, object_name, column_name, description) "
                "VALUES (?,?,?,?,?)",
                [(database, *row) for row in snap.comments],
            )

            # Record the new hashes; REPLACE keeps one row per database.
            await db.execute(
                "INSERT OR REPLACE INTO schema_version "
                "(database_name, structural_hash, object_hash, comment_hash, "
                " captured_at) VALUES (?,?,?,?,?)",
                (database, structural_hash, object_hash, comment_hash, captured_at),
            )

            # Cascade: mark stale semantic rows dirty
            await db.execute(
                "UPDATE sem_table_analysis SET status='dirty' "
                "WHERE database_name=? AND structural_hash<>?",
                (database, structural_hash),
            )
            await db.execute(
                "UPDATE sem_object_definitions SET status='dirty' "
                "WHERE database_name=? AND object_hash<>?",
                (database, object_hash),
            )
            await db.commit()
        except Exception:
            # Any failure unwinds the whole rewrite; the old snapshot survives.
            await db.rollback()
            raise

    return {
        "structural_hash": structural_hash,
        "object_hash": object_hash,
        "comment_hash": comment_hash,
        "captured_at": captured_at,
    }
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def fetch_snapshot_from_server(cfg: Config) -> StructuralSnapshot:
    """Run every metadata query against SQL Server and bundle the rows.

    NOTE(review): this is a synchronous, blocking fetch — callers run it
    from async code; confirm that is acceptable at the call sites.
    """
    query_order = (
        GET_TABLES,
        GET_COLUMNS,
        GET_PRIMARY_KEYS,
        GET_FOREIGN_KEYS,
        GET_INDEXES,
        GET_OBJECTS,
        GET_COMMENTS,
    )
    collected: list[list[tuple]] = []
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            for statement in query_order:
                cursor.execute(statement)
                collected.append(list(cursor.fetchall()))
        finally:
            cursor.close()

    tables, columns, pks, fks, idxs, objs, descriptions = collected
    return StructuralSnapshot(
        tables=tables,
        columns=columns,
        primary_keys=pks,
        foreign_keys=fks,
        indexes=idxs,
        objects=objs,
        comments=descriptions,
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
async def warmup_structural_cache(cfg: Config) -> dict:
    """Fetch snapshot from SQL Server and write to SQLite. Returns hashes."""
    snapshot = fetch_snapshot_from_server(cfg)
    logger.info(
        "Structural snapshot: %d tables, %d columns, %d FKs, %d objects",
        len(snapshot.tables), len(snapshot.columns),
        len(snapshot.foreign_keys), len(snapshot.objects),
    )
    return await write_structural_snapshot(
        cfg.cache_path, cfg.mssql_database, snapshot,
    )
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from typing import Any, Iterator
|
|
4
|
+
|
|
5
|
+
import pymssql
|
|
6
|
+
|
|
7
|
+
from ..config import Config, get_config
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_pymssql_kwargs(cfg: Config) -> dict[str, Any]:
    """Translate our Config into keyword arguments for pymssql.connect().

    Raises ValueError when SQL authentication is selected but the
    user/password settings are missing.
    """
    server = cfg.mssql_server
    # Rewrite LocalDB-style "(localdb)\Instance" into the ".\Instance" form.
    # NOTE(review): pymssql/FreeTDS support for LocalDB is environment-
    # dependent — confirm this rewrite actually connects in practice.
    if server.lower().startswith("(localdb)\\"):
        instance = server.split("\\", 1)[1]
        server = f".\\{instance}"

    kwargs: dict[str, Any] = {
        "server": server,
        "database": cfg.mssql_database,
        "port": cfg.mssql_port,
    }

    # Azure SQL endpoints are forced onto TDS 7.4.
    if ".database.windows.net" in server.lower():
        kwargs["tds_version"] = "7.4"

    # Encryption also selects TDS 7.4 (same value; the overlap is harmless).
    if cfg.mssql_encrypt:
        kwargs["tds_version"] = "7.4"

    # Windows auth needs no credentials; SQL auth requires both settings.
    if not cfg.mssql_windows_auth:
        if cfg.mssql_user is None or cfg.mssql_password is None:
            raise ValueError(
                "SQL auth requires SEMANTIC_MCP_MSSQL_USER and "
                "SEMANTIC_MCP_MSSQL_PASSWORD"
            )
        kwargs["user"] = cfg.mssql_user
        kwargs["password"] = cfg.mssql_password

    return kwargs
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@contextmanager
def open_connection(cfg: Config | None = None) -> Iterator[Any]:
    """Yield a live pymssql connection, always closing it on exit."""
    resolved = cfg or get_config()
    connection = pymssql.connect(**build_pymssql_kwargs(resolved))
    try:
        yield connection
    finally:
        connection.close()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def fetch_all(cfg: Config, sql: str, params: tuple = ()) -> list[tuple]:
    """Execute *sql* with *params* and return every result row.

    Fix: the cursor is closed in a finally block, so it is no longer
    leaked when execute()/fetchall() raises.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def fetch_one(cfg: Config, sql: str, params: tuple = ()) -> tuple | None:
    """Execute *sql* with *params* and return the first row, or None.

    Fix: the cursor is closed in a finally block, so it is no longer
    leaked when execute()/fetchone() raises.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def execute(cfg: Config, sql: str, params: tuple = ()) -> int:
    """Execute a write statement, commit, and return the affected row count.

    Fix: the cursor is closed in a finally block, so it is no longer
    leaked when execute() raises; the uncommitted connection is then
    simply closed by open_connection's cleanup.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            affected = cursor.rowcount
            conn.commit()
            return affected
        finally:
            cursor.close()
|
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# All MS_Description extended properties attached to objects (class = 1).
# Object-level descriptions have no matching sys.columns row, hence the
# LEFT JOIN and the '' COALESCE for COLUMN_NAME.
GET_COMMENTS = """
SELECT
s.name AS SCHEMA_NAME,
o.name AS OBJECT_NAME,
COALESCE(c.name, '') AS COLUMN_NAME,
CAST(ep.value AS NVARCHAR(MAX)) AS DESCRIPTION
FROM sys.extended_properties ep
JOIN sys.objects o ON ep.major_id = o.object_id
JOIN sys.schemas s ON o.schema_id = s.schema_id
LEFT JOIN sys.columns c
ON ep.major_id = c.object_id AND ep.minor_id = c.column_id
WHERE ep.name = 'MS_Description' AND ep.class = 1
ORDER BY s.name, o.name, ep.minor_id
"""

# %s placeholder: pymssql uses the pyformat param style.
# NOTE(review): this constant is duplicated in queries/object_queries.py —
# consider keeping a single definition.
GET_OBJECT_DEFINITION = """
SELECT OBJECT_DEFINITION(OBJECT_ID(%s))
"""
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# User base tables only; views are reported separately via GET_OBJECTS.
GET_TABLES = """
SELECT TABLE_SCHEMA, TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_SCHEMA, TABLE_NAME
"""

# All columns; IS_NULLABLE is normalized from 'YES'/'NO' text to 1/0.
GET_COLUMNS = """
SELECT
TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME,
DATA_TYPE, CHARACTER_MAXIMUM_LENGTH,
CASE WHEN IS_NULLABLE = 'YES' THEN 1 ELSE 0 END AS IS_NULLABLE,
COLUMN_DEFAULT, ORDINAL_POSITION
FROM INFORMATION_SCHEMA.COLUMNS
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION
"""

# Primary-key columns, in key order.
GET_PRIMARY_KEYS = """
SELECT tc.TABLE_SCHEMA, tc.TABLE_NAME, kcu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
ORDER BY tc.TABLE_SCHEMA, tc.TABLE_NAME, kcu.ORDINAL_POSITION
"""

# FK column -> referenced PK/unique column, matched by ordinal position
# for multi-column keys.
# NOTE(review): the joins match on CONSTRAINT_NAME only — constraint names
# are unique per schema, so identically named constraints in different
# schemas could fan out; confirm against a multi-schema database.
GET_FOREIGN_KEYS = """
SELECT
fk.TABLE_SCHEMA, fk.TABLE_NAME, fk.COLUMN_NAME,
pk.TABLE_SCHEMA AS REF_SCHEMA,
pk.TABLE_NAME AS REF_TABLE,
pk.COLUMN_NAME AS REF_COLUMN
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
ON rc.CONSTRAINT_NAME = fk.CONSTRAINT_NAME
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
ON rc.UNIQUE_CONSTRAINT_NAME = pk.CONSTRAINT_NAME
AND fk.ORDINAL_POSITION = pk.ORDINAL_POSITION
ORDER BY fk.TABLE_SCHEMA, fk.TABLE_NAME, fk.ORDINAL_POSITION
"""

# Named indexes with their key columns aggregated into a comma list
# (included columns are excluded; heaps have NULL names and are skipped).
GET_INDEXES = """
SELECT
s.name AS SCHEMA_NAME,
t.name AS TABLE_NAME,
i.name AS INDEX_NAME,
CAST(i.is_unique AS INT),
CAST(i.is_primary_key AS INT),
STRING_AGG(c.name, ',') WITHIN GROUP (ORDER BY ic.key_ordinal) AS COLS
FROM sys.indexes i
JOIN sys.tables t ON i.object_id = t.object_id
JOIN sys.schemas s ON t.schema_id = s.schema_id
JOIN sys.index_columns ic
ON i.object_id = ic.object_id AND i.index_id = ic.index_id
JOIN sys.columns c
ON ic.object_id = c.object_id AND ic.column_id = c.column_id
WHERE i.name IS NOT NULL AND ic.is_included_column = 0
GROUP BY s.name, t.name, i.name, i.is_unique, i.is_primary_key
ORDER BY s.name, t.name, i.name
"""

# Programmable objects: stored procedures/functions plus views
# (views tagged with the literal type 'VIEW').
GET_OBJECTS = """
SELECT ROUTINE_SCHEMA, ROUTINE_NAME, ROUTINE_TYPE
FROM INFORMATION_SCHEMA.ROUTINES
UNION ALL
SELECT TABLE_SCHEMA, TABLE_NAME, 'VIEW'
FROM INFORMATION_SCHEMA.VIEWS
ORDER BY 1, 2, 3
"""
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Source text of a procedure/function/view; %s is the 'schema.name' string
# (pymssql pyformat param style). Returns NULL for encrypted or missing
# objects.
# NOTE(review): also defined in queries/comment_queries.py — consider
# keeping a single definition.
GET_OBJECT_DEFINITION = """
SELECT OBJECT_DEFINITION(OBJECT_ID(%s))
"""

# Objects this object references (views/procs it reads or calls).
# REF_SCHEMA/REF_NAME may be NULL-free because referenced_id is filtered,
# but REF_TYPE can still be NULL for cross-database references that the
# LEFT JOIN cannot resolve.
GET_OBJECT_DEPENDENCIES = """
SELECT
OBJECT_SCHEMA_NAME(d.referenced_id) AS REF_SCHEMA,
OBJECT_NAME(d.referenced_id) AS REF_NAME,
o.type_desc AS REF_TYPE
FROM sys.sql_expression_dependencies d
LEFT JOIN sys.objects o ON d.referenced_id = o.object_id
WHERE d.referencing_id = OBJECT_ID(%s)
AND d.referenced_id IS NOT NULL
ORDER BY REF_SCHEMA, REF_NAME
"""
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from mcp.server.stdio import stdio_server
|
|
5
|
+
|
|
6
|
+
from .config import get_config
|
|
7
|
+
from .infrastructure.background import background_fill_loop
|
|
8
|
+
from .infrastructure.cache.semantic import enqueue_all_tables
|
|
9
|
+
from .infrastructure.cache.store import init_store
|
|
10
|
+
from .infrastructure.cache.structural import (
|
|
11
|
+
read_schema_version, warmup_structural_cache,
|
|
12
|
+
)
|
|
13
|
+
from .server.app import app, get_context
|
|
14
|
+
from .server import resources # noqa: F401
|
|
15
|
+
from .server.prompts import register_prompts
|
|
16
|
+
from .server.tools import register_all
|
|
17
|
+
|
|
18
|
+
# Process-wide logging config; basicConfig writes to stderr by default,
# which keeps stdout free for the MCP stdio transport.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("sqlserver_semantic_mcp")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def _startup() -> asyncio.Task | None:
    """Initialize the cache, tools, and prompts before serving.

    Returns the background cache-fill task when caching is enabled,
    otherwise None; the caller is responsible for cancelling it.
    """
    cfg = get_config()
    logger.info(
        "Starting sqlserver-semantic-mcp against %s/%s",
        cfg.mssql_server, cfg.mssql_database,
    )

    # Ensure the SQLite store and its tables exist.
    await init_store(cfg.cache_path)

    bg_task: asyncio.Task | None = None
    if cfg.cache_enabled:
        existing = await read_schema_version(cfg.cache_path, cfg.mssql_database)
        # Warm up when explicitly asked ('full') or when no snapshot exists.
        should_warmup = (
            cfg.startup_mode == "full"
            or existing is None
        )
        if should_warmup:
            if existing is None:
                logger.info("No cache found; running structural warmup")
            else:
                logger.info(
                    "Startup mode '%s' requires a fresh structural warmup "
                    "(cached_at=%s)",
                    cfg.startup_mode,
                    existing["captured_at"],
                )
            result = await warmup_structural_cache(cfg)
            structural_hash = result["structural_hash"]
        else:
            logger.info(
                "Startup mode '%s' reuses existing cache (captured_at=%s)",
                cfg.startup_mode,
                existing["captured_at"],
            )
            structural_hash = existing["structural_hash"]
        # Queue every table for semantic analysis, then start the filler.
        await enqueue_all_tables(
            cfg.cache_path, cfg.mssql_database, structural_hash,
        )
        bg_task = asyncio.create_task(background_fill_loop(cfg))

    # Register MCP tools/prompts and build the shared context up front.
    register_all()
    register_prompts()
    get_context()
    return bg_task
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def _run() -> None:
    """Serve the MCP app over stdio, tearing down the background task on exit.

    Fix: the original `except (asyncio.CancelledError, Exception): pass`
    silently swallowed real failures from the background fill task; now
    only the expected CancelledError is ignored and anything else is
    logged.
    """
    bg_task = await _startup()
    try:
        async with stdio_server() as (r, w):
            await app.run(r, w, app.create_initialization_options())
    finally:
        if bg_task is not None:
            bg_task.cancel()
            try:
                await bg_task
            except asyncio.CancelledError:
                pass  # expected: we just cancelled it
            except Exception:
                # Surface unexpected background errors instead of hiding them.
                logger.exception("Background fill task failed during shutdown")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main() -> None:
    """Synchronous console-script entry point: run the async server loop."""
    asyncio.run(_run())


if __name__ == "__main__":
    main()
|
|
File without changes
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from ..domain.enums import SqlOperation, RiskLevel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class SqlIntent:
    """Summary produced by analyze_sql's regex heuristics for one SQL string."""

    primary_operation: SqlOperation  # leading keyword of the first statement
    has_where_clause: bool  # WHERE present in the first statement
    has_top_clause: bool  # TOP present in the first statement
    affected_tables: List[str] = field(default_factory=list)  # best-effort extraction
    risk_level: RiskLevel = RiskLevel.LOW
    is_multi_statement: bool = False  # more than one ';'-separated statement
    statement_count: int = 1
    # v0.5 heuristics — used by workflow router / risk estimation.
    is_sql_like: bool = True  # text resembles SQL rather than a natural-language ask
    confidence: float = 0.9  # analyzer's trust in its own classification
    requires_discovery: bool = False  # route to schema discovery before execution
    has_unqualified_tables: bool = False  # some table lacks a schema qualifier
    contains_dynamic_sql: bool = False  # EXEC(...) / sp_executesql detected
    contains_cte: bool = False  # first statement starts with WITH
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Leading keyword -> operation; anything else maps to UNKNOWN in
# _detect_operation.
_OP_MAP = {
    "SELECT": SqlOperation.SELECT,
    "INSERT": SqlOperation.INSERT,
    "UPDATE": SqlOperation.UPDATE,
    "DELETE": SqlOperation.DELETE,
    "TRUNCATE": SqlOperation.TRUNCATE,
    "CREATE": SqlOperation.CREATE,
    "ALTER": SqlOperation.ALTER,
    "DROP": SqlOperation.DROP,
    "EXEC": SqlOperation.EXEC,
    "EXECUTE": SqlOperation.EXECUTE,
    "MERGE": SqlOperation.MERGE,
    "WITH": SqlOperation.SELECT,  # CTE — treat body as SELECT for routing
}

# One- or two-part identifier, each part optionally [bracketed]:
# Users, dbo.Users, [dbo].[Users].
_IDENT = r"\[?[\w]+\]?(?:\.\[?[\w]+\]?)?"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _strip_comments(sql: str) -> str:
    """Remove /* */ block comments and -- line comments, then trim whitespace."""
    without_blocks = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
    without_line_comments = re.sub(r"--[^\n]*", "", without_blocks)
    return without_line_comments.strip()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _split_statements(sql: str) -> list[str]:
    """Split on semicolons, dropping empty fragments."""
    fragments = (piece.strip() for piece in re.split(r";\s*", sql))
    return [fragment for fragment in fragments if fragment]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _detect_operation(sql: str) -> SqlOperation:
    """Classify the statement by its leading keyword via _OP_MAP."""
    match = re.match(r"\s*([A-Za-z]+)", sql)
    if match is None:
        return SqlOperation.UNKNOWN
    keyword = match.group(1).upper()
    return _OP_MAP.get(keyword, SqlOperation.UNKNOWN)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_tables(sql: str, operation: SqlOperation) -> list[str]:
    """Best-effort regex extraction of table names referenced by *sql*.

    Collects every FROM/JOIN source, then adds the write/DDL target for
    the given operation. Result is order-preserving and de-duplicated
    case-insensitively (first-seen spelling wins).
    """
    tables: list[str] = []
    # Read sources common to all statement kinds.
    tables.extend(re.findall(rf"\bFROM\s+({_IDENT})", sql, re.IGNORECASE))
    tables.extend(re.findall(rf"\bJOIN\s+({_IDENT})", sql, re.IGNORECASE))

    # Operation-specific write/DDL target.
    if operation == SqlOperation.UPDATE:
        m = re.search(rf"\bUPDATE\s+({_IDENT})", sql, re.IGNORECASE)
        if m:
            tables.append(m.group(1))
    elif operation == SqlOperation.INSERT:
        m = re.search(rf"\bINTO\s+({_IDENT})", sql, re.IGNORECASE)
        if m:
            tables.append(m.group(1))
    elif operation == SqlOperation.DELETE:
        # FROM is optional in T-SQL DELETE.
        m = re.search(rf"\bDELETE\s+(?:FROM\s+)?({_IDENT})", sql, re.IGNORECASE)
        if m:
            tables.append(m.group(1))
    elif operation == SqlOperation.MERGE:
        # INTO is optional in T-SQL MERGE.
        m = re.search(rf"\bMERGE\s+(?:INTO\s+)?({_IDENT})", sql, re.IGNORECASE)
        if m:
            tables.append(m.group(1))
    elif operation in (SqlOperation.TRUNCATE, SqlOperation.DROP, SqlOperation.ALTER):
        m = re.search(rf"\b(?:TABLE|VIEW)\s+({_IDENT})", sql, re.IGNORECASE)
        if m:
            tables.append(m.group(1))

    # Case-insensitive de-dup preserving first occurrence order.
    seen = set()
    out = []
    for t in tables:
        if t.lower() not in seen:
            seen.add(t.lower())
            out.append(t)
    return out
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _compute_risk(
    operation: SqlOperation, has_where: bool, is_multi: bool,
) -> RiskLevel:
    """Classify a statement's blast radius from its operation and shape."""
    if operation in (SqlOperation.DROP, SqlOperation.TRUNCATE):
        return RiskLevel.CRITICAL
    if operation == SqlOperation.ALTER:
        return RiskLevel.HIGH
    # Row-mutating statements escalate to HIGH when unfiltered.
    if operation in (SqlOperation.DELETE, SqlOperation.UPDATE, SqlOperation.MERGE):
        return RiskLevel.MEDIUM if has_where else RiskLevel.HIGH
    if operation in (SqlOperation.INSERT, SqlOperation.CREATE,
                     SqlOperation.EXEC, SqlOperation.EXECUTE):
        return RiskLevel.MEDIUM
    # Reads are LOW unless batched with other statements.
    return RiskLevel.MEDIUM if is_multi else RiskLevel.LOW
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Substrings whose presence suggests the text is SQL.
_SQL_KEYWORDS = (
    "select", "insert", "update", "delete", "truncate", "create",
    "alter", "drop", "exec", "execute", "merge", "with", "from", "where",
    "join",
)


def _looks_sql_like(text: str) -> bool:
    """Heuristic: does this look like SQL rather than a natural-language ask?"""
    if not text:
        return False
    lowered = text.lower()
    if not any(keyword in lowered for keyword in _SQL_KEYWORDS):
        return False
    tokens = [token for token in re.split(r"\s+", lowered) if token]
    if not tokens:
        return False
    # Starting with a SQL keyword is decisive on its own.
    if tokens[0] in _SQL_KEYWORDS:
        return True
    # Otherwise require a minimum density of SQL-ish punctuation.
    symbol_count = sum(1 for ch in text if ch in ",();.=*")
    return symbol_count / max(len(text), 1) >= 0.02
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _has_unqualified(tables: list[str]) -> bool:
    """True when any referenced table name lacks a schema qualifier."""
    for table in tables:
        if "." not in table.strip("[]"):
            return True
    return False
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# Dynamic-SQL markers: EXEC(...)/EXECUTE(...) or sp_executesql.
# NOTE(review): the trailing \b only matches when a word character follows
# the "(" — e.g. EXEC(N'...') matches but EXEC(@sql) does not; confirm
# whether that gap is intended.
_DYNAMIC_PAT = re.compile(
    r"\b(EXEC(?:UTE)?\s*\(|sp_executesql)\b", re.IGNORECASE,
)
# Statement opens with a common table expression.
_CTE_PAT = re.compile(r"^\s*WITH\b", re.IGNORECASE)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def analyze_sql(sql: str) -> SqlIntent:
    """Analyze *sql* with regex heuristics and return a SqlIntent summary.

    Multi-statement input is classified by its FIRST statement only;
    additional statements just raise statement_count / is_multi_statement
    (and nudge risk via _compute_risk).
    """
    clean = _strip_comments(sql)
    statements = _split_statements(clean)
    is_multi = len(statements) > 1
    first = statements[0] if statements else ""

    operation = _detect_operation(first)
    has_where = bool(re.search(r"\bWHERE\b", first, re.IGNORECASE))
    has_top = bool(re.search(r"\bTOP\b", first, re.IGNORECASE))
    tables = _extract_tables(first, operation)
    risk = _compute_risk(operation, has_where, is_multi)

    # SQL-likeness looks at the WHOLE cleaned text, not just the first
    # statement; the structural flags look at the first statement only.
    is_sql_like = _looks_sql_like(clean)
    contains_cte = bool(_CTE_PAT.match(first))
    contains_dynamic = bool(_DYNAMIC_PAT.search(first))
    unqualified = _has_unqualified(tables)

    # Confidence drops as the statement's shape gets more ambiguous.
    if operation == SqlOperation.UNKNOWN:
        confidence = 0.1 if not is_sql_like else 0.4
    elif unqualified and operation != SqlOperation.SELECT:
        confidence = 0.6
    elif contains_dynamic:
        confidence = 0.5
    else:
        confidence = 0.9

    requires_discovery = (
        not is_sql_like
        or operation == SqlOperation.UNKNOWN
    )

    return SqlIntent(
        primary_operation=operation,
        has_where_clause=has_where,
        has_top_clause=has_top,
        affected_tables=tables,
        risk_level=risk,
        is_multi_statement=is_multi,
        statement_count=len(statements),
        is_sql_like=is_sql_like,
        confidence=confidence,
        requires_discovery=requires_discovery,
        has_unqualified_tables=unqualified,
        contains_dynamic_sql=contains_dynamic,
        contains_cte=contains_cte,
    )
|