sqlserver-semantic-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. sqlserver_semantic_mcp/__init__.py +1 -0
  2. sqlserver_semantic_mcp/config.py +78 -0
  3. sqlserver_semantic_mcp/domain/__init__.py +0 -0
  4. sqlserver_semantic_mcp/domain/enums.py +48 -0
  5. sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
  6. sqlserver_semantic_mcp/domain/models/column.py +14 -0
  7. sqlserver_semantic_mcp/domain/models/object.py +13 -0
  8. sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
  9. sqlserver_semantic_mcp/domain/models/table.py +29 -0
  10. sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
  11. sqlserver_semantic_mcp/infrastructure/background.py +59 -0
  12. sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
  13. sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
  14. sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
  15. sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
  16. sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
  17. sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
  18. sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
  19. sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
  20. sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
  21. sqlserver_semantic_mcp/main.py +90 -0
  22. sqlserver_semantic_mcp/policy/__init__.py +0 -0
  23. sqlserver_semantic_mcp/policy/analyzer.py +194 -0
  24. sqlserver_semantic_mcp/policy/enforcer.py +104 -0
  25. sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
  26. sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
  27. sqlserver_semantic_mcp/policy/intents/base.py +17 -0
  28. sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
  29. sqlserver_semantic_mcp/policy/intents/router.py +21 -0
  30. sqlserver_semantic_mcp/policy/loader.py +90 -0
  31. sqlserver_semantic_mcp/policy/models.py +43 -0
  32. sqlserver_semantic_mcp/server/__init__.py +0 -0
  33. sqlserver_semantic_mcp/server/app.py +125 -0
  34. sqlserver_semantic_mcp/server/compact.py +74 -0
  35. sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
  36. sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
  37. sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
  38. sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
  39. sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
  40. sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
  41. sqlserver_semantic_mcp/server/resources/schema.py +144 -0
  42. sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
  43. sqlserver_semantic_mcp/server/tools/cache.py +24 -0
  44. sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
  45. sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
  46. sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
  47. sqlserver_semantic_mcp/server/tools/policy.py +48 -0
  48. sqlserver_semantic_mcp/server/tools/query.py +159 -0
  49. sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
  50. sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
  51. sqlserver_semantic_mcp/server/tools/shape.py +204 -0
  52. sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
  53. sqlserver_semantic_mcp/services/__init__.py +0 -0
  54. sqlserver_semantic_mcp/services/metadata_service.py +173 -0
  55. sqlserver_semantic_mcp/services/metrics_service.py +124 -0
  56. sqlserver_semantic_mcp/services/object_service.py +187 -0
  57. sqlserver_semantic_mcp/services/policy_service.py +59 -0
  58. sqlserver_semantic_mcp/services/query_service.py +321 -0
  59. sqlserver_semantic_mcp/services/relationship_service.py +160 -0
  60. sqlserver_semantic_mcp/services/semantic_service.py +277 -0
  61. sqlserver_semantic_mcp/workflows/__init__.py +26 -0
  62. sqlserver_semantic_mcp/workflows/bundle.py +157 -0
  63. sqlserver_semantic_mcp/workflows/contracts.py +64 -0
  64. sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
  65. sqlserver_semantic_mcp/workflows/facade.py +117 -0
  66. sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
  67. sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
  68. sqlserver_semantic_mcp/workflows/router.py +59 -0
  69. sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
  70. sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
  71. sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
  72. sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
  73. sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
  74. sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,203 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ from dataclasses import dataclass
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Optional
7
+
8
+ import aiosqlite
9
+
10
+ from ...config import Config
11
+ from ..connection import open_connection
12
+ from ..queries.metadata_queries import (
13
+ GET_TABLES, GET_COLUMNS, GET_PRIMARY_KEYS,
14
+ GET_FOREIGN_KEYS, GET_INDEXES, GET_OBJECTS,
15
+ )
16
+ from ..queries.comment_queries import GET_COMMENTS
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class StructuralSnapshot:
    """In-memory capture of one database's structural metadata.

    Each field holds raw result rows fetched from SQL Server; the tuple
    layout noted per field mirrors the column order of the corresponding
    query in ``metadata_queries`` / ``comment_queries``.
    """

    tables: list[tuple]        # (schema, table)
    columns: list[tuple]       # (schema, table, col, type, maxlen, nullable, default, ordinal)
    primary_keys: list[tuple]  # (schema, table, col)
    foreign_keys: list[tuple]  # (schema, table, col, ref_schema, ref_table, ref_col)
    indexes: list[tuple]       # (schema, table, index_name, is_unique, is_pk, cols)
    objects: list[tuple]       # (schema, name, type)
    comments: list[tuple]      # (schema, object, column, description)
30
+
31
+
32
+ def _sha256(obj: Any) -> str:
33
+ payload = json.dumps(obj, sort_keys=True, default=str)
34
+ return hashlib.sha256(payload.encode("utf-8")).hexdigest()
35
+
36
+
37
def compute_structural_hash(
    tables, columns, primary_keys, foreign_keys, indexes,
) -> str:
    """Return a stable fingerprint of table/column/key/index metadata.

    Every row collection is normalized to sorted lists-of-lists, so the
    result is insensitive to row ordering and tuple-vs-list differences.
    """
    normalized = {}
    for key, rows in (
        ("tables", tables),
        ("columns", columns),
        ("primary_keys", primary_keys),
        ("foreign_keys", foreign_keys),
        ("indexes", indexes),
    ):
        normalized[key] = sorted(list(row) for row in rows)
    return _sha256(normalized)
47
+
48
+
49
def compute_object_hash(objects) -> str:
    """Fingerprint of the routine/view list; insensitive to row order."""
    rows = sorted(list(row) for row in objects)
    return _sha256({"objects": rows})
51
+
52
+
53
def compute_comment_hash(comments) -> str:
    """Fingerprint of the MS_Description rows; insensitive to row order."""
    rows = sorted(list(row) for row in comments)
    return _sha256({"comments": rows})
55
+
56
+
57
async def read_schema_version(db_path: str, database: str) -> Optional[dict]:
    """Load the cached schema_version row for *database*.

    Returns the row as a plain dict, or None when no snapshot has been
    captured for this database yet.
    """
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            "SELECT * FROM schema_version WHERE database_name = ?",
            (database,),
        )
        row = await cursor.fetchone()
        if row is None:
            return None
        return dict(row)
66
+
67
+
68
async def write_structural_snapshot(
    db_path: str, database: str, snap: StructuralSnapshot,
) -> dict:
    """Persist *snap* for *database* into the SQLite cache, atomically.

    Strategy: inside one transaction, delete the previous rows for this
    database, bulk-insert the new snapshot, upsert the hash record in
    schema_version, and flag semantic rows whose stored hashes no longer
    match as 'dirty'. Returns the computed hashes plus capture timestamp.

    Raises whatever the underlying aiosqlite calls raise; the transaction
    is rolled back before re-raising.
    """
    structural_hash = compute_structural_hash(
        snap.tables, snap.columns, snap.primary_keys,
        snap.foreign_keys, snap.indexes,
    )
    object_hash = compute_object_hash(snap.objects)
    comment_hash = compute_comment_hash(snap.comments)
    # Timezone-aware UTC timestamp, stored in ISO-8601 form.
    captured_at = datetime.now(timezone.utc).isoformat()

    async with aiosqlite.connect(db_path) as db:
        await db.execute("BEGIN")
        try:
            # Replace-by-delete: clear this database's rows from every
            # structural cache table. Table names come from this fixed
            # list, so the f-string below is not an injection risk.
            for tbl in [
                "sc_tables", "sc_columns", "sc_primary_keys",
                "sc_foreign_keys", "sc_indexes", "sc_objects", "sc_comments",
            ]:
                await db.execute(
                    f"DELETE FROM {tbl} WHERE database_name = ?", (database,),
                )

            # Each snapshot row is prefixed with the database name; the
            # tuple layout must match the column list of its INSERT.
            await db.executemany(
                "INSERT INTO sc_tables (database_name, schema_name, table_name) "
                "VALUES (?,?,?)",
                [(database, s, t) for (s, t) in snap.tables],
            )
            await db.executemany(
                "INSERT INTO sc_columns "
                "(database_name, schema_name, table_name, column_name, data_type, "
                "max_length, is_nullable, column_default, ordinal_position) "
                "VALUES (?,?,?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.columns],
            )
            await db.executemany(
                "INSERT INTO sc_primary_keys "
                "(database_name, schema_name, table_name, column_name) "
                "VALUES (?,?,?,?)",
                [(database, *row) for row in snap.primary_keys],
            )
            await db.executemany(
                "INSERT INTO sc_foreign_keys "
                "(database_name, schema_name, table_name, column_name, "
                "ref_schema, ref_table, ref_column) VALUES (?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.foreign_keys],
            )
            await db.executemany(
                "INSERT INTO sc_indexes "
                "(database_name, schema_name, table_name, index_name, "
                "is_unique, is_primary_key, columns) VALUES (?,?,?,?,?,?,?)",
                [(database, *row) for row in snap.indexes],
            )
            await db.executemany(
                "INSERT INTO sc_objects "
                "(database_name, schema_name, object_name, object_type) "
                "VALUES (?,?,?,?)",
                [(database, *row) for row in snap.objects],
            )
            await db.executemany(
                "INSERT INTO sc_comments "
                "(database_name, schema_name, object_name, column_name, description) "
                "VALUES (?,?,?,?,?)",
                [(database, *row) for row in snap.comments],
            )

            # Upsert the hash record keyed by database name.
            await db.execute(
                "INSERT OR REPLACE INTO schema_version "
                "(database_name, structural_hash, object_hash, comment_hash, "
                " captured_at) VALUES (?,?,?,?,?)",
                (database, structural_hash, object_hash, comment_hash, captured_at),
            )

            # Cascade: mark stale semantic rows dirty
            # (rows whose stored hash differs from the fresh one —
            # presumably re-analyzed later by the background fill loop).
            await db.execute(
                "UPDATE sem_table_analysis SET status='dirty' "
                "WHERE database_name=? AND structural_hash<>?",
                (database, structural_hash),
            )
            await db.execute(
                "UPDATE sem_object_definitions SET status='dirty' "
                "WHERE database_name=? AND object_hash<>?",
                (database, object_hash),
            )
            await db.commit()
        except Exception:
            # Undo all partial writes, then surface the original error.
            await db.rollback()
            raise

    return {
        "structural_hash": structural_hash,
        "object_hash": object_hash,
        "comment_hash": comment_hash,
        "captured_at": captured_at,
    }
162
+
163
+
164
def fetch_snapshot_from_server(cfg: Config) -> StructuralSnapshot:
    """Run all seven structural catalog queries against SQL Server (blocking).

    Executes the queries sequentially on a single connection/cursor and
    packs the raw result rows into a StructuralSnapshot.
    """
    sql_statements = (
        GET_TABLES, GET_COLUMNS, GET_PRIMARY_KEYS,
        GET_FOREIGN_KEYS, GET_INDEXES, GET_OBJECTS, GET_COMMENTS,
    )
    collected: list[list[tuple]] = []
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            for statement in sql_statements:
                cursor.execute(statement)
                collected.append(list(cursor.fetchall()))
        finally:
            cursor.close()

    tables, columns, pks, fks, indexes, objs, comments = collected
    return StructuralSnapshot(
        tables=tables,
        columns=columns,
        primary_keys=pks,
        foreign_keys=fks,
        indexes=indexes,
        objects=objs,
        comments=comments,
    )
193
+
194
+
195
async def warmup_structural_cache(cfg: Config) -> dict:
    """Fetch a structural snapshot from SQL Server and persist it to SQLite.

    Returns the hash dict produced by :func:`write_structural_snapshot`.

    The pymssql fetch is synchronous; calling it directly inside this
    coroutine would block the event loop for the full duration of the
    seven catalog queries, so it is offloaded to a worker thread.
    """
    import asyncio  # local import keeps this fix self-contained

    snap = await asyncio.to_thread(fetch_snapshot_from_server, cfg)
    logger.info(
        "Structural snapshot: %d tables, %d columns, %d FKs, %d objects",
        len(snap.tables), len(snap.columns),
        len(snap.foreign_keys), len(snap.objects),
    )
    return await write_structural_snapshot(cfg.cache_path, cfg.mssql_database, snap)
@@ -0,0 +1,78 @@
1
+ import logging
2
+ from contextlib import contextmanager
3
+ from typing import Any, Iterator
4
+
5
+ import pymssql
6
+
7
+ from ..config import Config, get_config
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def build_pymssql_kwargs(cfg: Config) -> dict[str, Any]:
    """Translate our Config into keyword arguments for pymssql.connect().

    Handles three quirks:
    * "(localdb)\\Instance" server names are rewritten to ".\\Instance".
    * Azure SQL hosts (*.database.windows.net) and encrypted connections
      both force TDS protocol 7.4.
    * When Windows auth is disabled, SQL credentials are mandatory.
    """
    server = cfg.mssql_server
    if server.lower().startswith("(localdb)\\"):
        _, instance = server.split("\\", 1)
        server = f".\\{instance}"

    kwargs: dict[str, Any] = {
        "server": server,
        "database": cfg.mssql_database,
        "port": cfg.mssql_port,
    }

    is_azure_host = ".database.windows.net" in server.lower()
    if is_azure_host or cfg.mssql_encrypt:
        kwargs["tds_version"] = "7.4"

    if not cfg.mssql_windows_auth:
        if cfg.mssql_user is None or cfg.mssql_password is None:
            raise ValueError(
                "SQL auth requires SEMANTIC_MCP_MSSQL_USER and "
                "SEMANTIC_MCP_MSSQL_PASSWORD"
            )
        kwargs["user"] = cfg.mssql_user
        kwargs["password"] = cfg.mssql_password

    return kwargs
40
+
41
+
42
@contextmanager
def open_connection(cfg: Config | None = None) -> Iterator[Any]:
    """Yield a live pymssql connection, closing it on exit.

    Falls back to the process-wide config when *cfg* is omitted.
    """
    active_cfg = cfg or get_config()
    conn = pymssql.connect(**build_pymssql_kwargs(active_cfg))
    try:
        yield conn
    finally:
        conn.close()
51
+
52
+
53
def fetch_all(cfg: Config, sql: str, params: tuple = ()) -> list[tuple]:
    """Execute *sql* with *params* and return every result row.

    Opens a short-lived connection per call. The cursor is closed in a
    ``finally`` block so an error raised by ``execute``/``fetchall`` no
    longer leaks it for the remainder of the connection's lifetime.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()
60
+
61
+
62
def fetch_one(cfg: Config, sql: str, params: tuple = ()) -> tuple | None:
    """Execute *sql* with *params* and return the first row, or None.

    Mirrors :func:`fetch_all`, with the cursor now closed in a ``finally``
    block so a failing ``execute`` no longer leaks it.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchone()
        finally:
            cursor.close()
69
+
70
+
71
def execute(cfg: Config, sql: str, params: tuple = ()) -> int:
    """Execute a statement, commit, and return the affected row count.

    The cursor is closed via ``finally`` even when execute/commit raises;
    an uncommitted transaction is simply discarded when the connection
    closes at the end of the ``with`` block.
    """
    with open_connection(cfg) as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            affected = cursor.rowcount
            conn.commit()
        finally:
            cursor.close()
        return affected
@@ -0,0 +1,18 @@
1
# Every MS_Description extended property at object/column level
# (ep.class = 1). COLUMN_NAME is '' for object-level descriptions, whose
# ep.minor_id = 0 matches no sys.columns row (hence the LEFT JOIN).
GET_COMMENTS = """
SELECT
s.name AS SCHEMA_NAME,
o.name AS OBJECT_NAME,
COALESCE(c.name, '') AS COLUMN_NAME,
CAST(ep.value AS NVARCHAR(MAX)) AS DESCRIPTION
FROM sys.extended_properties ep
JOIN sys.objects o ON ep.major_id = o.object_id
JOIN sys.schemas s ON o.schema_id = s.schema_id
LEFT JOIN sys.columns c
ON ep.major_id = c.object_id AND ep.minor_id = c.column_id
WHERE ep.name = 'MS_Description' AND ep.class = 1
ORDER BY s.name, o.name, ep.minor_id
"""

# Full T-SQL source of a module by qualified name; %s is the pymssql
# parameter placeholder. NOTE(review): an identical constant exists in
# object_queries.py — consider keeping a single definition.
GET_OBJECT_DEFINITION = """
SELECT OBJECT_DEFINITION(OBJECT_ID(%s))
"""
@@ -0,0 +1,70 @@
1
# All user base tables (views excluded), ordered for deterministic hashing.
GET_TABLES = """
SELECT TABLE_SCHEMA, TABLE_NAME
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_TYPE = 'BASE TABLE'
ORDER BY TABLE_SCHEMA, TABLE_NAME
"""

# Column metadata for every table/view; IS_NULLABLE normalized to 1/0.
GET_COLUMNS = """
SELECT
TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME,
DATA_TYPE, CHARACTER_MAXIMUM_LENGTH,
CASE WHEN IS_NULLABLE = 'YES' THEN 1 ELSE 0 END AS IS_NULLABLE,
COLUMN_DEFAULT, ORDINAL_POSITION
FROM INFORMATION_SCHEMA.COLUMNS
ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION
"""

# Primary-key columns in key order.
# NOTE(review): the join matches on CONSTRAINT_NAME + TABLE_SCHEMA only —
# confirm this stays unambiguous if cross-database use is ever added.
GET_PRIMARY_KEYS = """
SELECT tc.TABLE_SCHEMA, tc.TABLE_NAME, kcu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
ORDER BY tc.TABLE_SCHEMA, tc.TABLE_NAME, kcu.ORDINAL_POSITION
"""

# Foreign-key column pairs (referencing -> referenced); composite keys are
# matched positionally via ORDINAL_POSITION.
GET_FOREIGN_KEYS = """
SELECT
fk.TABLE_SCHEMA, fk.TABLE_NAME, fk.COLUMN_NAME,
pk.TABLE_SCHEMA AS REF_SCHEMA,
pk.TABLE_NAME AS REF_TABLE,
pk.COLUMN_NAME AS REF_COLUMN
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE fk
ON rc.CONSTRAINT_NAME = fk.CONSTRAINT_NAME
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE pk
ON rc.UNIQUE_CONSTRAINT_NAME = pk.CONSTRAINT_NAME
AND fk.ORDINAL_POSITION = pk.ORDINAL_POSITION
ORDER BY fk.TABLE_SCHEMA, fk.TABLE_NAME, fk.ORDINAL_POSITION
"""

# Named indexes with key columns aggregated into a comma-separated list
# (included columns excluded). STRING_AGG requires SQL Server 2017+.
GET_INDEXES = """
SELECT
s.name AS SCHEMA_NAME,
t.name AS TABLE_NAME,
i.name AS INDEX_NAME,
CAST(i.is_unique AS INT),
CAST(i.is_primary_key AS INT),
STRING_AGG(c.name, ',') WITHIN GROUP (ORDER BY ic.key_ordinal) AS COLS
FROM sys.indexes i
JOIN sys.tables t ON i.object_id = t.object_id
JOIN sys.schemas s ON t.schema_id = s.schema_id
JOIN sys.index_columns ic
ON i.object_id = ic.object_id AND i.index_id = ic.index_id
JOIN sys.columns c
ON ic.object_id = c.object_id AND ic.column_id = c.column_id
WHERE i.name IS NOT NULL AND ic.is_included_column = 0
GROUP BY s.name, t.name, i.name, i.is_unique, i.is_primary_key
ORDER BY s.name, t.name, i.name
"""

# Routines (procedures/functions) plus views, as (schema, name, type).
GET_OBJECTS = """
SELECT ROUTINE_SCHEMA, ROUTINE_NAME, ROUTINE_TYPE
FROM INFORMATION_SCHEMA.ROUTINES
UNION ALL
SELECT TABLE_SCHEMA, TABLE_NAME, 'VIEW'
FROM INFORMATION_SCHEMA.VIEWS
ORDER BY 1, 2, 3
"""
@@ -0,0 +1,15 @@
1
# Full T-SQL source of a module by qualified name; %s is the pymssql
# parameter placeholder.
# NOTE(review): duplicated in comment_queries.py — consider consolidating.
GET_OBJECT_DEFINITION = """
SELECT OBJECT_DEFINITION(OBJECT_ID(%s))
"""

# Objects the given module references (tables, views, other modules),
# via sys.sql_expression_dependencies. Rows with NULL referenced_id
# (unresolved references) are filtered out.
GET_OBJECT_DEPENDENCIES = """
SELECT
OBJECT_SCHEMA_NAME(d.referenced_id) AS REF_SCHEMA,
OBJECT_NAME(d.referenced_id) AS REF_NAME,
o.type_desc AS REF_TYPE
FROM sys.sql_expression_dependencies d
LEFT JOIN sys.objects o ON d.referenced_id = o.object_id
WHERE d.referencing_id = OBJECT_ID(%s)
AND d.referenced_id IS NOT NULL
ORDER BY REF_SCHEMA, REF_NAME
"""
@@ -0,0 +1,90 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from mcp.server.stdio import stdio_server
5
+
6
+ from .config import get_config
7
+ from .infrastructure.background import background_fill_loop
8
+ from .infrastructure.cache.semantic import enqueue_all_tables
9
+ from .infrastructure.cache.store import init_store
10
+ from .infrastructure.cache.structural import (
11
+ read_schema_version, warmup_structural_cache,
12
+ )
13
+ from .server.app import app, get_context
14
+ from .server import resources # noqa: F401
15
+ from .server.prompts import register_prompts
16
+ from .server.tools import register_all
17
+
18
+ logging.basicConfig(
19
+ level=logging.INFO,
20
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
21
+ )
22
+ logger = logging.getLogger("sqlserver_semantic_mcp")
23
+
24
+
25
+ async def _startup() -> asyncio.Task | None:
26
+ cfg = get_config()
27
+ logger.info(
28
+ "Starting sqlserver-semantic-mcp against %s/%s",
29
+ cfg.mssql_server, cfg.mssql_database,
30
+ )
31
+
32
+ await init_store(cfg.cache_path)
33
+
34
+ bg_task: asyncio.Task | None = None
35
+ if cfg.cache_enabled:
36
+ existing = await read_schema_version(cfg.cache_path, cfg.mssql_database)
37
+ should_warmup = (
38
+ cfg.startup_mode == "full"
39
+ or existing is None
40
+ )
41
+ if should_warmup:
42
+ if existing is None:
43
+ logger.info("No cache found; running structural warmup")
44
+ else:
45
+ logger.info(
46
+ "Startup mode '%s' requires a fresh structural warmup "
47
+ "(cached_at=%s)",
48
+ cfg.startup_mode,
49
+ existing["captured_at"],
50
+ )
51
+ result = await warmup_structural_cache(cfg)
52
+ structural_hash = result["structural_hash"]
53
+ else:
54
+ logger.info(
55
+ "Startup mode '%s' reuses existing cache (captured_at=%s)",
56
+ cfg.startup_mode,
57
+ existing["captured_at"],
58
+ )
59
+ structural_hash = existing["structural_hash"]
60
+ await enqueue_all_tables(
61
+ cfg.cache_path, cfg.mssql_database, structural_hash,
62
+ )
63
+ bg_task = asyncio.create_task(background_fill_loop(cfg))
64
+
65
+ register_all()
66
+ register_prompts()
67
+ get_context()
68
+ return bg_task
69
+
70
+
71
+ async def _run() -> None:
72
+ bg_task = await _startup()
73
+ try:
74
+ async with stdio_server() as (r, w):
75
+ await app.run(r, w, app.create_initialization_options())
76
+ finally:
77
+ if bg_task is not None:
78
+ bg_task.cancel()
79
+ try:
80
+ await bg_task
81
+ except (asyncio.CancelledError, Exception):
82
+ pass
83
+
84
+
85
+ def main() -> None:
86
+ asyncio.run(_run())
87
+
88
+
89
+ if __name__ == "__main__":
90
+ main()
File without changes
@@ -0,0 +1,194 @@
1
+ import re
2
+ from dataclasses import dataclass, field
3
+ from typing import List
4
+
5
+ from ..domain.enums import SqlOperation, RiskLevel
6
+
7
+
8
+ @dataclass
9
+ class SqlIntent:
10
+ primary_operation: SqlOperation
11
+ has_where_clause: bool
12
+ has_top_clause: bool
13
+ affected_tables: List[str] = field(default_factory=list)
14
+ risk_level: RiskLevel = RiskLevel.LOW
15
+ is_multi_statement: bool = False
16
+ statement_count: int = 1
17
+ # v0.5 heuristics — used by workflow router / risk estimation.
18
+ is_sql_like: bool = True
19
+ confidence: float = 0.9
20
+ requires_discovery: bool = False
21
+ has_unqualified_tables: bool = False
22
+ contains_dynamic_sql: bool = False
23
+ contains_cte: bool = False
24
+
25
+
26
+ _OP_MAP = {
27
+ "SELECT": SqlOperation.SELECT,
28
+ "INSERT": SqlOperation.INSERT,
29
+ "UPDATE": SqlOperation.UPDATE,
30
+ "DELETE": SqlOperation.DELETE,
31
+ "TRUNCATE": SqlOperation.TRUNCATE,
32
+ "CREATE": SqlOperation.CREATE,
33
+ "ALTER": SqlOperation.ALTER,
34
+ "DROP": SqlOperation.DROP,
35
+ "EXEC": SqlOperation.EXEC,
36
+ "EXECUTE": SqlOperation.EXECUTE,
37
+ "MERGE": SqlOperation.MERGE,
38
+ "WITH": SqlOperation.SELECT, # CTE — treat body as SELECT for routing
39
+ }
40
+
41
+ _IDENT = r"\[?[\w]+\]?(?:\.\[?[\w]+\]?)?"
42
+
43
+
44
+ def _strip_comments(sql: str) -> str:
45
+ sql = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
46
+ sql = re.sub(r"--[^\n]*", "", sql)
47
+ return sql.strip()
48
+
49
+
50
+ def _split_statements(sql: str) -> list[str]:
51
+ parts = re.split(r";\s*", sql)
52
+ return [p.strip() for p in parts if p.strip()]
53
+
54
+
55
+ def _detect_operation(sql: str) -> SqlOperation:
56
+ m = re.match(r"\s*([A-Za-z]+)", sql)
57
+ if not m:
58
+ return SqlOperation.UNKNOWN
59
+ return _OP_MAP.get(m.group(1).upper(), SqlOperation.UNKNOWN)
60
+
61
+
62
+ def _extract_tables(sql: str, operation: SqlOperation) -> list[str]:
63
+ tables: list[str] = []
64
+ tables.extend(re.findall(rf"\bFROM\s+({_IDENT})", sql, re.IGNORECASE))
65
+ tables.extend(re.findall(rf"\bJOIN\s+({_IDENT})", sql, re.IGNORECASE))
66
+
67
+ if operation == SqlOperation.UPDATE:
68
+ m = re.search(rf"\bUPDATE\s+({_IDENT})", sql, re.IGNORECASE)
69
+ if m:
70
+ tables.append(m.group(1))
71
+ elif operation == SqlOperation.INSERT:
72
+ m = re.search(rf"\bINTO\s+({_IDENT})", sql, re.IGNORECASE)
73
+ if m:
74
+ tables.append(m.group(1))
75
+ elif operation == SqlOperation.DELETE:
76
+ m = re.search(rf"\bDELETE\s+(?:FROM\s+)?({_IDENT})", sql, re.IGNORECASE)
77
+ if m:
78
+ tables.append(m.group(1))
79
+ elif operation == SqlOperation.MERGE:
80
+ m = re.search(rf"\bMERGE\s+(?:INTO\s+)?({_IDENT})", sql, re.IGNORECASE)
81
+ if m:
82
+ tables.append(m.group(1))
83
+ elif operation in (SqlOperation.TRUNCATE, SqlOperation.DROP, SqlOperation.ALTER):
84
+ m = re.search(rf"\b(?:TABLE|VIEW)\s+({_IDENT})", sql, re.IGNORECASE)
85
+ if m:
86
+ tables.append(m.group(1))
87
+
88
+ seen = set()
89
+ out = []
90
+ for t in tables:
91
+ if t.lower() not in seen:
92
+ seen.add(t.lower())
93
+ out.append(t)
94
+ return out
95
+
96
+
97
+ def _compute_risk(
98
+ operation: SqlOperation, has_where: bool, is_multi: bool,
99
+ ) -> RiskLevel:
100
+ if operation in (SqlOperation.DROP, SqlOperation.TRUNCATE):
101
+ return RiskLevel.CRITICAL
102
+ if operation == SqlOperation.ALTER:
103
+ return RiskLevel.HIGH
104
+ if operation == SqlOperation.DELETE:
105
+ return RiskLevel.HIGH if not has_where else RiskLevel.MEDIUM
106
+ if operation in (SqlOperation.UPDATE, SqlOperation.MERGE):
107
+ return RiskLevel.HIGH if not has_where else RiskLevel.MEDIUM
108
+ if operation in (SqlOperation.INSERT, SqlOperation.CREATE,
109
+ SqlOperation.EXEC, SqlOperation.EXECUTE):
110
+ return RiskLevel.MEDIUM
111
+ if is_multi:
112
+ return RiskLevel.MEDIUM
113
+ return RiskLevel.LOW
114
+
115
+
116
+ _SQL_KEYWORDS = (
117
+ "select", "insert", "update", "delete", "truncate", "create",
118
+ "alter", "drop", "exec", "execute", "merge", "with", "from", "where",
119
+ "join",
120
+ )
121
+
122
+
123
+ def _looks_sql_like(text: str) -> bool:
124
+ """Heuristic: does this look like SQL rather than a natural-language ask?"""
125
+ if not text:
126
+ return False
127
+ lowered = text.lower()
128
+ if not any(kw in lowered for kw in _SQL_KEYWORDS):
129
+ return False
130
+ words = [w for w in re.split(r"\s+", lowered) if w]
131
+ if not words:
132
+ return False
133
+ punct_ratio = sum(1 for ch in text if ch in ",();.=*") / max(len(text), 1)
134
+ if words[0] in _SQL_KEYWORDS:
135
+ return True
136
+ return punct_ratio >= 0.02
137
+
138
+
139
+ def _has_unqualified(tables: list[str]) -> bool:
140
+ return any("." not in t.strip("[]") for t in tables)
141
+
142
+
143
+ _DYNAMIC_PAT = re.compile(
144
+ r"\b(EXEC(?:UTE)?\s*\(|sp_executesql)\b", re.IGNORECASE,
145
+ )
146
+ _CTE_PAT = re.compile(r"^\s*WITH\b", re.IGNORECASE)
147
+
148
+
149
+ def analyze_sql(sql: str) -> SqlIntent:
150
+ clean = _strip_comments(sql)
151
+ statements = _split_statements(clean)
152
+ is_multi = len(statements) > 1
153
+ first = statements[0] if statements else ""
154
+
155
+ operation = _detect_operation(first)
156
+ has_where = bool(re.search(r"\bWHERE\b", first, re.IGNORECASE))
157
+ has_top = bool(re.search(r"\bTOP\b", first, re.IGNORECASE))
158
+ tables = _extract_tables(first, operation)
159
+ risk = _compute_risk(operation, has_where, is_multi)
160
+
161
+ is_sql_like = _looks_sql_like(clean)
162
+ contains_cte = bool(_CTE_PAT.match(first))
163
+ contains_dynamic = bool(_DYNAMIC_PAT.search(first))
164
+ unqualified = _has_unqualified(tables)
165
+
166
+ if operation == SqlOperation.UNKNOWN:
167
+ confidence = 0.1 if not is_sql_like else 0.4
168
+ elif unqualified and operation != SqlOperation.SELECT:
169
+ confidence = 0.6
170
+ elif contains_dynamic:
171
+ confidence = 0.5
172
+ else:
173
+ confidence = 0.9
174
+
175
+ requires_discovery = (
176
+ not is_sql_like
177
+ or operation == SqlOperation.UNKNOWN
178
+ )
179
+
180
+ return SqlIntent(
181
+ primary_operation=operation,
182
+ has_where_clause=has_where,
183
+ has_top_clause=has_top,
184
+ affected_tables=tables,
185
+ risk_level=risk,
186
+ is_multi_statement=is_multi,
187
+ statement_count=len(statements),
188
+ is_sql_like=is_sql_like,
189
+ confidence=confidence,
190
+ requires_discovery=requires_discovery,
191
+ has_unqualified_tables=unqualified,
192
+ contains_dynamic_sql=contains_dynamic,
193
+ contains_cte=contains_cte,
194
+ )