sqlserver-semantic-mcp 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlserver_semantic_mcp/__init__.py +1 -0
- sqlserver_semantic_mcp/config.py +78 -0
- sqlserver_semantic_mcp/domain/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/enums.py +48 -0
- sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/models/column.py +14 -0
- sqlserver_semantic_mcp/domain/models/object.py +13 -0
- sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
- sqlserver_semantic_mcp/domain/models/table.py +29 -0
- sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/background.py +59 -0
- sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
- sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
- sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
- sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
- sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
- sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
- sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
- sqlserver_semantic_mcp/main.py +90 -0
- sqlserver_semantic_mcp/policy/__init__.py +0 -0
- sqlserver_semantic_mcp/policy/analyzer.py +194 -0
- sqlserver_semantic_mcp/policy/enforcer.py +104 -0
- sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
- sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
- sqlserver_semantic_mcp/policy/intents/base.py +17 -0
- sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
- sqlserver_semantic_mcp/policy/intents/router.py +21 -0
- sqlserver_semantic_mcp/policy/loader.py +90 -0
- sqlserver_semantic_mcp/policy/models.py +43 -0
- sqlserver_semantic_mcp/server/__init__.py +0 -0
- sqlserver_semantic_mcp/server/app.py +125 -0
- sqlserver_semantic_mcp/server/compact.py +74 -0
- sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
- sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
- sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
- sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
- sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
- sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
- sqlserver_semantic_mcp/server/resources/schema.py +144 -0
- sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
- sqlserver_semantic_mcp/server/tools/cache.py +24 -0
- sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
- sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
- sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
- sqlserver_semantic_mcp/server/tools/policy.py +48 -0
- sqlserver_semantic_mcp/server/tools/query.py +159 -0
- sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
- sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
- sqlserver_semantic_mcp/server/tools/shape.py +204 -0
- sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
- sqlserver_semantic_mcp/services/__init__.py +0 -0
- sqlserver_semantic_mcp/services/metadata_service.py +173 -0
- sqlserver_semantic_mcp/services/metrics_service.py +124 -0
- sqlserver_semantic_mcp/services/object_service.py +187 -0
- sqlserver_semantic_mcp/services/policy_service.py +59 -0
- sqlserver_semantic_mcp/services/query_service.py +321 -0
- sqlserver_semantic_mcp/services/relationship_service.py +160 -0
- sqlserver_semantic_mcp/services/semantic_service.py +277 -0
- sqlserver_semantic_mcp/workflows/__init__.py +26 -0
- sqlserver_semantic_mcp/workflows/bundle.py +157 -0
- sqlserver_semantic_mcp/workflows/contracts.py +64 -0
- sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
- sqlserver_semantic_mcp/workflows/facade.py +117 -0
- sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
- sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
- sqlserver_semantic_mcp/workflows/router.py +59 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
sqlserver_semantic_mcp/services/relationship_service.py
@@ -0,0 +1,160 @@
from typing import Optional
from collections import deque
import aiosqlite

from ..infrastructure.cache.structural import read_schema_version


_GRAPH_CACHE: dict[tuple[str, str, str], dict[tuple[str, str], list[dict]]] = {}


async def get_table_relationships(
    db_path: str, database: str, schema: str, table: str,
) -> list[dict]:
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT schema_name, table_name, column_name, "
            "       ref_schema, ref_table, ref_column "
            "FROM sc_foreign_keys "
            "WHERE database_name=? AND schema_name=? AND table_name=?",
            (database, schema, table),
        )
        outbound = [dict(r) for r in await cur.fetchall()]

        cur = await db.execute(
            "SELECT schema_name, table_name, column_name, "
            "       ref_schema, ref_table, ref_column "
            "FROM sc_foreign_keys "
            "WHERE database_name=? AND ref_schema=? AND ref_table=?",
            (database, schema, table),
        )
        inbound = [dict(r) for r in await cur.fetchall()]

    results = []
    for r in outbound:
        results.append({
            "direction": "outbound",
            "from_schema": r["schema_name"], "from_table": r["table_name"],
            "from_column": r["column_name"],
            "to_schema": r["ref_schema"], "to_table": r["ref_table"],
            "to_column": r["ref_column"],
            "type": "many_to_one",
        })
    for r in inbound:
        results.append({
            "direction": "inbound",
            "from_schema": r["schema_name"], "from_table": r["table_name"],
            "from_column": r["column_name"],
            "to_schema": r["ref_schema"], "to_table": r["ref_table"],
            "to_column": r["ref_column"],
            "type": "one_to_many",
        })
    return results


async def _load_fk_graph(
    db_path: str, database: str,
) -> dict[tuple[str, str], list[dict]]:
    ver = await read_schema_version(db_path, database)
    structural_hash = ver["structural_hash"] if ver else ""
    cache_key = (db_path, database, structural_hash)
    cached = _GRAPH_CACHE.get(cache_key)
    if cached is not None:
        return cached

    graph: dict[tuple[str, str], list[dict]] = {}
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT schema_name, table_name, column_name, "
            "       ref_schema, ref_table, ref_column "
            "FROM sc_foreign_keys WHERE database_name=?",
            (database,),
        )
        for r in await cur.fetchall():
            src = (r["schema_name"], r["table_name"])
            dst = (r["ref_schema"], r["ref_table"])
            graph.setdefault(src, []).append({
                "from_schema": src[0], "from_table": src[1],
                "from_column": r["column_name"],
                "to_schema": dst[0], "to_table": dst[1],
                "to_column": r["ref_column"],
                "direction": "outbound",
            })
            graph.setdefault(dst, []).append({
                "from_schema": dst[0], "from_table": dst[1],
                "from_column": r["ref_column"],
                "to_schema": src[0], "to_table": src[1],
                "to_column": r["column_name"],
                "direction": "inbound",
            })
    stale_keys = [
        key for key in _GRAPH_CACHE
        if key[:2] == (db_path, database) and key != cache_key
    ]
    for key in stale_keys:
        _GRAPH_CACHE.pop(key, None)
    _GRAPH_CACHE[cache_key] = graph
    return graph


async def find_join_path(
    db_path: str, database: str,
    from_schema: str, from_table: str,
    to_schema: str, to_table: str,
    max_hops: int = 5,
) -> Optional[list[dict]]:
    graph = await _load_fk_graph(db_path, database)
    start = (from_schema, from_table)
    target = (to_schema, to_table)
    if start == target:
        return []

    queue = deque([(start, [])])
    visited = {start}
    while queue:
        node, path = queue.popleft()
        if len(path) >= max_hops:
            continue
        for edge in graph.get(node, []):
            nxt = (edge["to_schema"], edge["to_table"])
            if nxt in visited:
                continue
            new_path = path + [edge]
            if nxt == target:
                return new_path
            visited.add(nxt)
            queue.append((nxt, new_path))
    return None


async def get_dependency_chain(
    db_path: str, database: str, schema: str, table: str,
    max_depth: int = 10,
    *, schemas: Optional[list[str]] = None,
) -> list[dict]:
    graph = await _load_fk_graph(db_path, database)
    start = (schema, table)
    visited: dict[tuple[str, str], int] = {start: 0}
    queue = deque([start])
    allowed = set(schemas) if schemas else None

    while queue:
        node = queue.popleft()
        depth = visited[node]
        if depth >= max_depth:
            continue
        for edge in graph.get(node, []):
            nxt = (edge["to_schema"], edge["to_table"])
            if nxt not in visited:
                if allowed is not None and nxt[0] not in allowed:
                    continue
                visited[nxt] = depth + 1
                queue.append(nxt)

    return [
        {"schema_name": s, "table_name": t, "depth": d}
        for (s, t), d in visited.items()
        if (s, t) != start
    ]
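find_join_path is a plain breadth-first search over the undirected FK adjacency map that _load_fk_graph builds, so the first path it returns is a shortest one. A minimal standalone sketch of the same idea, with made-up table names and edges (no cache database required):

from collections import deque

# Hypothetical FK edges: (from_table, to_table, via_column).
edges = [
    ("dbo.OrderLine", "dbo.Order", "order_id"),
    ("dbo.Order", "dbo.Customer", "customer_id"),
    ("dbo.Order", "dbo.Store", "store_id"),
]

# Undirected adjacency map, mirroring what _load_fk_graph derives
# from sc_foreign_keys.
graph: dict[str, list[tuple[str, str]]] = {}
for src, dst, col in edges:
    graph.setdefault(src, []).append((dst, col))
    graph.setdefault(dst, []).append((src, col))

def find_path(start: str, target: str, max_hops: int = 5):
    if start == target:
        return []
    queue = deque([(start, [])])
    visited = {start}
    while queue:
        node, path = queue.popleft()
        if len(path) >= max_hops:  # bound the search depth
            continue
        for nxt, col in graph.get(node, []):
            if nxt in visited:
                continue
            new_path = path + [(node, nxt, col)]
            if nxt == target:
                return new_path  # BFS: the first hit is a shortest path
            visited.add(nxt)
            queue.append((nxt, new_path))
    return None

print(find_path("dbo.OrderLine", "dbo.Customer"))
# [('dbo.OrderLine', 'dbo.Order', 'order_id'),
#  ('dbo.Order', 'dbo.Customer', 'customer_id')]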
sqlserver_semantic_mcp/services/semantic_service.py
@@ -0,0 +1,277 @@
import json
import re
from typing import Optional
import aiosqlite

from ..infrastructure.cache.semantic import (
    upsert_table_analysis, get_table_analysis,
)
from ..infrastructure.cache.structural import read_schema_version


_AUDIT_COL_PATTERNS = {
    "audit_timestamp": re.compile(
        r"^(created|updated|modified|deleted)(_?at|_?on|_?time|_?date)?$", re.I),
    "audit_user": re.compile(
        r"^(created|updated|modified|deleted)_?by$", re.I),
    "soft_delete": re.compile(r"^(is_)?deleted$|^deleted_at$", re.I),
    "status": re.compile(r"^(status|state)(_?code|_?id)?$", re.I),
    "type": re.compile(r"^(type|category|kind)(_?code|_?id)?$", re.I),
}

_LOOKUP_NAME_COLS = {"code", "name", "label", "description", "value"}

_COMMON_FILTER_SEMANTICS = frozenset({
    "status", "type", "audit_timestamp", "soft_delete",
})


async def _load_table_structure(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT 1 FROM sc_tables WHERE database_name=? "
            "AND schema_name=? AND table_name=?",
            (database, schema, table),
        )
        if not await cur.fetchone():
            return None

        cur = await db.execute(
            "SELECT column_name, data_type, max_length, is_nullable, ordinal_position "
            "FROM sc_columns WHERE database_name=? AND schema_name=? AND table_name=? "
            "ORDER BY ordinal_position",
            (database, schema, table),
        )
        columns = [dict(r) for r in await cur.fetchall()]

        cur = await db.execute(
            "SELECT column_name FROM sc_primary_keys "
            "WHERE database_name=? AND schema_name=? AND table_name=?",
            (database, schema, table),
        )
        pk = [r["column_name"] for r in await cur.fetchall()]

        cur = await db.execute(
            "SELECT column_name, ref_schema, ref_table FROM sc_foreign_keys "
            "WHERE database_name=? AND schema_name=? AND table_name=?",
            (database, schema, table),
        )
        fks = [dict(r) for r in await cur.fetchall()]

    return {"columns": columns, "primary_key": pk, "foreign_keys": fks}


def _column_semantic(col: dict) -> Optional[str]:
    name = col["column_name"]
    for sem, pat in _AUDIT_COL_PATTERNS.items():
        if pat.match(name):
            return sem
    return None


def _classify(struct: dict, table: str) -> dict:
    cols = struct["columns"]
    fks = struct["foreign_keys"]
    col_names = [c["column_name"].lower() for c in cols]

    reasons: list[str] = []

    # Audit heuristic
    audit_like = sum(1 for c in cols if _column_semantic(c) in (
        "audit_timestamp", "audit_user"))
    if audit_like >= 2 and len(cols) <= 6:
        reasons.append(f"{audit_like} audit-style columns dominate")
        return {"type": "audit", "confidence": 0.75, "reasons": reasons}

    # Bridge: 2+ FKs and almost all columns are FKs
    if len(fks) >= 2 and len(cols) <= len(fks) + 2:
        reasons.append(f"{len(fks)} FKs over {len(cols)} columns")
        return {"type": "bridge", "confidence": 0.8, "reasons": reasons}

    # Fact: >= 2 FKs
    if len(fks) >= 2:
        reasons.append(f"{len(fks)} FKs")
        return {"type": "fact", "confidence": 0.7, "reasons": reasons}

    # Lookup: few columns, contains code/name-like column names
    small = len(cols) <= 4
    lookup_cols = sum(1 for n in col_names if n in _LOOKUP_NAME_COLS)
    name_suggests_lookup = bool(re.search(
        r"(status|code|type|category|kind|lookup)$", table, re.I,
    ))
    if small and (lookup_cols >= 2 or name_suggests_lookup):
        reasons.append("small row width + lookup-like columns/name")
        return {"type": "lookup", "confidence": 0.75, "reasons": reasons}

    # Dimension fallback
    if len(fks) <= 1 and len(cols) >= 3:
        reasons.append("few FKs with multiple descriptive columns")
        return {"type": "dimension", "confidence": 0.5, "reasons": reasons}

    return {"type": "unknown", "confidence": 0.2,
            "reasons": reasons or ["no rule matched"]}


async def classify_table(
    db_path: str, database: str, schema: str, table: str,
    *, force: bool = False,
) -> dict:
    ver = await read_schema_version(db_path, database)
    structural_hash = ver["structural_hash"] if ver else ""

    if not force:
        cached = await get_table_analysis(db_path, database, schema, table)
        if (cached and cached["status"] == "ready"
                and cached.get("structural_hash") == structural_hash):
            return cached["classification"]

    struct = await _load_table_structure(db_path, database, schema, table)
    if struct is None:
        return {"type": "unknown", "confidence": 0.0,
                "reasons": ["table not found"]}

    classification = _classify(struct, table)
    column_analysis = [
        {"column": c["column_name"],
         "semantic_type": _column_semantic(c) or "generic"}
        for c in struct["columns"]
    ]
    is_lookup = classification["type"] == "lookup"

    await upsert_table_analysis(
        db_path, database, schema, table,
        structural_hash=structural_hash, status="ready",
        classification=classification,
        column_analysis=column_analysis,
        is_lookup=is_lookup,
    )
    return classification


async def analyze_columns(
    db_path: str, database: str, schema: str, table: str,
) -> list[dict]:
    await classify_table(db_path, database, schema, table)
    cached = await get_table_analysis(db_path, database, schema, table)
    return cached.get("column_analysis", []) if cached else []


async def summarize_for_joining(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    """Return reasoning-ready info for joining against this table.

    Shape: {table, pk, classification, join_candidates, common_filter_columns}
    """
    struct = await _load_table_structure(db_path, database, schema, table)
    if struct is None:
        return None

    join_candidates = [
        {"via_column": fk["column_name"],
         "to_table": f"{fk['ref_schema']}.{fk['ref_table']}"}
        for fk in struct["foreign_keys"]
    ]

    common_filter_columns = [
        c["column_name"] for c in struct["columns"]
        if (_column_semantic(c) or "") in _COMMON_FILTER_SEMANTICS
    ]

    classification = _classify(struct, table)

    return {
        "table": f"{schema}.{table}",
        "pk": struct["primary_key"],
        "classification": classification["type"],
        "join_candidates": join_candidates,
        "common_filter_columns": common_filter_columns,
    }


async def detect_lookup_tables(
    db_path: str, database: str, *,
    schemas: Optional[list[str]] = None,
    keyword: Optional[str] = None,
    confidence_min: float = 0.0,
) -> list[dict]:
    ver = await read_schema_version(db_path, database)
    current_hash = ver["structural_hash"] if ver else ""

    results: list[dict] = []
    need_classify: list[tuple[str, str]] = []

    kw_lower = keyword.lower() if keyword else None

    def passes_filter(s: str, t: str) -> bool:
        if schemas and s not in schemas:
            return False
        if kw_lower and kw_lower not in f"{s}.{t}".lower():
            return False
        return True

    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT schema_name, table_name FROM sc_tables "
            "WHERE database_name=?",
            (database,),
        )
        all_tables = [(r["schema_name"], r["table_name"])
                      for r in await cur.fetchall()
                      if passes_filter(r["schema_name"], r["table_name"])]

        # Fast path: read ready+fresh lookup rows from cache
        cur = await db.execute(
            "SELECT schema_name, table_name, classification FROM sem_table_analysis "
            "WHERE database_name=? AND status='ready' "
            "AND structural_hash=? AND is_lookup=1",
            (database, current_hash),
        )
        cached_hits = {
            (r["schema_name"], r["table_name"]):
                json.loads(r["classification"]) if r["classification"] else None
            for r in await cur.fetchall()
        }

        # Tables whose analysis is missing / stale / non-lookup-ready
        cur = await db.execute(
            "SELECT schema_name, table_name, status, structural_hash "
            "FROM sem_table_analysis "
            "WHERE database_name=?",
            (database,),
        )
        cache_state = {
            (r["schema_name"], r["table_name"]):
                (r["status"], r["structural_hash"])
            for r in await cur.fetchall()
        }

    for (s, t) in all_tables:
        if (s, t) in cached_hits:
            cls = cached_hits[(s, t)] or {"confidence": 0.75}
            conf = cls.get("confidence", 0.75)
            if conf >= confidence_min:
                results.append({
                    "schema_name": s, "table_name": t,
                    "confidence": conf,
                })
            continue
        state = cache_state.get((s, t))
        # Needs classification if: no row, dirty/pending, or hash mismatch
        if state is None or state[0] != "ready" or state[1] != current_hash:
            need_classify.append((s, t))

    for (s, t) in need_classify:
        c = await classify_table(db_path, database, s, t)
        if c.get("type") == "lookup":
            conf = c.get("confidence", 0.75)
            if conf >= confidence_min:
                results.append({
                    "schema_name": s, "table_name": t,
                    "confidence": conf,
                })
    return results
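The column-semantics pass in _column_semantic is pure regex matching, and pattern order matters: the dict is scanned in insertion order, so an ambiguous name like deleted_at lands on audit_timestamp before soft_delete can claim it. A quick standalone check with the same patterns over some hypothetical column names:

import re

# Copies of _AUDIT_COL_PATTERNS above, so this runs standalone.
patterns = {
    "audit_timestamp": re.compile(
        r"^(created|updated|modified|deleted)(_?at|_?on|_?time|_?date)?$", re.I),
    "audit_user": re.compile(r"^(created|updated|modified|deleted)_?by$", re.I),
    "soft_delete": re.compile(r"^(is_)?deleted$|^deleted_at$", re.I),
    "status": re.compile(r"^(status|state)(_?code|_?id)?$", re.I),
    "type": re.compile(r"^(type|category|kind)(_?code|_?id)?$", re.I),
}

for name in ["created_at", "ModifiedBy", "is_deleted", "status_code",
             "deleted_at", "order_total"]:
    sem = next((s for s, p in patterns.items() if p.match(name)), "generic")
    print(f"{name:12} -> {sem}")

# created_at   -> audit_timestamp
# ModifiedBy   -> audit_user
# is_deleted   -> soft_delete
# status_code  -> status
# deleted_at   -> audit_timestamp   (first match wins, not soft_delete)
# order_total  -> generic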
sqlserver_semantic_mcp/workflows/__init__.py
@@ -0,0 +1,26 @@
"""Agent-oriented workflow layer.

Sits between ``server/tools`` and ``services``. Responsible for
route decision, handoff contracts, context bundling, and the
direct-execution fast path.
"""
from .contracts import ToolEnvelope, RouteDecision, Route
from .router import route_query
from .query_flow import plan_or_execute_query
from .discovery_flow import discover_relevant_tables
from .bundle import bundle_context_for_next_step
from .recommendations import suggest_next_tool, estimate_execution_risk
from .facade import WorkflowFacade

__all__ = [
    "ToolEnvelope",
    "RouteDecision",
    "Route",
    "route_query",
    "plan_or_execute_query",
    "discover_relevant_tables",
    "bundle_context_for_next_step",
    "suggest_next_tool",
    "estimate_execution_risk",
    "WorkflowFacade",
]
sqlserver_semantic_mcp/workflows/bundle.py
@@ -0,0 +1,157 @@
"""Bundle prior tool results into a compact handoff for the next step."""
from __future__ import annotations

from typing import Optional

from ..config import Config, get_config
from ..services import metadata_service, object_service, semantic_service
from ..services.semantic_service import _column_semantic
from .contracts import ToolEnvelope


_JOIN_IMPORTANT = 6


async def _table_summary_for_joining(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    full = await metadata_service.describe_table(db_path, database, schema, table)
    if full is None:
        return None
    cls = await semantic_service.classify_table(db_path, database, schema, table)
    pk = full.get("primary_key", []) or []
    fks = full.get("foreign_keys", []) or []

    important: list[str] = []
    seen: set[str] = set()

    def push(name: Optional[str]) -> None:
        if not name or name in seen:
            return
        seen.add(name)
        important.append(name)

    for col in pk:
        push(col)
    for fk in fks:
        push(fk.get("column_name"))
    for c in full.get("columns", []):
        if len(important) >= _JOIN_IMPORTANT:
            break
        sem = _column_semantic(c)
        if sem and sem != "generic":
            push(c["column_name"])
    for c in full.get("columns", []):
        if len(important) >= _JOIN_IMPORTANT:
            break
        push(c["column_name"])

    fk_edges = [
        {
            "via_column": fk.get("column_name"),
            "to_table": f"{fk.get('ref_schema')}.{fk.get('ref_table')}",
            "to_column": fk.get("ref_column"),
        }
        for fk in fks
    ]

    return {
        "table": f"{schema}.{table}",
        "classification": cls.get("type", "unknown"),
        "pk": list(pk),
        "important_columns": important[:_JOIN_IMPORTANT],
        "fk_edges": fk_edges,
    }


async def _object_summary_for_impact(
    schema: str,
    object_name: str,
    object_type: str,
    cfg: Config,
) -> Optional[dict]:
    obj = await object_service.describe_object(
        schema, object_name, object_type, cfg,
    )
    if not obj:
        return None
    return {
        "object": f"{schema}.{object_name}",
        "type": object_type,
        "reads": list(obj.get("read_tables", []) or []),
        "writes": list(obj.get("write_tables", []) or []),
        "depends_on": list(obj.get("dependencies", []) or []),
        "status": obj.get("status"),
    }


async def bundle_context_for_next_step(
    items: list[dict],
    *,
    goal: str = "joining",
    detail: str = "brief",
    cfg: Optional[Config] = None,
) -> dict:
    """Compress prior discoveries into the minimum context the next
    tool needs. Supported goals: ``joining``, ``object_impact``.
    """
    cfg = cfg or get_config()
    db_path = cfg.cache_path
    database = cfg.mssql_database

    if goal == "joining":
        tables: list[dict] = []
        for item in items or []:
            if item.get("kind") != "table":
                continue
            schema = item["schema"]
            table = item["table"]
            summary = await _table_summary_for_joining(
                db_path, database, schema, table,
            )
            if summary is not None:
                tables.append(summary)
        return ToolEnvelope(
            kind="bundle_context_for_next_step",
            detail=detail,
            next_action="find_or_score_join",
            recommended_tool="score_join_candidate",
            bundle_key="joining",
            data={
                "bundle_type": "joining",
                "tables": tables,
            },
        ).to_dict()

    if goal == "object_impact":
        objects: list[dict] = []
        for item in items or []:
            if item.get("kind") != "object":
                continue
            summary = await _object_summary_for_impact(
                item["schema"], item["object_name"], item["object_type"], cfg,
            )
            if summary is not None:
                objects.append(summary)
        return ToolEnvelope(
            kind="bundle_context_for_next_step",
            detail=detail,
            next_action="trace_impact",
            recommended_tool="trace_object_dependencies",
            bundle_key="object_impact",
            data={
                "bundle_type": "object_impact",
                "objects": objects,
            },
        ).to_dict()

    return ToolEnvelope(
        kind="bundle_context_for_next_step",
        detail=detail,
        next_action="none",
        data={
            "bundle_type": goal,
            "error": f"unsupported goal '{goal}'",
            "supported_goals": ["joining", "object_impact"],
        },
    ).to_dict()
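The push loop in _table_summary_for_joining is a dedupe-to-cap prioritisation: primary-key columns first, then FK columns, then semantically tagged columns, then anything else until the _JOIN_IMPORTANT cap. A standalone sketch with invented column data:

JOIN_IMPORTANT = 6  # same cap as _JOIN_IMPORTANT above

# Hypothetical table shape.
pk = ["order_id"]
fk_cols = ["customer_id", "store_id"]
columns = ["order_id", "customer_id", "store_id", "status",
           "created_at", "notes", "total_amount"]
semantic = {"status": "status", "created_at": "audit_timestamp"}

important: list[str] = []
seen: set[str] = set()

def push(name: str) -> None:
    if name and name not in seen:
        seen.add(name)
        important.append(name)

for col in pk:                     # keys first
    push(col)
for col in fk_cols:                # then join columns
    push(col)
for col in columns:                # then semantically tagged columns
    if len(important) >= JOIN_IMPORTANT:
        break
    if semantic.get(col):
        push(col)
for col in columns:                # finally pad with whatever remains
    if len(important) >= JOIN_IMPORTANT:
        break
    push(col)

print(important)
# ['order_id', 'customer_id', 'store_id', 'status', 'created_at', 'notes']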
sqlserver_semantic_mcp/workflows/contracts.py
@@ -0,0 +1,64 @@
"""Handoff contracts for workflow tools.

All workflow tools return structured envelopes so a downstream agent
knows what to do next without re-parsing payloads.
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Literal, Optional


Route = Literal[
    "direct_execute",
    "direct_validate",
    "discovery",
    "object_analysis",
    "policy_only",
]


@dataclass
class RouteDecision:
    route: Route
    reason: str
    recommended_tools: list[str] = field(default_factory=list)
    confidence: float = 1.0

    def to_dict(self) -> dict:
        return {
            "route": self.route,
            "reason": self.reason,
            "recommended_tools": list(self.recommended_tools),
            "confidence": self.confidence,
        }


@dataclass
class ToolEnvelope:
    """Uniform envelope returned by workflow tools.

    ``data`` carries the tool-specific payload. Top-level fields are the
    agent-visible routing cues.
    """
    kind: str
    detail: str = "brief"
    confidence: Optional[float] = None
    next_action: Optional[str] = None
    recommended_tool: Optional[str] = None
    bundle_key: Optional[str] = None
    data: Any = None

    def to_dict(self) -> dict:
        out: dict[str, Any] = {"kind": self.kind, "detail": self.detail}
        if self.confidence is not None:
            out["confidence"] = self.confidence
        if self.next_action is not None:
            out["next_action"] = self.next_action
        if self.recommended_tool is not None:
            out["recommended_tool"] = self.recommended_tool
        if self.bundle_key is not None:
            out["bundle_key"] = self.bundle_key
        if self.data is not None:
            out["data"] = self.data
        return out
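Because to_dict emits only the cues a tool actually set, envelopes stay as small as the tool's guidance. A quick sketch, assuming ToolEnvelope as defined above (the field values here are invented for illustration):

env = ToolEnvelope(
    kind="discover_relevant_tables",
    next_action="bundle_context",
    recommended_tool="bundle_context_for_next_step",
    data={"tables": ["dbo.Order", "dbo.Customer"]},
)
print(env.to_dict())
# confidence and bundle_key were left as None, so they are omitted:
# {'kind': 'discover_relevant_tables', 'detail': 'brief',
#  'next_action': 'bundle_context',
#  'recommended_tool': 'bundle_context_for_next_step',
#  'data': {'tables': ['dbo.Order', 'dbo.Customer']}}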