sqlserver-semantic-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. sqlserver_semantic_mcp/__init__.py +1 -0
  2. sqlserver_semantic_mcp/config.py +78 -0
  3. sqlserver_semantic_mcp/domain/__init__.py +0 -0
  4. sqlserver_semantic_mcp/domain/enums.py +48 -0
  5. sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
  6. sqlserver_semantic_mcp/domain/models/column.py +14 -0
  7. sqlserver_semantic_mcp/domain/models/object.py +13 -0
  8. sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
  9. sqlserver_semantic_mcp/domain/models/table.py +29 -0
  10. sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
  11. sqlserver_semantic_mcp/infrastructure/background.py +59 -0
  12. sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
  13. sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
  14. sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
  15. sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
  16. sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
  17. sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
  18. sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
  19. sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
  20. sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
  21. sqlserver_semantic_mcp/main.py +90 -0
  22. sqlserver_semantic_mcp/policy/__init__.py +0 -0
  23. sqlserver_semantic_mcp/policy/analyzer.py +194 -0
  24. sqlserver_semantic_mcp/policy/enforcer.py +104 -0
  25. sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
  26. sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
  27. sqlserver_semantic_mcp/policy/intents/base.py +17 -0
  28. sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
  29. sqlserver_semantic_mcp/policy/intents/router.py +21 -0
  30. sqlserver_semantic_mcp/policy/loader.py +90 -0
  31. sqlserver_semantic_mcp/policy/models.py +43 -0
  32. sqlserver_semantic_mcp/server/__init__.py +0 -0
  33. sqlserver_semantic_mcp/server/app.py +125 -0
  34. sqlserver_semantic_mcp/server/compact.py +74 -0
  35. sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
  36. sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
  37. sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
  38. sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
  39. sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
  40. sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
  41. sqlserver_semantic_mcp/server/resources/schema.py +144 -0
  42. sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
  43. sqlserver_semantic_mcp/server/tools/cache.py +24 -0
  44. sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
  45. sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
  46. sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
  47. sqlserver_semantic_mcp/server/tools/policy.py +48 -0
  48. sqlserver_semantic_mcp/server/tools/query.py +159 -0
  49. sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
  50. sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
  51. sqlserver_semantic_mcp/server/tools/shape.py +204 -0
  52. sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
  53. sqlserver_semantic_mcp/services/__init__.py +0 -0
  54. sqlserver_semantic_mcp/services/metadata_service.py +173 -0
  55. sqlserver_semantic_mcp/services/metrics_service.py +124 -0
  56. sqlserver_semantic_mcp/services/object_service.py +187 -0
  57. sqlserver_semantic_mcp/services/policy_service.py +59 -0
  58. sqlserver_semantic_mcp/services/query_service.py +321 -0
  59. sqlserver_semantic_mcp/services/relationship_service.py +160 -0
  60. sqlserver_semantic_mcp/services/semantic_service.py +277 -0
  61. sqlserver_semantic_mcp/workflows/__init__.py +26 -0
  62. sqlserver_semantic_mcp/workflows/bundle.py +157 -0
  63. sqlserver_semantic_mcp/workflows/contracts.py +64 -0
  64. sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
  65. sqlserver_semantic_mcp/workflows/facade.py +117 -0
  66. sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
  67. sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
  68. sqlserver_semantic_mcp/workflows/router.py +59 -0
  69. sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
  70. sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
  71. sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
  72. sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
  73. sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
  74. sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,160 @@
1
+ from typing import Optional
2
+ from collections import deque
3
+ import aiosqlite
4
+
5
+ from ..infrastructure.cache.structural import read_schema_version
6
+
7
+
8
# Module-level memo of FK adjacency graphs, keyed by
# (db_path, database, structural_hash).  Entries built against an older
# structural hash are evicted inside _load_fk_graph when a fresh graph
# for the same (db_path, database) is stored.
_GRAPH_CACHE: dict[tuple[str, str, str], dict[tuple[str, str], list[dict]]] = {}
9
+
10
+
11
async def get_table_relationships(
    db_path: str, database: str, schema: str, table: str,
) -> list[dict]:
    """Return every FK relationship touching ``schema.table``.

    Edges where this table is the referencing side are reported with
    direction "outbound" and type "many_to_one"; edges where it is the
    referenced side are "inbound" / "one_to_many".
    """
    select_clause = (
        "SELECT schema_name, table_name, column_name, "
        " ref_schema, ref_table, ref_column "
        "FROM sc_foreign_keys "
    )
    params = (database, schema, table)

    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row

        cur = await db.execute(
            select_clause
            + "WHERE database_name=? AND schema_name=? AND table_name=?",
            params,
        )
        outbound_rows = [dict(row) for row in await cur.fetchall()]

        cur = await db.execute(
            select_clause
            + "WHERE database_name=? AND ref_schema=? AND ref_table=?",
            params,
        )
        inbound_rows = [dict(row) for row in await cur.fetchall()]

    def as_edge(row: dict, direction: str, rel_type: str) -> dict:
        # Normalize one FK row into the shared edge shape.
        return {
            "direction": direction,
            "from_schema": row["schema_name"], "from_table": row["table_name"],
            "from_column": row["column_name"],
            "to_schema": row["ref_schema"], "to_table": row["ref_table"],
            "to_column": row["ref_column"],
            "type": rel_type,
        }

    edges = [as_edge(row, "outbound", "many_to_one") for row in outbound_rows]
    edges.extend(as_edge(row, "inbound", "one_to_many") for row in inbound_rows)
    return edges
54
+
55
+
56
async def _load_fk_graph(
    db_path: str, database: str,
) -> dict[tuple[str, str], list[dict]]:
    """Load the full FK adjacency graph for ``database``, memoized.

    Maps (schema, table) -> list of edge dicts.  Each FK row contributes
    two entries: an "outbound" edge on the referencing table and a mirror
    "inbound" edge on the referenced table, so BFS can walk either
    direction.  Results are cached per (db_path, database,
    structural_hash); caches for older hashes of the same database are
    evicted when a fresh graph is stored.
    """
    ver = await read_schema_version(db_path, database)
    # Missing version row degrades to "" so the cache still functions
    # before the first structural sync.
    structural_hash = ver["structural_hash"] if ver else ""
    cache_key = (db_path, database, structural_hash)
    cached = _GRAPH_CACHE.get(cache_key)
    if cached is not None:
        return cached

    graph: dict[tuple[str, str], list[dict]] = {}
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT schema_name, table_name, column_name, "
            " ref_schema, ref_table, ref_column "
            "FROM sc_foreign_keys WHERE database_name=?",
            (database,),
        )
        for r in await cur.fetchall():
            src = (r["schema_name"], r["table_name"])
            dst = (r["ref_schema"], r["ref_table"])
            # Forward edge: src references dst via column_name.
            graph.setdefault(src, []).append({
                "from_schema": src[0], "from_table": src[1],
                "from_column": r["column_name"],
                "to_schema": dst[0], "to_table": dst[1],
                "to_column": r["ref_column"],
                "direction": "outbound",
            })
            # Mirror edge so traversal can also start at the referenced side.
            graph.setdefault(dst, []).append({
                "from_schema": dst[0], "from_table": dst[1],
                "from_column": r["ref_column"],
                "to_schema": src[0], "to_table": src[1],
                "to_column": r["column_name"],
                "direction": "inbound",
            })
    # Evict graphs cached under an older structural hash of this database;
    # the current key is inserted afterwards so at most one graph per
    # (db_path, database) survives.
    stale_keys = [
        key for key in _GRAPH_CACHE
        if key[:2] == (db_path, database) and key != cache_key
    ]
    for key in stale_keys:
        _GRAPH_CACHE.pop(key, None)
    _GRAPH_CACHE[cache_key] = graph
    return graph
100
+
101
+
102
async def find_join_path(
    db_path: str, database: str,
    from_schema: str, from_table: str,
    to_schema: str, to_table: str,
    max_hops: int = 5,
) -> Optional[list[dict]]:
    """Breadth-first search for an FK join path of at most ``max_hops`` edges.

    Returns the edge list from source to target (shortest first found),
    ``[]`` when source and target are the same table, and ``None`` when
    no path exists within the hop budget.
    """
    graph = await _load_fk_graph(db_path, database)
    source = (from_schema, from_table)
    goal = (to_schema, to_table)
    if source == goal:
        return []

    seen = {source}
    frontier = deque([(source, [])])
    while frontier:
        current, hops = frontier.popleft()
        # Expanding here would create a path longer than max_hops edges.
        if len(hops) >= max_hops:
            continue
        for edge in graph.get(current, []):
            neighbor = (edge["to_schema"], edge["to_table"])
            if neighbor in seen:
                continue
            extended = hops + [edge]
            if neighbor == goal:
                return extended
            seen.add(neighbor)
            frontier.append((neighbor, extended))
    return None
130
+
131
+
132
async def get_dependency_chain(
    db_path: str, database: str, schema: str, table: str,
    max_depth: int = 10,
    *, schemas: Optional[list[str]] = None,
) -> list[dict]:
    """BFS expansion of all tables FK-reachable from ``schema.table``.

    Returns one row per reachable table (the start table excluded) with
    its minimum hop depth.  ``schemas`` optionally restricts expansion to
    a whitelist of schema names; ``max_depth`` bounds the traversal.
    """
    graph = await _load_fk_graph(db_path, database)
    root = (schema, table)
    allowed_schemas = set(schemas) if schemas else None
    depth_of: dict[tuple[str, str], int] = {root: 0}
    pending = deque([root])

    while pending:
        current = pending.popleft()
        level = depth_of[current]
        if level >= max_depth:
            continue
        for edge in graph.get(current, []):
            neighbor = (edge["to_schema"], edge["to_table"])
            if neighbor in depth_of:
                continue
            # Whitelisted-schema check only gates NEW nodes, never the root.
            if allowed_schemas is not None and neighbor[0] not in allowed_schemas:
                continue
            depth_of[neighbor] = level + 1
            pending.append(neighbor)

    return [
        {"schema_name": s, "table_name": t, "depth": d}
        for (s, t), d in depth_of.items()
        if (s, t) != root
    ]
@@ -0,0 +1,277 @@
1
+ import json
2
+ import re
3
+ from typing import Optional
4
+ import aiosqlite
5
+
6
+ from ..infrastructure.cache.semantic import (
7
+ upsert_table_analysis, get_table_analysis,
8
+ )
9
+ from ..infrastructure.cache.structural import read_schema_version
10
+
11
+
12
# Column-name regexes mapped to semantic tags.  _column_semantic() scans
# this dict in insertion order and returns the FIRST matching tag, so
# earlier entries shadow later ones.
# NOTE(review): "deleted" and "deleted_at" match the audit_timestamp
# pattern before soft_delete is ever tried, so soft_delete can only fire
# for "is_deleted".  Confirm this priority is intentional before reordering.
_AUDIT_COL_PATTERNS = {
    "audit_timestamp": re.compile(
        r"^(created|updated|modified|deleted)(_?at|_?on|_?time|_?date)?$", re.I),
    "audit_user": re.compile(
        r"^(created|updated|modified|deleted)_?by$", re.I),
    "soft_delete": re.compile(r"^(is_)?deleted$|^deleted_at$", re.I),
    "status": re.compile(r"^(status|state)(_?code|_?id)?$", re.I),
    "type": re.compile(r"^(type|category|kind)(_?code|_?id)?$", re.I),
}

# Column names that typically appear in small code/description lookup tables;
# _classify() counts these when deciding the "lookup" type.
_LOOKUP_NAME_COLS = {"code", "name", "label", "description", "value"}

# Semantic tags treated as likely filter columns by summarize_for_joining().
_COMMON_FILTER_SEMANTICS = frozenset({
    "status", "type", "audit_timestamp", "soft_delete",
})
+ })
27
+
28
+
29
async def _load_table_structure(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    """Fetch columns, primary key, and FKs for one cached table.

    Returns ``None`` when the table is absent from the structural cache.
    """
    key = (database, schema, table)
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row

        cur = await db.execute(
            "SELECT 1 FROM sc_tables WHERE database_name=? "
            "AND schema_name=? AND table_name=?",
            key,
        )
        if await cur.fetchone() is None:
            return None

        cur = await db.execute(
            "SELECT column_name, data_type, max_length, is_nullable, ordinal_position "
            "FROM sc_columns WHERE database_name=? AND schema_name=? AND table_name=? "
            "ORDER BY ordinal_position",
            key,
        )
        columns = [dict(row) for row in await cur.fetchall()]

        cur = await db.execute(
            "SELECT column_name FROM sc_primary_keys "
            "WHERE database_name=? AND schema_name=? AND table_name=?",
            key,
        )
        primary_key = [row["column_name"] for row in await cur.fetchall()]

        cur = await db.execute(
            "SELECT column_name, ref_schema, ref_table FROM sc_foreign_keys "
            "WHERE database_name=? AND schema_name=? AND table_name=?",
            key,
        )
        foreign_keys = [dict(row) for row in await cur.fetchall()]

    return {
        "columns": columns,
        "primary_key": primary_key,
        "foreign_keys": foreign_keys,
    }
65
+
66
+
67
def _column_semantic(col: dict) -> Optional[str]:
    """Return the first semantic tag whose pattern matches the column name.

    Returns ``None`` when no pattern in _AUDIT_COL_PATTERNS matches.
    """
    column_name = col["column_name"]
    return next(
        (tag for tag, pattern in _AUDIT_COL_PATTERNS.items()
         if pattern.match(column_name)),
        None,
    )
73
+
74
+
75
def _classify(struct: dict, table: str) -> dict:
    """Heuristically classify a table as audit/bridge/fact/lookup/dimension.

    Rules are tried in priority order; the first one that fires decides
    the type, its confidence, and the human-readable reasons.
    """
    columns = struct["columns"]
    foreign_keys = struct["foreign_keys"]
    lowered_names = [c["column_name"].lower() for c in columns]
    n_cols = len(columns)
    n_fks = len(foreign_keys)

    reasons: list[str] = []

    # Audit: a narrow table dominated by created/updated-style columns.
    audit_like = sum(
        1 for c in columns
        if _column_semantic(c) in ("audit_timestamp", "audit_user")
    )
    if audit_like >= 2 and n_cols <= 6:
        reasons.append(f"{audit_like} audit-style columns dominate")
        return {"type": "audit", "confidence": 0.75, "reasons": reasons}

    # Bridge: 2+ FKs and nearly every column participates in an FK.
    if n_fks >= 2 and n_cols <= n_fks + 2:
        reasons.append(f"{n_fks} FKs over {n_cols} columns")
        return {"type": "bridge", "confidence": 0.8, "reasons": reasons}

    # Fact: references at least two other tables.
    if n_fks >= 2:
        reasons.append(f"{n_fks} FKs")
        return {"type": "fact", "confidence": 0.7, "reasons": reasons}

    # Lookup: narrow table with code/name-style columns or a tell-tale name.
    is_small = n_cols <= 4
    lookup_col_count = sum(
        1 for name in lowered_names if name in _LOOKUP_NAME_COLS
    )
    name_hints_lookup = re.search(
        r"(status|code|type|category|kind|lookup)$", table, re.I,
    ) is not None
    if is_small and (lookup_col_count >= 2 or name_hints_lookup):
        reasons.append("small row width + lookup-like columns/name")
        return {"type": "lookup", "confidence": 0.75, "reasons": reasons}

    # Dimension fallback: few FKs but several descriptive columns.
    if n_fks <= 1 and n_cols >= 3:
        reasons.append("few FKs with multiple descriptive columns")
        return {"type": "dimension", "confidence": 0.5, "reasons": reasons}

    return {"type": "unknown", "confidence": 0.2,
            "reasons": reasons or ["no rule matched"]}
116
+
117
+
118
async def classify_table(
    db_path: str, database: str, schema: str, table: str,
    *, force: bool = False,
) -> dict:
    """Classify a table, serving from the semantic cache when fresh.

    Returns a classification dict ({type, confidence, reasons}).  Unless
    ``force`` is set, a cached analysis is reused when its status is
    "ready" AND it was computed against the current structural hash.
    Fresh results are persisted via upsert_table_analysis so later calls
    (and detect_lookup_tables) can reuse them.
    """
    ver = await read_schema_version(db_path, database)
    # Missing version row degrades to "" so cache comparisons still work.
    structural_hash = ver["structural_hash"] if ver else ""

    if not force:
        cached = await get_table_analysis(db_path, database, schema, table)
        # Reuse only ready analyses computed for the current schema version.
        if cached and cached["status"] == "ready" \
                and cached.get("structural_hash") == structural_hash:
            return cached["classification"]

    struct = await _load_table_structure(db_path, database, schema, table)
    if struct is None:
        # Unknown table: report it, but do not write this into the cache.
        return {"type": "unknown", "confidence": 0.0,
                "reasons": ["table not found"]}

    classification = _classify(struct, table)
    # Per-column semantic tags are persisted alongside the classification
    # so analyze_columns() can serve them from the cache.
    column_analysis = [
        {"column": c["column_name"],
         "semantic_type": _column_semantic(c) or "generic"}
        for c in struct["columns"]
    ]
    is_lookup = classification["type"] == "lookup"

    await upsert_table_analysis(
        db_path, database, schema, table,
        structural_hash=structural_hash, status="ready",
        classification=classification,
        column_analysis=column_analysis,
        is_lookup=is_lookup,
    )
    return classification
152
+
153
+
154
async def analyze_columns(
    db_path: str, database: str, schema: str, table: str,
) -> list[dict]:
    """Return per-column semantic tags, computing the analysis on demand.

    classify_table() guarantees (for known tables) that a cached analysis
    row exists before we read it back; unknown tables yield ``[]``.
    """
    await classify_table(db_path, database, schema, table)
    analysis = await get_table_analysis(db_path, database, schema, table)
    if not analysis:
        return []
    return analysis.get("column_analysis", [])
160
+
161
+
162
async def summarize_for_joining(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    """Return reasoning-ready info for joining against this table.

    Shape: {table, pk, classification, join_candidates, common_filter_columns}
    Returns ``None`` when the table is not in the structural cache.
    """
    struct = await _load_table_structure(db_path, database, schema, table)
    if struct is None:
        return None

    # FK edges become ready-made join candidates.
    candidates: list[dict] = []
    for fk in struct["foreign_keys"]:
        candidates.append({
            "via_column": fk["column_name"],
            "to_table": f"{fk['ref_schema']}.{fk['ref_table']}",
        })

    # Columns whose semantic tag commonly appears in WHERE clauses.
    filter_columns: list[str] = []
    for col in struct["columns"]:
        if (_column_semantic(col) or "") in _COMMON_FILTER_SEMANTICS:
            filter_columns.append(col["column_name"])

    return {
        "table": f"{schema}.{table}",
        "pk": struct["primary_key"],
        "classification": _classify(struct, table)["type"],
        "join_candidates": candidates,
        "common_filter_columns": filter_columns,
    }
193
+
194
+
195
async def detect_lookup_tables(
    db_path: str, database: str, *,
    schemas: Optional[list[str]] = None,
    keyword: Optional[str] = None,
    confidence_min: float = 0.0,
) -> list[dict]:
    """Find probable lookup tables in ``database``.

    Two-phase scan: ready cache rows matching the current structural hash
    are served directly (fast path); tables whose analysis is missing,
    stale, or not ready are classified on demand.  Optional filters:
    ``schemas`` whitelist, case-insensitive ``keyword`` substring on
    "schema.table", and a minimum ``confidence_min`` threshold.
    """
    ver = await read_schema_version(db_path, database)
    current_hash = ver["structural_hash"] if ver else ""

    results: list[dict] = []
    need_classify: list[tuple[str, str]] = []

    kw_lower = keyword.lower() if keyword else None

    def passes_filter(s: str, t: str) -> bool:
        # Schema whitelist + keyword substring filter on "schema.table".
        if schemas and s not in schemas:
            return False
        if kw_lower and kw_lower not in f"{s}.{t}".lower():
            return False
        return True

    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cur = await db.execute(
            "SELECT schema_name, table_name FROM sc_tables "
            "WHERE database_name=?",
            (database,),
        )
        all_tables = [(r["schema_name"], r["table_name"])
                      for r in await cur.fetchall()
                      if passes_filter(r["schema_name"], r["table_name"])]

        # Fast path: read ready+fresh lookup rows from cache
        cur = await db.execute(
            "SELECT schema_name, table_name, classification FROM sem_table_analysis "
            "WHERE database_name=? AND status='ready' "
            "AND structural_hash=? AND is_lookup=1",
            (database, current_hash),
        )
        cached_hits = {
            (r["schema_name"], r["table_name"]):
                json.loads(r["classification"]) if r["classification"] else None
            for r in await cur.fetchall()
        }

        # Tables whose analysis is missing / stale / non-lookup-ready
        cur = await db.execute(
            "SELECT schema_name, table_name, status, structural_hash "
            "FROM sem_table_analysis "
            "WHERE database_name=?",
            (database,),
        )
        cache_state = {
            (r["schema_name"], r["table_name"]):
                (r["status"], r["structural_hash"])
            for r in await cur.fetchall()
        }

    for (s, t) in all_tables:
        if (s, t) in cached_hits:
            # Cached lookup hit; a NULL classification JSON falls back to
            # the lookup rule's default confidence (0.75, see _classify).
            cls = cached_hits[(s, t)] or {"confidence": 0.75}
            conf = cls.get("confidence", 0.75)
            if conf >= confidence_min:
                results.append({
                    "schema_name": s, "table_name": t,
                    "confidence": conf,
                })
            continue
        state = cache_state.get((s, t))
        # Needs classification if: no row, dirty/pending, or hash mismatch
        if state is None or state[0] != "ready" or state[1] != current_hash:
            need_classify.append((s, t))

    # Slow path: classify the remaining tables and keep only lookups.
    for (s, t) in need_classify:
        c = await classify_table(db_path, database, s, t)
        if c.get("type") == "lookup":
            conf = c.get("confidence", 0.75)
            if conf >= confidence_min:
                results.append({
                    "schema_name": s, "table_name": t,
                    "confidence": conf,
                })
    return results
@@ -0,0 +1,26 @@
1
+ """Agent-oriented workflow layer.
2
+
3
+ Sits between ``server/tools`` and ``services``. Responsible for
4
+ route decision, handoff contracts, context bundling, and the
5
+ direct-execution fast path.
6
+ """
7
+ from .contracts import ToolEnvelope, RouteDecision, Route
8
+ from .router import route_query
9
+ from .query_flow import plan_or_execute_query
10
+ from .discovery_flow import discover_relevant_tables
11
+ from .bundle import bundle_context_for_next_step
12
+ from .recommendations import suggest_next_tool, estimate_execution_risk
13
+ from .facade import WorkflowFacade
14
+
15
# Explicit public surface of the workflows package; keep in sync with the
# re-export imports at the top of this module.
__all__ = [
    "ToolEnvelope",
    "RouteDecision",
    "Route",
    "route_query",
    "plan_or_execute_query",
    "discover_relevant_tables",
    "bundle_context_for_next_step",
    "suggest_next_tool",
    "estimate_execution_risk",
    "WorkflowFacade",
]
@@ -0,0 +1,157 @@
1
+ """Bundle prior tool results into a compact handoff for the next step."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional
5
+
6
+ from ..config import Config, get_config
7
+ from ..services import metadata_service, object_service, semantic_service
8
+ from ..services.semantic_service import _column_semantic
9
+ from .contracts import ToolEnvelope
10
+
11
+
12
# Cap on the number of "important" columns surfaced per table join summary.
_JOIN_IMPORTANT = 6
13
+
14
+
15
async def _table_summary_for_joining(
    db_path: str, database: str, schema: str, table: str,
) -> Optional[dict]:
    """Build a compact, join-oriented summary of one table.

    Combines the structural description (describe_table) with the
    semantic classification.  Returns ``None`` when the table is unknown
    to the metadata service.
    """
    full = await metadata_service.describe_table(db_path, database, schema, table)
    if full is None:
        return None
    cls = await semantic_service.classify_table(db_path, database, schema, table)
    pk = full.get("primary_key", []) or []
    fks = full.get("foreign_keys", []) or []

    # Column selection priority: PK columns, then FK columns, then columns
    # with a recognized semantic tag, then any remaining columns —
    # deduplicated, and finally capped at _JOIN_IMPORTANT in the slice below.
    important: list[str] = []
    seen: set[str] = set()

    def push(name: Optional[str]) -> None:
        # Append once, skipping falsy names and duplicates.
        if not name or name in seen:
            return
        seen.add(name)
        important.append(name)

    for col in pk:
        push(col)
    for fk in fks:
        push(fk.get("column_name"))
    for c in full.get("columns", []):
        if len(important) >= _JOIN_IMPORTANT:
            break
        sem = _column_semantic(c)
        # _column_semantic returns None (not "generic") for unmatched
        # columns, so the != "generic" guard is defensive only.
        if sem and sem != "generic":
            push(c["column_name"])
    for c in full.get("columns", []):
        if len(important) >= _JOIN_IMPORTANT:
            break
        push(c["column_name"])

    fk_edges = [
        {
            "via_column": fk.get("column_name"),
            "to_table": f"{fk.get('ref_schema')}.{fk.get('ref_table')}",
            "to_column": fk.get("ref_column"),
        }
        for fk in fks
    ]

    return {
        "table": f"{schema}.{table}",
        "classification": cls.get("type", "unknown"),
        "pk": list(pk),
        # PK/FK pushes above are not capped, so slice to enforce the limit.
        "important_columns": important[:_JOIN_IMPORTANT],
        "fk_edges": fk_edges,
    }
65
+
66
+
67
async def _object_summary_for_impact(
    schema: str,
    object_name: str,
    object_type: str,
    cfg: Config,
) -> Optional[dict]:
    """Condense one programmable object into its impact-relevant facts.

    Returns ``None`` when the object cannot be described.
    """
    described = await object_service.describe_object(
        schema, object_name, object_type, cfg,
    )
    if not described:
        return None

    summary: dict = {
        "object": f"{schema}.{object_name}",
        "type": object_type,
    }
    # Copy each list defensively so callers can't mutate service state.
    for out_key, src_key in (
        ("reads", "read_tables"),
        ("writes", "write_tables"),
        ("depends_on", "dependencies"),
    ):
        summary[out_key] = list(described.get(src_key, []) or [])
    summary["status"] = described.get("status")
    return summary
86
+
87
+
88
async def bundle_context_for_next_step(
    items: list[dict],
    *,
    goal: str = "joining",
    detail: str = "brief",
    cfg: Optional[Config] = None,
) -> dict:
    """Compress prior discoveries into the minimum context the next
    tool needs. Supported goals: ``joining``, ``object_impact``.

    ``items`` are discovery records; only entries whose ``kind`` matches
    the goal ("table" / "object") are summarized, the rest are skipped.
    Unsupported goals return an error envelope instead of raising.
    """
    cfg = cfg or get_config()
    db_path = cfg.cache_path
    database = cfg.mssql_database

    if goal == "joining":
        tables: list[dict] = []
        for item in items or []:
            if item.get("kind") != "table":
                continue
            # NOTE(review): missing "schema"/"table" keys raise KeyError
            # here — confirm callers always supply them.
            schema = item["schema"]
            table = item["table"]
            summary = await _table_summary_for_joining(
                db_path, database, schema, table,
            )
            # Unknown tables are silently dropped from the bundle.
            if summary is not None:
                tables.append(summary)
        return ToolEnvelope(
            kind="bundle_context_for_next_step",
            detail=detail,
            next_action="find_or_score_join",
            recommended_tool="score_join_candidate",
            bundle_key="joining",
            data={
                "bundle_type": "joining",
                "tables": tables,
            },
        ).to_dict()

    if goal == "object_impact":
        objects: list[dict] = []
        for item in items or []:
            if item.get("kind") != "object":
                continue
            summary = await _object_summary_for_impact(
                item["schema"], item["object_name"], item["object_type"], cfg,
            )
            if summary is not None:
                objects.append(summary)
        return ToolEnvelope(
            kind="bundle_context_for_next_step",
            detail=detail,
            next_action="trace_impact",
            recommended_tool="trace_object_dependencies",
            bundle_key="object_impact",
            data={
                "bundle_type": "object_impact",
                "objects": objects,
            },
        ).to_dict()

    # Unknown goal: return a well-formed envelope the agent can act on.
    return ToolEnvelope(
        kind="bundle_context_for_next_step",
        detail=detail,
        next_action="none",
        data={
            "bundle_type": goal,
            "error": f"unsupported goal '{goal}'",
            "supported_goals": ["joining", "object_impact"],
        },
    ).to_dict()
@@ -0,0 +1,64 @@
1
+ """Handoff contracts for workflow tools.
2
+
3
+ All workflow tools return structured envelopes so a downstream agent
4
+ knows what to do next without re-parsing payloads.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Any, Literal, Optional
10
+
11
+
12
# Closed set of routes the workflow router can select.  The values are the
# wire-level strings exposed via RouteDecision.to_dict().
Route = Literal[
    "direct_execute",
    "direct_validate",
    "discovery",
    "object_analysis",
    "policy_only",
]
19
+
20
+
21
@dataclass
class RouteDecision:
    """Router verdict: which workflow route to take, and why.

    ``recommended_tools`` suggests concrete next tools; ``confidence``
    defaults to full certainty.
    """
    route: Route
    reason: str
    recommended_tools: list[str] = field(default_factory=list)
    confidence: float = 1.0

    def to_dict(self) -> dict:
        """Serialize to a plain dict; the tool list is copied defensively."""
        payload = dict(
            route=self.route,
            reason=self.reason,
            recommended_tools=list(self.recommended_tools),
            confidence=self.confidence,
        )
        return payload
35
+
36
+
37
@dataclass
class ToolEnvelope:
    """Uniform envelope returned by workflow tools.

    ``data`` carries the tool-specific payload; the remaining fields are
    the agent-visible routing cues.  ``to_dict`` omits every optional
    field still set to ``None``.
    """
    kind: str
    detail: str = "brief"
    confidence: Optional[float] = None
    next_action: Optional[str] = None
    recommended_tool: Optional[str] = None
    bundle_key: Optional[str] = None
    data: Any = None

    def to_dict(self) -> dict:
        """Serialize, dropping unset optional cues; key order is stable."""
        payload: dict[str, Any] = {"kind": self.kind, "detail": self.detail}
        for attr in ("confidence", "next_action", "recommended_tool",
                     "bundle_key", "data"):
            value = getattr(self, attr)
            if value is not None:
                payload[attr] = value
        return payload