PyPI - sqlserver-semantic-mcp - Versions diffs - 0.5.0__py3-none-any.whl - Mend

sqlserver-semantic-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

sqlserver_semantic_mcp/__init__.py +1 -0
sqlserver_semantic_mcp/config.py +78 -0
sqlserver_semantic_mcp/domain/__init__.py +0 -0
sqlserver_semantic_mcp/domain/enums.py +48 -0
sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
sqlserver_semantic_mcp/domain/models/column.py +14 -0
sqlserver_semantic_mcp/domain/models/object.py +13 -0
sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
sqlserver_semantic_mcp/domain/models/table.py +29 -0
sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
sqlserver_semantic_mcp/infrastructure/background.py +59 -0
sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
sqlserver_semantic_mcp/main.py +90 -0
sqlserver_semantic_mcp/policy/__init__.py +0 -0
sqlserver_semantic_mcp/policy/analyzer.py +194 -0
sqlserver_semantic_mcp/policy/enforcer.py +104 -0
sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
sqlserver_semantic_mcp/policy/intents/base.py +17 -0
sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
sqlserver_semantic_mcp/policy/intents/router.py +21 -0
sqlserver_semantic_mcp/policy/loader.py +90 -0
sqlserver_semantic_mcp/policy/models.py +43 -0
sqlserver_semantic_mcp/server/__init__.py +0 -0
sqlserver_semantic_mcp/server/app.py +125 -0
sqlserver_semantic_mcp/server/compact.py +74 -0
sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
sqlserver_semantic_mcp/server/resources/schema.py +144 -0
sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
sqlserver_semantic_mcp/server/tools/cache.py +24 -0
sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
sqlserver_semantic_mcp/server/tools/policy.py +48 -0
sqlserver_semantic_mcp/server/tools/query.py +159 -0
sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
sqlserver_semantic_mcp/server/tools/shape.py +204 -0
sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
sqlserver_semantic_mcp/services/__init__.py +0 -0
sqlserver_semantic_mcp/services/metadata_service.py +173 -0
sqlserver_semantic_mcp/services/metrics_service.py +124 -0
sqlserver_semantic_mcp/services/object_service.py +187 -0
sqlserver_semantic_mcp/services/policy_service.py +59 -0
sqlserver_semantic_mcp/services/query_service.py +321 -0
sqlserver_semantic_mcp/services/relationship_service.py +160 -0
sqlserver_semantic_mcp/services/semantic_service.py +277 -0
sqlserver_semantic_mcp/workflows/__init__.py +26 -0
sqlserver_semantic_mcp/workflows/bundle.py +157 -0
sqlserver_semantic_mcp/workflows/contracts.py +64 -0
sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
sqlserver_semantic_mcp/workflows/facade.py +117 -0
sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
sqlserver_semantic_mcp/workflows/router.py +59 -0
sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0

sqlserver_semantic_mcp/workflows/discovery_flow.py ADDED Viewed

@@ -0,0 +1,116 @@
+"""Discovery path — narrows the candidate set before explore/describe."""
+from __future__ import annotations
+from typing import Optional
+from ..config import Config, get_config
+from ..services import metadata_service, semantic_service
+from .contracts import ToolEnvelope
+_STOPWORDS = {
+    "the", "a", "an", "of", "to", "and", "or", "for", "on", "in", "by",
+    "with", "is", "are", "what", "which", "how", "show", "list", "me",
+    "my", "our", "their", "please", "need", "want", "find",
+}
+def _tokenize(goal: str) -> list[str]:
+    if not goal:
+        return []
+    tokens = [t.lower().strip(".,;:!?()[]{}\"'`") for t in goal.split()]
+    return [t for t in tokens if t and t not in _STOPWORDS and len(t) > 1]
+def _score(table: dict, tokens: list[str]) -> tuple[float, list[str]]:
+    """Return (score, reasons) for a candidate table."""
+    name = f"{table['schema_name']}.{table['table_name']}".lower()
+    bare = table["table_name"].lower()
+    reasons: list[str] = []
+    score = 0.0
+    for tok in tokens:
+        if tok == bare:
+            score += 0.6
+            reasons.append(f"exact table match: {tok}")
+        elif tok in bare:
+            score += 0.35
+            reasons.append(f"table name contains '{tok}'")
+        elif tok in name:
+            score += 0.2
+            reasons.append(f"qualified name contains '{tok}'")
+    return min(score, 1.0), reasons
+async def discover_relevant_tables(
+    goal: str,
+    *,
+    schemas: Optional[list[str]] = None,
+    keyword: Optional[str] = None,
+    limit: int = 10,
+    classify: bool = False,
+    cfg: Optional[Config] = None,
+) -> dict:
+    """Return a small ranked candidate set for a natural-language ask.
+    The server intentionally stays dumb (keyword scoring only) so the
+    response is cheap. Agents can follow up with ``describe_table`` or
+    ``classify_table`` for the short list.
+    """
+    cfg = cfg or get_config()
+    db_path = cfg.cache_path
+    database = cfg.mssql_database
+    tokens = _tokenize(goal)
+    if keyword and keyword.lower() not in tokens:
+        tokens.append(keyword.lower())
+    tables = await metadata_service.list_tables(
+        db_path, database,
+        schemas=schemas, keyword=keyword if keyword else None,
+    )
+    scored: list[dict] = []
+    for t in tables:
+        score, reasons = _score(t, tokens)
+        if score <= 0 and not keyword:
+            continue
+        scored.append({
+            "table": f"{t['schema_name']}.{t['table_name']}",
+            "schema": t["schema_name"],
+            "name": t["table_name"],
+            "score": round(score, 3),
+            "why": reasons,
+        })
+    scored.sort(key=lambda x: x["score"], reverse=True)
+    top = scored[:limit] if limit else scored
+    if classify:
+        for row in top:
+            cls = await semantic_service.classify_table(
+                db_path, database, row["schema"], row["name"],
+            )
+            row["classification"] = cls.get("type")
+            row["classification_confidence"] = cls.get("confidence")
+    return ToolEnvelope(
+        kind="discover_relevant_tables",
+        detail="brief",
+        next_action=(
+            "describe_table" if top else "broaden_search"
+        ),
+        recommended_tool=(
+            "describe_table" if top else "get_tables"
+        ),
+        data={
+            "goal": goal,
+            "token_hits": tokens,
+            "total_scanned": len(tables),
+            "candidates": [
+                {k: v for k, v in row.items() if k not in ("schema", "name")}
+                for row in top
+            ],
+        },
+    ).to_dict()

sqlserver_semantic_mcp/workflows/facade.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Single workflow entry point exposed to ``server.app.Context``."""
+from __future__ import annotations
+from typing import Optional
+from ..config import Config, get_config
+from ..services.policy_service import PolicyService
+from ..services.query_service import QueryService
+from .bundle import bundle_context_for_next_step
+from .discovery_flow import discover_relevant_tables
+from .query_flow import plan_or_execute_query
+from .recommendations import estimate_execution_risk, suggest_next_tool
+from .router import route_query
+class WorkflowFacade:
+    """Thin wrapper so tool modules reach workflow helpers via one object."""
+    def __init__(
+        self,
+        cfg: Config,
+        policy: PolicyService,
+        query: QueryService,
+    ) -> None:
+        self.cfg = cfg
+        self.policy = policy
+        self.query = query
+    # ---- synchronous helpers ------------------------------------------------
+    def route_query(self, query: Optional[str]) -> dict:
+        return route_query(
+            query, policy=self.policy, database=self.cfg.mssql_database,
+        ).to_dict()
+    def plan_or_execute_query(
+        self,
+        query: str,
+        *,
+        mode: str = "auto",
+        max_rows: Optional[int] = None,
+        return_mode: Optional[str] = None,
+        detail: str = "brief",
+        token_budget_hint: Optional[str] = None,
+        affected_rows_policy: Optional[str] = None,
+    ) -> dict:
+        return plan_or_execute_query(
+            query,
+            policy=self.policy,
+            query_service=self.query,
+            mode=mode,
+            max_rows=max_rows,
+            return_mode=return_mode,
+            detail=detail,
+            token_budget_hint=token_budget_hint,
+            affected_rows_policy=affected_rows_policy,
+            cfg=self.cfg,
+        )
+    def preview_safe_query(
+        self,
+        query: str,
+        *,
+        max_rows: Optional[int] = None,
+    ) -> dict:
+        preview = self.query.preview_query(
+            query, max_rows=max_rows, database=self.cfg.mssql_database,
+        )
+        return {
+            "kind": "preview_safe_query",
+            "detail": "brief",
+            "next_action": preview["next_action"],
+            "recommended_tool": (
+                "plan_or_execute_query" if preview["allowed"]
+                else "validate_query"
+            ),
+            "data": preview,
+        }
+    def suggest_next_tool(self, **kwargs) -> dict:
+        return suggest_next_tool(policy=self.policy, cfg=self.cfg, **kwargs)
+    def estimate_execution_risk(self, query: str) -> dict:
+        return estimate_execution_risk(
+            query, policy=self.policy, cfg=self.cfg,
+        )
+    # ---- async helpers ------------------------------------------------------
+    async def discover_relevant_tables(
+        self,
+        goal: str,
+        *,
+        schemas: Optional[list[str]] = None,
+        keyword: Optional[str] = None,
+        limit: int = 10,
+        classify: bool = False,
+    ) -> dict:
+        return await discover_relevant_tables(
+            goal,
+            schemas=schemas,
+            keyword=keyword,
+            limit=limit,
+            classify=classify,
+            cfg=self.cfg,
+        )
+    async def bundle_context_for_next_step(
+        self,
+        items: list[dict],
+        *,
+        goal: str = "joining",
+        detail: str = "brief",
+    ) -> dict:
+        return await bundle_context_for_next_step(
+            items, goal=goal, detail=detail, cfg=self.cfg,
+        )

sqlserver_semantic_mcp/workflows/query_flow.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Direct-execution fast path for SQL-ready agents."""
+from __future__ import annotations
+from typing import Optional
+from ..config import Config, get_config
+from ..services.policy_service import PolicyService
+from ..services.query_service import QueryService
+from .contracts import ToolEnvelope
+from .router import route_query
+def plan_or_execute_query(
+    query: str,
+    *,
+    policy: PolicyService,
+    query_service: QueryService,
+    mode: str = "auto",
+    max_rows: Optional[int] = None,
+    return_mode: Optional[str] = None,
+    detail: str = "brief",
+    token_budget_hint: Optional[str] = None,
+    affected_rows_policy: Optional[str] = None,
+    cfg: Optional[Config] = None,
+) -> dict:
+    """Single entry point for agents holding ready-to-run SQL.
+    mode:
+      * ``auto``           — execute if safe, otherwise return plan
+      * ``validate_only``  — validate and stop
+      * ``dry_run``        — return preview (validation + shape, no side effects)
+      * ``execute_if_safe``— same as ``auto`` (kept as alias for clarity)
+    """
+    cfg = cfg or get_config()
+    database = cfg.mssql_database
+    # Explicit sub-modes short-circuit routing.
+    if mode == "validate_only":
+        payload = query_service.validate_query(query, database=database)
+        return ToolEnvelope(
+            kind="plan_or_execute_query",
+            detail=detail,
+            confidence=payload["intent"]["confidence"],
+            next_action=payload["next_action"],
+            recommended_tool=(
+                "plan_or_execute_query" if payload["allowed"] else "validate_query"
+            ),
+            data={"path": "direct_validate", "executed": False, **payload},
+        ).to_dict()
+    if mode == "dry_run":
+        preview = query_service.preview_query(
+            query, max_rows=max_rows, database=database,
+        )
+        return ToolEnvelope(
+            kind="plan_or_execute_query",
+            detail=detail,
+            next_action=preview["next_action"],
+            recommended_tool=(
+                "plan_or_execute_query" if preview["allowed"] else "validate_query"
+            ),
+            data={"path": "dry_run", "executed": False, **preview},
+        ).to_dict()
+    decision = route_query(query, policy=policy, database=database)
+    if decision.route == "direct_execute" and cfg.direct_execute_enabled:
+        result = query_service.execute_query(
+            query,
+            max_rows=max_rows,
+            response_mode=return_mode,
+            token_budget_hint=token_budget_hint,
+            affected_rows_policy=affected_rows_policy,
+            database=database,
+        )
+        return ToolEnvelope(
+            kind="plan_or_execute_query",
+            detail=detail,
+            confidence=decision.confidence,
+            next_action=result.get("next_action", "done"),
+            recommended_tool=None,
+            data={
+                "path": "direct_execute",
+                **result,
+                "route": decision.to_dict(),
+            },
+        ).to_dict()
+    if decision.route == "direct_validate":
+        # Policy denied — don't execute even under mode=auto.
+        payload = query_service.validate_query(query, database=database)
+        return ToolEnvelope(
+            kind="plan_or_execute_query",
+            detail=detail,
+            confidence=decision.confidence,
+            next_action=payload["next_action"],
+            recommended_tool="validate_query",
+            data={
+                "path": "direct_validate",
+                "executed": False,
+                **payload,
+                "route": decision.to_dict(),
+            },
+        ).to_dict()
+    # discovery / policy_only
+    return ToolEnvelope(
+        kind="plan_or_execute_query",
+        detail=detail,
+        confidence=decision.confidence,
+        next_action="discover",
+        recommended_tool=decision.recommended_tools[0]
+        if decision.recommended_tools else "discover_relevant_tables",
+        data={
+            "path": decision.route,
+            "executed": False,
+            "reason": decision.reason,
+            "route": decision.to_dict(),
+        },
+    ).to_dict()

sqlserver_semantic_mcp/workflows/recommendations.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Recommendation + risk-estimation helpers."""
+from __future__ import annotations
+from typing import Optional
+from ..config import Config, get_config
+from ..services.policy_service import PolicyService
+from .contracts import ToolEnvelope
+from .router import route_query
+def suggest_next_tool(
+    *,
+    policy: PolicyService,
+    cfg: Optional[Config] = None,
+    query: Optional[str] = None,
+    goal: Optional[str] = None,
+    have_candidates: bool = False,
+    have_join_path: bool = False,
+    have_object: Optional[str] = None,
+) -> dict:
+    """Look at the agent's current state and recommend the next call.
+    This does not invoke any DB tools — it only applies routing logic.
+    """
+    cfg = cfg or get_config()
+    rationale: list[str] = []
+    if query:
+        decision = route_query(query, policy=policy, database=cfg.mssql_database)
+        rationale.append(
+            f"query routed to '{decision.route}' ({decision.reason})"
+        )
+        return ToolEnvelope(
+            kind="suggest_next_tool",
+            detail="brief",
+            confidence=decision.confidence,
+            next_action=decision.route,
+            recommended_tool=(decision.recommended_tools[0]
+                              if decision.recommended_tools else None),
+            data={
+                "recommended_tools": list(decision.recommended_tools),
+                "route": decision.to_dict(),
+                "rationale": rationale,
+            },
+        ).to_dict()
+    recommended: list[str] = []
+    next_action: str
+    if have_object:
+        recommended = ["trace_object_dependencies", "bundle_context_for_next_step"]
+        next_action = "trace_impact"
+        rationale.append("object context available — trace its dependencies")
+    elif have_join_path:
+        recommended = ["plan_or_execute_query", "preview_safe_query"]
+        next_action = "execute"
+        rationale.append("join path ready — draft SQL and execute via fast path")
+    elif have_candidates:
+        recommended = ["describe_table", "find_join_path", "score_join_candidate"]
+        next_action = "inspect_or_join"
+        rationale.append("candidates narrowed — inspect and compute join path")
+    elif goal:
+        recommended = ["discover_relevant_tables", "get_tables"]
+        next_action = "discover"
+        rationale.append("no candidates yet — start from discovery")
+    else:
+        recommended = ["get_tables", "get_execution_policy"]
+        next_action = "orient"
+        rationale.append("no query, goal, or candidates — orient first")
+    return ToolEnvelope(
+        kind="suggest_next_tool",
+        detail="brief",
+        next_action=next_action,
+        recommended_tool=recommended[0] if recommended else None,
+        data={
+            "recommended_tools": recommended,
+            "rationale": rationale,
+        },
+    ).to_dict()
+def estimate_execution_risk(
+    query: str,
+    *,
+    policy: PolicyService,
+    cfg: Optional[Config] = None,
+) -> dict:
+    cfg = cfg or get_config()
+    intent = policy.analyze(query)
+    constraints = policy.current_policy().constraints
+    risks: list[dict] = []
+    level = "low"
+    def bump(new_level: str) -> None:
+        nonlocal level
+        order = {"low": 0, "medium": 1, "high": 2, "critical": 3}
+        if order[new_level] > order[level]:
+            level = new_level
+    if intent.risk_level.value in ("critical", "high"):
+        bump(intent.risk_level.value)
+        risks.append({
+            "kind": "policy_risk",
+            "detail": f"operation {intent.primary_operation.value} is "
+                      f"{intent.risk_level.value}-risk",
+        })
+    if intent.is_multi_statement and not constraints.allow_multi_statement:
+        bump("high")
+        risks.append({
+            "kind": "policy_risk",
+            "detail": "multi-statement query is disallowed",
+        })
+    if intent.has_unqualified_tables:
+        bump("medium")
+        risks.append({
+            "kind": "schema_qualification_risk",
+            "detail": "query references unqualified tables",
+        })
+    if intent.contains_dynamic_sql:
+        bump("high")
+        risks.append({
+            "kind": "dynamic_sql_risk",
+            "detail": "query executes dynamic SQL; analyzer cannot inspect it",
+        })
+    if intent.primary_operation.value == "SELECT" \
+            and not intent.has_top_clause \
+            and not intent.has_where_clause:
+        bump("medium")
+        risks.append({
+            "kind": "payload_risk",
+            "detail": "SELECT without TOP or WHERE may return large payloads",
+        })
+    validation = policy.validate(query, database=cfg.mssql_database)
+    allowed = validation["allowed"]
+    return ToolEnvelope(
+        kind="estimate_execution_risk",
+        detail="brief",
+        confidence=intent.confidence,
+        next_action="execute" if allowed and level in ("low", "medium") else "revise_query",
+        recommended_tool=(
+            "plan_or_execute_query" if allowed else "validate_query"
+        ),
+        data={
+            "operation": intent.primary_operation.value,
+            "tables": intent.affected_tables,
+            "risk_level": level,
+            "risks": risks,
+            "allowed_by_policy": allowed,
+            "policy_reason": validation["reason"],
+            "max_rows_returned": constraints.max_rows_returned,
+            "max_rows_affected": constraints.max_rows_affected,
+        },
+    ).to_dict()

sqlserver_semantic_mcp/workflows/router.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""Route the agent's request down the shortest safe path."""
+from __future__ import annotations
+from typing import Optional
+from ..policy.analyzer import SqlIntent
+from ..services.policy_service import PolicyService
+from .contracts import RouteDecision
+def route_query(
+    query: Optional[str],
+    *,
+    policy: PolicyService,
+    database: str = "",
+) -> RouteDecision:
+    """Decide which path a ``query`` argument belongs to.
+    * ``direct_execute``   — SQL-ready and currently allowed by policy
+    * ``direct_validate``  — SQL-ready but policy denies → agent should revise
+    * ``discovery``        — natural-language / unparseable → agent must explore
+    * ``object_analysis``  — identified procedure/view reference (future hook)
+    * ``policy_only``      — empty input / explicit policy inspection
+    """
+    if not query or not query.strip():
+        return RouteDecision(
+            route="policy_only",
+            reason="empty query; nothing to execute or validate",
+            recommended_tools=["get_execution_policy"],
+            confidence=1.0,
+        )
+    intent: SqlIntent = policy.analyze(query)
+    if not intent.is_sql_like or intent.requires_discovery:
+        return RouteDecision(
+            route="discovery",
+            reason="input does not look like executable SQL",
+            recommended_tools=[
+                "discover_relevant_tables",
+                "describe_table",
+                "find_join_path",
+            ],
+            confidence=max(intent.confidence, 0.4),
+        )
+    validation = policy.validate(query, database=database)
+    if validation["allowed"]:
+        return RouteDecision(
+            route="direct_execute",
+            reason="policy allows direct execution",
+            recommended_tools=["plan_or_execute_query", "run_safe_query"],
+            confidence=intent.confidence,
+        )
+    return RouteDecision(
+        route="direct_validate",
+        reason=validation["reason"],
+        recommended_tools=["validate_query", "estimate_execution_risk"],
+        confidence=intent.confidence,
+    )