sqlserver-semantic-mcp 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlserver_semantic_mcp/__init__.py +1 -0
- sqlserver_semantic_mcp/config.py +78 -0
- sqlserver_semantic_mcp/domain/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/enums.py +48 -0
- sqlserver_semantic_mcp/domain/models/__init__.py +0 -0
- sqlserver_semantic_mcp/domain/models/column.py +14 -0
- sqlserver_semantic_mcp/domain/models/object.py +13 -0
- sqlserver_semantic_mcp/domain/models/relationship.py +11 -0
- sqlserver_semantic_mcp/domain/models/table.py +29 -0
- sqlserver_semantic_mcp/infrastructure/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/background.py +59 -0
- sqlserver_semantic_mcp/infrastructure/cache/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/cache/semantic.py +132 -0
- sqlserver_semantic_mcp/infrastructure/cache/store.py +152 -0
- sqlserver_semantic_mcp/infrastructure/cache/structural.py +203 -0
- sqlserver_semantic_mcp/infrastructure/connection.py +78 -0
- sqlserver_semantic_mcp/infrastructure/queries/__init__.py +0 -0
- sqlserver_semantic_mcp/infrastructure/queries/comment_queries.py +18 -0
- sqlserver_semantic_mcp/infrastructure/queries/metadata_queries.py +70 -0
- sqlserver_semantic_mcp/infrastructure/queries/object_queries.py +15 -0
- sqlserver_semantic_mcp/main.py +90 -0
- sqlserver_semantic_mcp/policy/__init__.py +0 -0
- sqlserver_semantic_mcp/policy/analyzer.py +194 -0
- sqlserver_semantic_mcp/policy/enforcer.py +104 -0
- sqlserver_semantic_mcp/policy/intents/__init__.py +16 -0
- sqlserver_semantic_mcp/policy/intents/ast_analyzer.py +24 -0
- sqlserver_semantic_mcp/policy/intents/base.py +17 -0
- sqlserver_semantic_mcp/policy/intents/regex_analyzer.py +11 -0
- sqlserver_semantic_mcp/policy/intents/router.py +21 -0
- sqlserver_semantic_mcp/policy/loader.py +90 -0
- sqlserver_semantic_mcp/policy/models.py +43 -0
- sqlserver_semantic_mcp/server/__init__.py +0 -0
- sqlserver_semantic_mcp/server/app.py +125 -0
- sqlserver_semantic_mcp/server/compact.py +74 -0
- sqlserver_semantic_mcp/server/prompts/__init__.py +5 -0
- sqlserver_semantic_mcp/server/prompts/analysis.py +56 -0
- sqlserver_semantic_mcp/server/prompts/discovery.py +55 -0
- sqlserver_semantic_mcp/server/prompts/execution.py +64 -0
- sqlserver_semantic_mcp/server/prompts/registry.py +41 -0
- sqlserver_semantic_mcp/server/resources/__init__.py +1 -0
- sqlserver_semantic_mcp/server/resources/schema.py +144 -0
- sqlserver_semantic_mcp/server/tools/__init__.py +42 -0
- sqlserver_semantic_mcp/server/tools/cache.py +24 -0
- sqlserver_semantic_mcp/server/tools/metadata.py +167 -0
- sqlserver_semantic_mcp/server/tools/metrics.py +44 -0
- sqlserver_semantic_mcp/server/tools/object_tool.py +113 -0
- sqlserver_semantic_mcp/server/tools/policy.py +48 -0
- sqlserver_semantic_mcp/server/tools/query.py +159 -0
- sqlserver_semantic_mcp/server/tools/relationship.py +104 -0
- sqlserver_semantic_mcp/server/tools/semantic.py +112 -0
- sqlserver_semantic_mcp/server/tools/shape.py +204 -0
- sqlserver_semantic_mcp/server/tools/workflow.py +307 -0
- sqlserver_semantic_mcp/services/__init__.py +0 -0
- sqlserver_semantic_mcp/services/metadata_service.py +173 -0
- sqlserver_semantic_mcp/services/metrics_service.py +124 -0
- sqlserver_semantic_mcp/services/object_service.py +187 -0
- sqlserver_semantic_mcp/services/policy_service.py +59 -0
- sqlserver_semantic_mcp/services/query_service.py +321 -0
- sqlserver_semantic_mcp/services/relationship_service.py +160 -0
- sqlserver_semantic_mcp/services/semantic_service.py +277 -0
- sqlserver_semantic_mcp/workflows/__init__.py +26 -0
- sqlserver_semantic_mcp/workflows/bundle.py +157 -0
- sqlserver_semantic_mcp/workflows/contracts.py +64 -0
- sqlserver_semantic_mcp/workflows/discovery_flow.py +116 -0
- sqlserver_semantic_mcp/workflows/facade.py +117 -0
- sqlserver_semantic_mcp/workflows/query_flow.py +120 -0
- sqlserver_semantic_mcp/workflows/recommendations.py +161 -0
- sqlserver_semantic_mcp/workflows/router.py +59 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/METADATA +679 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/RECORD +74 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/WHEEL +5 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/entry_points.txt +2 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/licenses/LICENSE +21 -0
- sqlserver_semantic_mcp-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ..config import Config, get_config
|
|
6
|
+
from ..domain.enums import SqlOperation
|
|
7
|
+
from ..infrastructure.cache.semantic import (
|
|
8
|
+
get_object_definition, upsert_object_definition,
|
|
9
|
+
)
|
|
10
|
+
from ..infrastructure.cache.structural import read_schema_version
|
|
11
|
+
from ..infrastructure.connection import fetch_one, fetch_all
|
|
12
|
+
from ..infrastructure.queries.object_queries import (
|
|
13
|
+
GET_OBJECT_DEFINITION, GET_OBJECT_DEPENDENCIES,
|
|
14
|
+
)
|
|
15
|
+
from ..policy.analyzer import (
|
|
16
|
+
_strip_comments, _split_statements, _detect_operation, _IDENT,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Operations treated as data- or schema-mutating.  EXEC/EXECUTE are listed
# too, presumably because a procedure may write internally — TODO confirm.
# NOTE(review): not referenced anywhere in this module's visible code;
# verify it is imported elsewhere before removing.
_WRITE_OPS = {
    SqlOperation.UPDATE, SqlOperation.INSERT, SqlOperation.DELETE,
    SqlOperation.MERGE, SqlOperation.TRUNCATE,
    SqlOperation.DROP, SqlOperation.ALTER, SqlOperation.CREATE,
    SqlOperation.EXEC, SqlOperation.EXECUTE,
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _write_target(sql: str, operation: SqlOperation) -> Optional[str]:
    """Return the table targeted by a write *operation* in *sql*, or None.

    Only UPDATE / INSERT / DELETE / MERGE / TRUNCATE have a recognizable
    target keyword; any other operation yields None, as does a pattern
    that fails to match.
    """
    if operation == SqlOperation.UPDATE:
        pattern = rf"\bUPDATE\s+({_IDENT})"
    elif operation == SqlOperation.INSERT:
        pattern = rf"\bINTO\s+({_IDENT})"
    elif operation == SqlOperation.DELETE:
        pattern = rf"\bDELETE\s+(?:FROM\s+)?({_IDENT})"
    elif operation == SqlOperation.MERGE:
        pattern = rf"\bMERGE\s+(?:INTO\s+)?({_IDENT})"
    elif operation == SqlOperation.TRUNCATE:
        pattern = rf"\bTRUNCATE\s+TABLE\s+({_IDENT})"
    else:
        return None
    match = re.search(pattern, sql, re.IGNORECASE)
    if match is None:
        return None
    return match.group(1)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _from_join_sources(sql: str) -> list[str]:
    """Collect table identifiers that appear after FROM or JOIN in *sql*.

    FROM matches are listed before JOIN matches; duplicates are kept.
    """
    found: list[str] = []
    for keyword in ("FROM", "JOIN"):
        found.extend(
            re.findall(rf"\b{keyword}\s+({_IDENT})", sql, re.IGNORECASE)
        )
    return found
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Regexes for statements that write to a table; each has exactly one capture
# group — the target table identifier.
# NOTE(review): INSERT here requires the INTO keyword, whereas _write_target
# matches bare "\bINTO\s+" — confirm the asymmetry is intentional.
_WRITE_PATTERNS = [
    rf"\bUPDATE\s+({_IDENT})",
    rf"\bINSERT\s+INTO\s+({_IDENT})",
    rf"\bDELETE\s+FROM\s+({_IDENT})",
    rf"\bMERGE\s+(?:INTO\s+)?({_IDENT})",
    rf"\bTRUNCATE\s+TABLE\s+({_IDENT})",
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def split_read_write(sql: str) -> tuple[list[str], list[str]]:
    """Split a SQL body (e.g. a PROCEDURE definition) into (read_tables, write_tables).

    Regex-based. Scans the entire SQL for write-operation patterns (UPDATE/INSERT/
    DELETE/MERGE/TRUNCATE TABLE) and for read-source patterns (FROM/JOIN).
    Write targets are excluded from reads even if they also appear as FROM aliases
    in the same statement (write-intent wins).

    Known limitations: CTE names may appear as reads; dynamic SQL is invisible.
    Returns ([], []) on empty input.
    """
    if not sql or not sql.strip():
        return [], []

    clean = _strip_comments(sql)

    # Collect write targets (first capture group of each pattern).
    writes: list[str] = []
    for pat in _WRITE_PATTERNS:
        writes.extend(re.findall(pat, clean, re.IGNORECASE))

    # Read sources = FROM / JOIN.  Strip DELETE FROM fragments first so the
    # DELETE target is not double-counted as a read source.
    read_scan = re.sub(
        rf"\bDELETE\s+FROM\s+{_IDENT}", "", clean, flags=re.IGNORECASE,
    )
    # Consistency: reuse the module's shared FROM/JOIN scanner instead of
    # duplicating its two regexes inline (same patterns, same FROM-then-JOIN
    # order, so behavior is unchanged).
    reads = _from_join_sources(read_scan)

    def _dedup(items: list[str]) -> list[str]:
        # Case-insensitive dedup preserving first-seen order and casing.
        seen: set[str] = set()
        out: list[str] = []
        for t in items:
            k = t.lower()
            if k not in seen:
                seen.add(k)
                out.append(t)
        return out

    writes_d = _dedup(writes)
    # Reads: dedup, then remove any table that is also in writes (write-intent wins)
    write_keys = {w.lower() for w in writes_d}
    reads_d = [r for r in _dedup(reads) if r.lower() not in write_keys]

    return reads_d, writes_d
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _augment_read_write(obj: dict) -> dict:
    """Derive read_tables/write_tables from a cached object definition.

    Returns *obj* untouched when it is falsy or carries no usable string
    definition; otherwise returns a shallow copy with the derived keys added.
    """
    if not obj:
        return obj
    body = obj.get("definition")
    if not (isinstance(body, str) and body):
        return obj
    try:
        reads, writes = split_read_write(body)
    except Exception:
        logger.exception("split_read_write failed; falling back")
        # Best-effort fallback: treat all known dependencies as reads.
        reads, writes = obj.get("dependencies", []) or [], []
    return {
        **obj,
        "read_tables": reads,
        "write_tables": writes,
        # Legacy: affected_tables aliases write_tables (name now matches intent)
        "affected_tables": writes,
    }
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
async def describe_object(
    schema: str, object_name: str, object_type: str,
    cfg: Optional[Config] = None,
) -> dict:
    """Describe a database object (definition + dependencies), cache-first.

    Returns the cached row when its status is "ready" and its object_hash
    matches the current schema-version hash; otherwise fetches definition
    and dependencies from the live database, upserts the cache, and returns
    the fresh row (augmented with read/write table lists).  On failure a
    status="error" row is upserted and a minimal error dict is returned.
    """
    cfg = cfg or get_config()
    db = cfg.mssql_database
    # Current schema-version hash; falls back to "" when no version row exists,
    # so a cache hit then requires the cached hash to also be empty.
    ver = await read_schema_version(cfg.cache_path, db)
    object_hash = ver["object_hash"] if ver else ""

    cached = await get_object_definition(
        cfg.cache_path, db, schema, object_name, object_type,
    )
    if cached and cached["status"] == "ready" \
            and cached.get("object_hash") == object_hash:
        return _augment_read_write(cached)

    qualified = f"{schema}.{object_name}"
    try:
        # Live fetch: definition text plus dependency rows (schema, name, type).
        def_row = fetch_one(cfg, GET_OBJECT_DEFINITION, (qualified,))
        definition = def_row[0] if def_row and def_row[0] else None
        dep_rows = fetch_all(cfg, GET_OBJECT_DEPENDENCIES, (qualified,))
        dependencies = [f"{r[0]}.{r[1]}" for r in dep_rows if r[0]]
        # NOTE(review): unlike `dependencies` above, this comprehension does
        # not filter rows with a falsy r[0] — confirm whether such rows occur.
        affected = [
            f"{r[0]}.{r[1]}" for r in dep_rows
            if r[2] and "TABLE" in str(r[2]).upper()
        ]
        await upsert_object_definition(
            cfg.cache_path, db, schema, object_name, object_type,
            object_hash=object_hash, status="ready",
            definition=definition, dependencies=dependencies,
            affected_tables=affected,
        )
        return _augment_read_write({
            "database_name": db,
            "schema": schema,
            "object_name": object_name,
            "object_type": object_type,
            "object_hash": object_hash,
            "status": "ready",
            "definition": definition,
            "dependencies": dependencies,
            "affected_tables": affected,
        })
    except Exception as e:
        logger.exception("describe_object failed")
        # Persist the failure; subsequent calls will still retry the live
        # fetch, because only status == "ready" rows are served from cache.
        await upsert_object_definition(
            cfg.cache_path, db, schema, object_name, object_type,
            object_hash=object_hash, status="error",
            error_message=str(e),
        )
        return {"status": "error", "error_message": str(e)}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
async def trace_dependencies(
    schema: str, object_name: str, object_type: str,
    cfg: Optional[Config] = None,
) -> list[str]:
    """Return the dependency list for an object via describe_object."""
    described = await describe_object(schema, object_name, object_type, cfg)
    if not described:
        return []
    return described.get("dependencies", [])
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from ..config import Config, get_config
|
|
4
|
+
from ..policy.analyzer import SqlIntent
|
|
5
|
+
from ..policy.enforcer import enforce
|
|
6
|
+
from ..policy.intents import get_analyzer
|
|
7
|
+
from ..policy.loader import load_active_policy
|
|
8
|
+
from ..policy.models import PolicyProfile
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PolicyService:
    """Holds the active policy profile and validates SQL against it.

    The profile is loaded lazily on first use; the analyzer is built once
    at construction and rebuilt on reload().
    """

    def __init__(self, cfg: Optional[Config] = None) -> None:
        self._cfg = cfg or get_config()
        self._policy: Optional[PolicyProfile] = None  # lazy; see current_policy()
        self._analyzer = get_analyzer(self._cfg)

    def load(self) -> None:
        """(Re)read the active policy profile from configuration."""
        self._policy = load_active_policy(self._cfg)

    def reload(self) -> None:
        """Reload the policy profile and rebuild the SQL analyzer."""
        self.load()
        self._analyzer = get_analyzer(self._cfg)

    def current_policy(self) -> PolicyProfile:
        """Return the active policy, loading it lazily on first access."""
        if self._policy is None:
            self.load()
        profile = self._policy
        assert profile is not None  # narrowing for the type checker
        return profile

    def analyze(self, sql: str) -> SqlIntent:
        """Run intent analysis on *sql* without enforcing any policy."""
        return self._analyzer.analyze(sql)

    def validate(self, sql: str, database: str = "") -> dict:
        """Analyze *sql* and enforce the current policy against it."""
        profile = self.current_policy()
        analyzed = self._analyzer.analyze(sql)
        verdict = enforce(analyzed, profile, database=database)
        return {
            "allowed": verdict.allowed,
            "reason": verdict.reason,
            "intent": intent_to_dict(analyzed),
        }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def intent_to_dict(intent: SqlIntent) -> dict:
    """Flatten a SqlIntent into a plain JSON-serializable dict.

    Enum fields are reduced to their .value; everything else is copied
    through unchanged.
    """
    serialized: dict = {}
    serialized["primary_operation"] = intent.primary_operation.value
    serialized["has_where_clause"] = intent.has_where_clause
    serialized["has_top_clause"] = intent.has_top_clause
    serialized["affected_tables"] = intent.affected_tables
    serialized["risk_level"] = intent.risk_level.value
    serialized["is_multi_statement"] = intent.is_multi_statement
    serialized["statement_count"] = intent.statement_count
    serialized["is_sql_like"] = intent.is_sql_like
    serialized["confidence"] = intent.confidence
    serialized["requires_discovery"] = intent.requires_discovery
    serialized["has_unqualified_tables"] = intent.has_unqualified_tables
    serialized["contains_dynamic_sql"] = intent.contains_dynamic_sql
    serialized["contains_cte"] = intent.contains_cte
    return serialized
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""Query service — validation / preview / execution.
|
|
2
|
+
|
|
3
|
+
v0.5 splits the old ``run_safe_query()`` into three explicit phases so
|
|
4
|
+
the workflow layer can route an agent's request down the shortest safe
|
|
5
|
+
path. ``run_safe_query()`` is kept as a thin wrapper over
|
|
6
|
+
``execute_query`` for backwards compatibility.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
from ..config import Config, get_config
|
|
15
|
+
from ..infrastructure.connection import open_connection
|
|
16
|
+
from ..policy.analyzer import SqlIntent
|
|
17
|
+
from .policy_service import PolicyService, intent_to_dict
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class QueryExecutionMode(str, Enum):
    """How far a query request should be taken (validate / dry-run / run).

    NOTE(review): exported via __all__ but not referenced by QueryService's
    visible code — presumably consumed by the workflow layer; confirm.
    """

    VALIDATE_ONLY = "validate_only"
    DRY_RUN = "dry_run"
    EXECUTE_IF_SAFE = "execute_if_safe"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AffectedRowsPolicyMode(str, Enum):
    """Policy for writes whose affected-row count exceeds the configured cap.

    "strict" rolls the transaction back; "report" commits and merely flags
    the overrun in the response (see QueryService._shape_non_select).
    """

    STRICT = "strict"
    REPORT = "report"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---- response_mode helpers --------------------------------------------------
|
|
34
|
+
|
|
35
|
+
_VALID_RESPONSE_MODES = {"summary", "rows", "sample", "count_only"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _normalize_response_mode(value: Optional[str], default: str) -> str:
|
|
39
|
+
if value is None:
|
|
40
|
+
return default
|
|
41
|
+
if value not in _VALID_RESPONSE_MODES:
|
|
42
|
+
raise ValueError(
|
|
43
|
+
f"invalid response_mode '{value}'; "
|
|
44
|
+
f"expected one of {sorted(_VALID_RESPONSE_MODES)}"
|
|
45
|
+
)
|
|
46
|
+
return value
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---- budget hint ------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
# Rows included in a "sample" response, keyed by token-budget hint.
_BUDGET_SAMPLE_ROWS = {
    "tiny": 3,
    "low": 10,
    "medium": 50,
    "high": 200,
}


def sample_row_cap(budget: Optional[str]) -> int:
    """Map a token-budget hint to a sample-row cap.

    None, "" and unknown hints all fall back to the "low" cap of 10.
    """
    key = budget if budget else "low"
    return _BUDGET_SAMPLE_ROWS.get(key, 10)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---- service ----------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class QueryService:
    """Validate / preview / execute SQL through the policy layer.

    All three phases share the same policy validation; only execute_query
    opens a database connection.
    """

    def __init__(
        self,
        policy_service: PolicyService,
        cfg: Optional[Config] = None,
    ) -> None:
        self._policy = policy_service
        self._cfg = cfg or get_config()

    # ------------------------------------------------------------------ 1. validate

    def validate(self, sql: str, database: str = "") -> dict:
        """Backwards-compatible validation façade."""
        db = database or self._cfg.mssql_database
        return self._policy.validate(sql, database=db)

    def validate_query(self, sql: str, database: str = "") -> dict:
        """Return validation + intent, agent-envelope friendly."""
        db = database or self._cfg.mssql_database
        validation = self._policy.validate(sql, database=db)
        intent = validation["intent"]
        next_action = "execute" if validation["allowed"] else "revise_query"
        return {
            "kind": "query_validation",
            "allowed": validation["allowed"],
            "reason": validation["reason"],
            "intent": intent,
            "risk": intent["risk_level"],
            "tables": intent["affected_tables"],
            "next_action": next_action,
        }

    # ------------------------------------------------------------------ 2. preview

    def preview_query(
        self,
        sql: str,
        *,
        max_rows: Optional[int] = None,
        database: str = "",
    ) -> dict:
        """Cheap dry-run: return what WOULD happen, without side effects."""
        db = database or self._cfg.mssql_database
        policy = self._policy.current_policy()
        validation = self._policy.validate(sql, database=db)
        intent = validation["intent"]
        # Caller override of the policy row limit (0/None falls back to policy).
        limit = max_rows or policy.constraints.max_rows_returned

        return {
            "kind": "query_preview",
            "operation": intent["primary_operation"],
            "tables": intent["affected_tables"],
            "allowed": validation["allowed"],
            "reason": validation["reason"],
            "risk": intent["risk_level"],
            "max_rows_applied": limit,
            "max_rows_affected": policy.constraints.max_rows_affected,
            "is_multi_statement": intent["is_multi_statement"],
            "has_where_clause": intent["has_where_clause"],
            "has_unqualified_tables": intent["has_unqualified_tables"],
            "contains_dynamic_sql": intent["contains_dynamic_sql"],
            "next_action": "execute" if validation["allowed"] else "revise_query",
        }

    # ------------------------------------------------------------------ 3. execute

    def execute_query(
        self,
        sql: str,
        *,
        max_rows: Optional[int] = None,
        response_mode: Optional[str] = None,
        token_budget_hint: Optional[str] = None,
        affected_rows_policy: Optional[str] = None,
        database: str = "",
    ) -> dict:
        """Execute SQL after policy validation.

        response_mode:
            summary    — columns + row_count only
            rows       — columns + rows (default when op=SELECT)
            sample     — columns + first N rows (N = budget-derived)
            count_only — row_count only

        Raises ValueError (via _normalize_response_mode) for an unknown
        response_mode; other execution failures are returned as an
        executed=False dict rather than raised.
        """
        mode = _normalize_response_mode(
            response_mode, self._cfg.default_response_mode,
        )
        budget = token_budget_hint or self._cfg.default_token_budget_hint

        # Per-call override of the config-level strict rows-affected cap.
        strict_cap = self._cfg.strict_rows_affected_cap
        if affected_rows_policy is not None:
            strict_cap = affected_rows_policy == "strict"

        db = database or self._cfg.mssql_database
        policy = self._policy.current_policy()
        limit = max_rows or policy.constraints.max_rows_returned

        # Validate first — a disallowed query never touches the connection.
        validation = self._policy.validate(sql, database=db)
        if not validation["allowed"]:
            return {
                "executed": False,
                "validation": validation,
                "error": validation["reason"],
                "next_action": "revise_query",
            }

        op = validation["intent"]["primary_operation"]

        try:
            with open_connection(self._cfg) as conn:
                cursor = conn.cursor()
                try:
                    cursor.execute(sql)

                    if op == "SELECT":
                        return self._shape_select(
                            cursor, limit, mode, budget, validation,
                        )

                    return self._shape_non_select(
                        cursor, conn, policy.constraints.max_rows_affected,
                        strict_cap, validation,
                    )
                finally:
                    # Best-effort close; a close failure must not mask the result.
                    try:
                        cursor.close()
                    except Exception:
                        logger.warning("Failed to close cursor", exc_info=True)
        except Exception as e:
            logger.exception("Query execution failed")
            return {
                "executed": False,
                "validation": validation,
                "error": str(e),
                "next_action": "revise_query",
            }

    # ------------------------------------------------------------------ helpers

    def _shape_select(
        self,
        cursor: Any,
        limit: int,
        mode: str,
        budget: Optional[str],
        validation: dict,
    ) -> dict:
        """Shape a SELECT result per response_mode (count_only/summary/sample/rows)."""
        columns = [d[0] for d in cursor.description]
        # Over-fetch by one row so truncation is detectable without a COUNT.
        rows = cursor.fetchmany(limit + 1)
        truncated = len(rows) > limit
        rows = rows[:limit]

        if mode == "count_only":
            return {
                "executed": True,
                "validation": validation,
                "row_count": len(rows),
                "truncated": truncated,
                "next_action": "done",
            }

        if mode == "summary":
            return {
                "executed": True,
                "validation": validation,
                "columns": columns,
                "row_count": len(rows),
                "truncated": truncated,
                "next_action": "refine_or_done",
            }

        if mode == "sample":
            cap = min(sample_row_cap(budget), len(rows))
            return {
                "executed": True,
                "validation": validation,
                "columns": columns,
                "row_count": len(rows),
                "truncated": truncated,
                "sample_rows": [list(r) for r in rows[:cap]],
                "sample_size": cap,
                "next_action": "refine_or_done",
            }

        # default: rows
        return {
            "executed": True,
            "validation": validation,
            "columns": columns,
            "rows": [list(r) for r in rows],
            "row_count": len(rows),
            "truncated": truncated,
            "next_action": "done",
        }

    def _shape_non_select(
        self,
        cursor: Any,
        conn: Any,
        cap: int,
        strict_cap: bool,
        validation: dict,
    ) -> dict:
        """Commit or roll back a write based on the affected-rows cap policy."""
        # NOTE(review): DB-API drivers may report rowcount == -1; -1 never
        # exceeds the cap, so such writes always commit — confirm acceptable.
        affected = cursor.rowcount
        exceeded = affected > cap

        if strict_cap and exceeded:
            try:
                conn.rollback()
            except Exception:
                logger.warning("Rollback failed", exc_info=True)
            return {
                "executed": False,
                "validation": validation,
                "rows_affected": affected,
                "exceeded_cap": True,
                "error": (
                    f"Affected rows {affected} exceeds cap {cap} under "
                    f"strict rows-affected policy; transaction rolled back"
                ),
                "next_action": "revise_query",
            }

        conn.commit()
        return {
            "executed": True,
            "validation": validation,
            "rows_affected": affected,
            "exceeded_cap": exceeded,
            "next_action": "done",
        }

    # ------------------------------------------------------------------ legacy

    def run_safe_query(
        self,
        sql: str,
        max_rows: Optional[int] = None,
    ) -> dict:
        """Legacy wrapper — preserved for v0.4 clients."""
        return self.execute_query(
            sql,
            max_rows=max_rows,
            response_mode="rows",
            affected_rows_policy="report",
        )
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
# Public surface of this module.  intent_to_dict and SqlIntent are
# re-exports (imported above from the policy layer).
__all__ = [
    "QueryService",
    "QueryExecutionMode",
    "AffectedRowsPolicyMode",
    "sample_row_cap",
    "intent_to_dict",
    "SqlIntent",
]
|