PyPI - sql-code-graph - Versions diffs - 0.2.1__py3-none-any.whl - Mend

sql-code-graph 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

sql_code_graph-0.2.1.dist-info/METADATA +171 -0
sql_code_graph-0.2.1.dist-info/RECORD +55 -0
sql_code_graph-0.2.1.dist-info/WHEEL +4 -0
sql_code_graph-0.2.1.dist-info/entry_points.txt +2 -0
sqlcg/__init__.py +5 -0
sqlcg/__main__.py +6 -0
sqlcg/cli/__init__.py +1 -0
sqlcg/cli/commands/__init__.py +1 -0
sqlcg/cli/commands/analyze.py +93 -0
sqlcg/cli/commands/db.py +83 -0
sqlcg/cli/commands/find.py +63 -0
sqlcg/cli/commands/gain.py +169 -0
sqlcg/cli/commands/git.py +73 -0
sqlcg/cli/commands/index.py +92 -0
sqlcg/cli/commands/install.py +60 -0
sqlcg/cli/commands/mcp.py +54 -0
sqlcg/cli/commands/report.py +135 -0
sqlcg/cli/commands/watch.py +57 -0
sqlcg/cli/main.py +40 -0
sqlcg/core/__init__.py +8 -0
sqlcg/core/config.py +104 -0
sqlcg/core/graph_db.py +179 -0
sqlcg/core/jobs.py +105 -0
sqlcg/core/kuzu_backend.py +269 -0
sqlcg/core/neo4j_backend.py +195 -0
sqlcg/core/queries.py +82 -0
sqlcg/core/schema.cypher +104 -0
sqlcg/core/schema.py +48 -0
sqlcg/indexer/__init__.py +1 -0
sqlcg/indexer/dbt_adapter.py +23 -0
sqlcg/indexer/indexer.py +317 -0
sqlcg/indexer/walker.py +55 -0
sqlcg/indexer/watcher.py +195 -0
sqlcg/lineage/__init__.py +1 -0
sqlcg/lineage/aggregator.py +58 -0
sqlcg/lineage/schema_resolver.py +198 -0
sqlcg/metrics/__init__.py +5 -0
sqlcg/metrics/store.py +273 -0
sqlcg/parsers/__init__.py +30 -0
sqlcg/parsers/ansi_parser.py +215 -0
sqlcg/parsers/base.py +414 -0
sqlcg/parsers/bigquery_parser.py +77 -0
sqlcg/parsers/postgres_parser.py +27 -0
sqlcg/parsers/registry.py +46 -0
sqlcg/parsers/snowflake_parser.py +148 -0
sqlcg/parsers/tsql_parser.py +27 -0
sqlcg/server/__init__.py +1 -0
sqlcg/server/exceptions.py +20 -0
sqlcg/server/models.py +83 -0
sqlcg/server/server.py +57 -0
sqlcg/server/tools.py +663 -0
sqlcg/utils/__init__.py +6 -0
sqlcg/utils/hashing.py +18 -0
sqlcg/utils/ignore.py +36 -0
sqlcg/utils/logging.py +29 -0

sqlcg/core/graph_db.py ADDED Viewed

@@ -0,0 +1,179 @@
+"""Abstract base class for graph database backends."""
+from abc import ABC, abstractmethod
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import Any
+from sqlcg.core.schema import NodeLabel
+from sqlcg.utils.logging import getLogger
+logger = getLogger(__name__)
+class GraphBackend(ABC):
+    """Abstract interface for graph database operations.
+    All upsert operations are idempotent (MERGE-based, not INSERT).
+    Transaction support is optional; the default no-op logs a warning
+    if not overridden by a subclass.
+    All methods must be idempotent when called multiple times with the
+    same inputs.
+    """
+    @abstractmethod
+    def init_schema(self) -> None:
+        """Initialize the database schema if not already present.
+        Creates all node and relationship tables from the schema definition.
+        Idempotent: safe to call multiple times.
+        """
+    @abstractmethod
+    def upsert_node(self, label: str, key: str, properties: dict[str, Any]) -> None:
+        """Upsert a node with the given label and properties.
+        Idempotent MERGE: if the node exists, update its properties;
+        otherwise create it.
+        Args:
+            label: Node label (e.g., "Table", "Column")
+            key: Primary key value for identifying the node
+            properties: Dict of properties to set/update on the node
+        """
+    @abstractmethod
+    def upsert_edge(
+        self,
+        src_label: str,
+        src_key: str,
+        dst_label: str,
+        dst_key: str,
+        rel_type: str,
+        properties: dict[str, Any],
+    ) -> None:
+        """Upsert a relationship between two nodes.
+        Idempotent MERGE: if the relationship exists, update its properties;
+        otherwise create it.
+        Args:
+            src_label: Source node label
+            src_key: Source node primary key
+            dst_label: Destination node label
+            dst_key: Destination node primary key
+            rel_type: Relationship type (e.g., "COLUMN_LINEAGE")
+            properties: Dict of properties to set/update on the relationship
+        """
+    @abstractmethod
+    def run_read(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
+        """Execute a read-only query and return results.
+        Args:
+            query: Query string (Cypher for KùzuDB/Neo4j)
+            params: Parameters to bind in the query
+        Returns:
+            List of result dicts (one dict per row)
+        """
+    @abstractmethod
+    def run_write(self, query: str, params: dict[str, Any]) -> None:
+        """Execute a write query (mutation).
+        Args:
+            query: Query string (Cypher for KùzuDB/Neo4j)
+            params: Parameters to bind in the query
+        """
+    @abstractmethod
+    def delete_nodes_for_file(self, file_path: str) -> None:
+        """Delete all nodes associated with a file and its relationships.
+        Removes:
+        - Column nodes for tables defined in this file
+        - Query nodes defined in this file
+        - Table nodes defined in this file
+        - The File node itself
+        This operation is used when re-indexing a file to ensure a clean re-parse.
+        Args:
+            file_path: Absolute path to the file
+        """
+    @abstractmethod
+    def get_schema_version(self) -> str | None:
+        """Get the stored schema version from the database.
+        Returns:
+            The schema version string, or None if not set.
+        """
+    @abstractmethod
+    def close(self) -> None:
+        """Close the database connection."""
+    def __enter__(self) -> "GraphBackend":
+        """Context manager entry point."""
+        return self
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Context manager exit point — closes the database connection."""
+        self.close()
+    @staticmethod
+    def _pk_field(label: str) -> str:
+        """Return the primary key field name for a node label.
+        Args:
+            label: Node label (e.g., "Repo", "File", "SqlTable", "SqlColumn", "SqlQuery")
+        Returns:
+            Primary key field name for the label
+        """
+        match label:
+            case NodeLabel.REPO | NodeLabel.FILE:
+                return "path"
+            case NodeLabel.TABLE:
+                return "qualified"
+            case _:
+                return "id"
+    @staticmethod
+    def _validate_props(properties: dict[str, Any]) -> None:
+        """Validate that all property keys are safe identifiers.
+        Guards against Cypher injection via property key interpolation.
+        Args:
+            properties: Dictionary of properties to validate
+        Raises:
+            ValueError: If any property key is not a valid identifier
+        """
+        for key in properties:
+            if not key.isidentifier():
+                raise ValueError(f"Invalid property key: {key!r}")
+    @contextmanager
+    def transaction(self) -> Iterator["GraphBackend"]:
+        """Context manager for database transactions.
+        The base implementation is a no-op that logs a warning.
+        Subclasses should override to provide ACID guarantees.
+        Yields:
+            self (the GraphBackend instance)
+        Raises:
+            Any exception raised in the context is logged; the caller
+            must decide whether to re-raise.
+        """
+        logger.warning("transaction() not overridden — no rollback guarantee")
+        try:
+            yield self
+        except Exception:
+            raise

sqlcg/core/jobs.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""Watch job manager for file-change-triggered reindexing."""
+import threading
+from collections.abc import Callable
+from sqlcg.utils.logging import getLogger
+logger = getLogger(__name__)
+class WatchJobManager:
+    """Manages debounced reindex jobs for file changes.
+    Uses per-file threading.Timer instances with debouncing. Rapid changes
+    to the same file cancel the previous timer and schedule a new one.
+    """
+    def __init__(
+        self,
+        indexer,
+        db,
+        dialect: str | None,
+        debounce_seconds: float = 2.0,
+        _timer_factory: Callable | None = None,
+    ):
+        """Initialize the watch job manager.
+        Args:
+            indexer: Indexer instance
+            db: GraphBackend instance
+            dialect: SQL dialect
+            debounce_seconds: Debounce delay in seconds
+            _timer_factory: Optional timer factory (for testing)
+        """
+        self._indexer = indexer
+        self._db = db
+        self._dialect = dialect
+        self._debounce = debounce_seconds
+        self._timers: dict[str, threading.Timer] = {}
+        self._lock = threading.Lock()
+        self._timer_factory = _timer_factory or threading.Timer
+        self._paused = False
+        self._queued: list[str] = []
+    def schedule(self, file_path: str) -> None:
+        """Schedule a reindex job for a file.
+        If a job is already scheduled for this path, it is canceled and
+        a new one is scheduled. If the manager is paused, the path is queued
+        instead of starting a timer.
+        Args:
+            file_path: Path to the file to reindex
+        """
+        with self._lock:
+            if self._paused:
+                # Queue the path for later processing
+                if file_path not in self._queued:
+                    self._queued.append(file_path)
+                return
+            if file_path in self._timers:
+                self._timers[file_path].cancel()
+            t = self._timer_factory(self._debounce, self._run_job, args=[file_path])
+            self._timers[file_path] = t
+            t.start()
+    def _run_job(self, file_path: str) -> None:
+        """Execute a reindex job (called after debounce delay).
+        Args:
+            file_path: Path to the file to reindex
+        """
+        try:
+            self._indexer.reindex_file(file_path, self._db, self._dialect)
+        except Exception as exc:
+            logger.error("reindex_file failed: %s: %s", file_path, exc)
+        finally:
+            with self._lock:
+                self._timers.pop(file_path, None)
+    def cancel_all(self) -> None:
+        """Cancel all pending timers."""
+        with self._lock:
+            for t in self._timers.values():
+                t.cancel()
+            self._timers.clear()
+    def set_paused(self, paused: bool) -> None:
+        """Set the paused state.
+        Args:
+            paused: True to pause scheduling, False to resume
+        """
+        with self._lock:
+            self._paused = paused
+    def drain_queued(self) -> None:
+        """Drain queued file paths and schedule them for reindexing."""
+        with self._lock:
+            queued_copy = self._queued.copy()
+            self._queued.clear()
+        for file_path in queued_copy:
+            self.schedule(file_path)

sqlcg/core/kuzu_backend.py ADDED Viewed

@@ -0,0 +1,269 @@
+"""KùzuDB implementation of GraphBackend."""
+from collections.abc import Iterator
+from contextlib import contextmanager
+from typing import Any
+import kuzu
+from sqlcg.core.graph_db import GraphBackend
+from sqlcg.core.queries import (
+    DELETE_COLUMNS_FOR_FILE,
+    DELETE_FILE,
+    DELETE_QUERIES_FOR_FILE,
+    DELETE_TABLES_FOR_FILE,
+)
+from sqlcg.core.schema import (
+    NODE_REPO,
+    SCHEMA_DDL,
+    SCHEMA_VERSION,
+)
+from sqlcg.utils.logging import getLogger
+logger = getLogger(__name__)
+class KuzuBackend(GraphBackend):
+    """KùzuDB implementation of the graph database backend."""
+    def __init__(self, db_path: str):
+        """Initialize KùzuDB backend.
+        Args:
+            db_path: Path to the KùzuDB database file (or ':memory:' for in-memory)
+        """
+        self._db_path = db_path
+        self._db = kuzu.database.Database(db_path)
+        self._conn = kuzu.Connection(self._db)
+        self._in_transaction = False
+    def init_schema(self) -> None:
+        """Initialize the database schema if not already present.
+        Creates all node and relationship tables from the schema DDL.
+        """
+        # Check if Repo node table exists (first table in DDL)
+        try:
+            self._conn.execute(f"MATCH (n:{NODE_REPO}) RETURN COUNT(*) as count LIMIT 1")
+            # If we get here, the schema is already initialized
+            logger.debug("Schema already initialized")
+            return
+        except Exception:
+            # Schema not initialized, proceed with initialization
+            pass
+        # Remove comments and split statements
+        # Split by ";" to get individual statements
+        raw_statements = []
+        current = []
+        for line in SCHEMA_DDL.split("\n"):
+            line = line.strip()
+            if line and not line.startswith("--"):
+                current.append(line)
+                if line.endswith(";"):
+                    raw_statements.append(" ".join(current))
+                    current = []
+        # Execute each statement
+        for stmt in raw_statements:
+            if stmt.strip():
+                try:
+                    self._conn.execute(stmt)
+                    logger.debug(f"Executed DDL: {stmt[:50]}...")
+                except Exception as e:
+                    logger.error(f"DDL execution failed: {stmt[:50]}...: {e}")
+                    raise
+        # Upsert the schema version
+        try:
+            self._conn.execute(
+                "MERGE (v:SchemaVersion {version: $v})",
+                {"v": SCHEMA_VERSION},
+            )
+            logger.debug(f"Wrote schema version: {SCHEMA_VERSION}")
+        except Exception as e:
+            logger.error(f"Failed to write schema version: {e}")
+            raise
+    def upsert_node(self, label: str, key: str, properties: dict[str, Any]) -> None:
+        """Upsert a node with the given label and properties.
+        Note: The key parameter is used for the primary key field. The actual
+        primary key field name depends on the label. For now, we use the key
+        as the primary key identifier.
+        Note: Properties that match the primary key field are skipped in the SET clause.
+        """
+        # Validate property keys to prevent Cypher injection
+        self._validate_props(properties)
+        pk_field = self._pk_field(label)
+        # Build the MERGE statement
+        # Format: MERGE (n:Label {pk_field: $key}) SET n.field = $field, ...
+        params = {"key": key}
+        # Filter out the primary key field from properties (cannot be updated via SET)
+        set_properties = {k: v for k, v in properties.items() if k != pk_field}
+        for k, v in set_properties.items():
+            params[k] = v
+        query = f"MERGE (n:{label} {{{pk_field}: $key}})"
+        if set_properties:
+            set_parts = [f"n.{k} = ${k}" for k in set_properties.keys()]
+            query += f" SET {', '.join(set_parts)}"
+        try:
+            self._conn.execute(query, params)
+        except Exception as e:
+            logger.error(f"upsert_node failed: {label} {key}: {e}")
+            raise
+    def upsert_edge(
+        self,
+        src_label: str,
+        src_key: str,
+        dst_label: str,
+        dst_key: str,
+        rel_type: str,
+        properties: dict[str, Any],
+    ) -> None:
+        """Upsert a relationship between two nodes."""
+        # Validate property keys to prevent Cypher injection
+        self._validate_props(properties)
+        src_pk_field = self._pk_field(src_label)
+        dst_pk_field = self._pk_field(dst_label)
+        # langchain_kuzu incompatible with our typed DDL schema.
+        query = f"""
+            MATCH (src:{src_label} {{{src_pk_field}: $src_key}})
+            MATCH (dst:{dst_label} {{{dst_pk_field}: $dst_key}})
+            MERGE (src)-[r:{rel_type}]->(dst)
+        """
+        params = {"src_key": src_key, "dst_key": dst_key}
+        if properties:
+            set_parts = [f"r.{k} = ${k}" for k in properties.keys()]
+            query += f" SET {', '.join(set_parts)}"
+            for k, v in properties.items():
+                params[k] = v
+        try:
+            self._conn.execute(query, params)
+        except Exception as e:
+            logger.error(f"upsert_edge failed: {src_label} -> {rel_type} -> {dst_label}: {e}")
+            raise
+    def run_read(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
+        """Execute a read-only query and return results."""
+        try:
+            result = self._conn.execute(query, params)
+            # KùzuDB returns a QueryResult that we need to convert to list of dicts
+            rows = []
+            column_names = result.get_column_names()  # type: ignore[union-attr]
+            for row in result:
+                # Each row is a tuple-like object with column names
+                rows.append(dict(zip(column_names, row, strict=True)))
+            return rows
+        except Exception as e:
+            logger.error(f"run_read failed: {e}")
+            raise
+    def run_write(self, query: str, params: dict[str, Any]) -> None:
+        """Execute a write query (mutation)."""
+        try:
+            self._conn.execute(query, params)
+        except Exception as e:
+            logger.error(f"run_write failed: {e}")
+            raise
+    def delete_nodes_for_file(self, file_path: str) -> None:
+        """Delete all nodes and relationships associated with a file.
+        This executes four separate Cypher statements:
+        1. Delete Column nodes for tables defined in this file
+        2. Delete Query nodes and their edges
+        3. Delete Table nodes defined in this file
+        4. Delete the File node itself
+        KùzuDB does not support multiple statements in a single execute() call,
+        so each statement is executed separately within the active transaction.
+        """
+        try:
+            # Step A: Delete Column nodes for tables defined in this file
+            self._conn.execute(DELETE_COLUMNS_FOR_FILE, {"path": file_path})
+            logger.debug(f"Deleted Column nodes for {file_path}")
+            # Step B: Delete Query nodes and their edges
+            self._conn.execute(DELETE_QUERIES_FOR_FILE, {"path": file_path})
+            logger.debug(f"Deleted Query nodes for {file_path}")
+            # Step C: Delete Table nodes defined in this file
+            self._conn.execute(DELETE_TABLES_FOR_FILE, {"path": file_path})
+            logger.debug(f"Deleted Table nodes for {file_path}")
+            # Step D: Delete the File node itself
+            self._conn.execute(DELETE_FILE, {"path": file_path})
+            logger.debug(f"Deleted File node for {file_path}")
+        except Exception as e:
+            logger.error(f"delete_nodes_for_file failed for {file_path}: {e}")
+            raise
+    def get_schema_version(self) -> str | None:
+        """Get the stored schema version from the database.
+        Returns:
+            The schema version string, or None if not set.
+        """
+        try:
+            result = self.run_read(
+                "MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {}
+            )
+            return result[0]["version"] if result else None
+        except Exception as e:
+            logger.warning(f"Failed to read schema version: {e}")
+            return None
+    def close(self) -> None:
+        """Close the database connection."""
+        try:
+            self._conn.close()
+            self._db.close()
+            logger.debug("KuzuBackend connection closed")
+        except Exception as e:
+            logger.error(f"Error closing KuzuBackend: {e}")
+            raise
+    @contextmanager
+    def transaction(self) -> Iterator["KuzuBackend"]:
+        """Context manager for KùzuDB transactions.
+        Uses Cypher's BEGIN TRANSACTION / COMMIT / ROLLBACK commands.
+        KùzuDB 0.11.3 transaction API: execute("BEGIN TRANSACTION"),
+        then execute("COMMIT") or execute("ROLLBACK").
+        Yields:
+            self (the KuzuBackend instance)
+        Raises:
+            Any exception raised in the context triggers ROLLBACK.
+        """
+        try:
+            self._conn.execute("BEGIN TRANSACTION")
+            self._in_transaction = True
+            yield self
+            self._conn.execute("COMMIT")
+            self._in_transaction = False
+        except Exception:
+            try:
+                self._conn.execute("ROLLBACK")
+                self._in_transaction = False
+            except Exception as rollback_err:
+                logger.error(f"Rollback failed: {rollback_err}")
+                self._in_transaction = False  # defensive reset
+            raise