sql-glider 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/METADATA +177 -5
- sql_glider-0.1.4.dist-info/RECORD +34 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/entry_points.txt +3 -0
- sqlglider/_version.py +2 -2
- sqlglider/catalog/__init__.py +30 -0
- sqlglider/catalog/base.py +99 -0
- sqlglider/catalog/databricks.py +255 -0
- sqlglider/catalog/registry.py +121 -0
- sqlglider/cli.py +467 -15
- sqlglider/dissection/__init__.py +17 -0
- sqlglider/dissection/analyzer.py +767 -0
- sqlglider/dissection/formatters.py +222 -0
- sqlglider/dissection/models.py +112 -0
- sqlglider/graph/builder.py +46 -8
- sqlglider/lineage/analyzer.py +281 -13
- sqlglider/utils/config.py +25 -0
- sql_glider-0.1.2.dist-info/RECORD +0 -26
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""Output formatters for dissection results."""
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
|
|
12
|
+
from sqlglider.dissection.models import QueryDissectionResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DissectionTextFormatter:
    """Format dissection results as Rich tables for terminal display."""

    @staticmethod
    def format(results: List[QueryDissectionResult], console: Console) -> None:
        """
        Format and print dissection results as Rich tables.

        Creates a styled table for each query showing all extracted components.

        Args:
            results: List of QueryDissectionResult objects
            console: Rich Console instance for output
        """
        if not results:
            console.print("[yellow]No dissection results found.[/yellow]")
            return

        for i, result in enumerate(results):
            # Add spacing between queries (except for first)
            if i > 0:
                console.print()

            # Create table with query info as title
            title = (
                f"Query {result.metadata.query_index} "
                f"({result.metadata.statement_type}): "
                f"{result.metadata.query_preview}"
            )
            table = Table(title=title, title_style="bold")

            table.add_column("Index", style="dim", width=6)
            table.add_column("Type", style="cyan", width=16)
            table.add_column("Name", style="green", min_width=10)
            table.add_column("Depth", style="yellow", width=6)
            table.add_column("Exec?", style="magenta", width=6)
            table.add_column("Location", style="blue", min_width=15)
            table.add_column("SQL Preview", style="dim", min_width=30)

            # Add rows for each component
            for component in result.components:
                # Collapse runs of whitespace, then truncate for preview.
                # Fix: decide the "..." suffix from the *normalized* string,
                # not the raw SQL — otherwise a query that is long only
                # because of whitespace gets a spurious ellipsis.
                normalized = " ".join(component.sql.split())
                sql_preview = normalized[:50]
                if len(normalized) > 50:
                    sql_preview += "..."

                table.add_row(
                    str(component.component_index),
                    component.component_type.value,
                    component.name or "-",
                    str(component.depth),
                    "Yes" if component.is_executable else "No",
                    component.location[:35] + "..."
                    if len(component.location) > 35
                    else component.location,
                    sql_preview,
                )

            console.print(table)
            console.print(
                f"[dim]Total components: {result.metadata.total_components}[/dim]"
            )
|
|
78
|
+
|
|
79
|
+
class DissectionJsonFormatter:
    """Format dissection results as JSON."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Serialize dissection results to a JSON string.

        The payload has a single top-level "queries" array; each entry
        carries the query metadata, the extracted components, and the
        original SQL text:

        {
            "queries": [
                {
                    "query_index": 0,
                    "query_preview": "SELECT ...",
                    "statement_type": "INSERT",
                    "total_components": 5,
                    "components": [
                        {
                            "component_type": "CTE",
                            "component_index": 0,
                            "name": "order_totals",
                            "sql": "SELECT ...",
                            "parent_index": null,
                            "depth": 0,
                            "is_executable": true,
                            "dependencies": [],
                            "location": "WITH clause"
                        }
                    ],
                    "original_sql": "WITH order_totals AS ..."
                }
            ]
        }

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            JSON-formatted string
        """

        def _component_payload(comp) -> dict:
            # Flatten one SQLComponent into plain JSON-serializable values.
            return {
                "component_type": comp.component_type.value,
                "component_index": comp.component_index,
                "name": comp.name,
                "sql": comp.sql,
                "parent_index": comp.parent_index,
                "depth": comp.depth,
                "is_executable": comp.is_executable,
                "dependencies": comp.dependencies,
                "location": comp.location,
            }

        payload = {
            "queries": [
                {
                    "query_index": res.metadata.query_index,
                    "query_preview": res.metadata.query_preview,
                    "statement_type": res.metadata.statement_type,
                    "total_components": res.metadata.total_components,
                    "components": [
                        _component_payload(comp) for comp in res.components
                    ],
                    "original_sql": res.original_sql,
                }
                for res in results
            ]
        }
        return json.dumps(payload, indent=2)
|
|
146
|
+
|
|
147
|
+
class DissectionCsvFormatter:
    """Format dissection results as CSV."""

    @staticmethod
    def format(results: List[QueryDissectionResult]) -> str:
        """
        Serialize dissection results to a CSV string.

        One data row per component, e.g.:
        query_index,component_index,component_type,name,depth,is_executable,location,dependencies,sql
        0,0,CTE,order_totals,0,true,WITH clause,,"SELECT ..."

        Args:
            results: List of QueryDissectionResult objects

        Returns:
            CSV-formatted string
        """
        if not results:
            return ""

        buffer = StringIO()
        writer = csv.writer(buffer)

        # Header row first, then one row per component across all queries.
        writer.writerow(
            [
                "query_index",
                "component_index",
                "component_type",
                "name",
                "depth",
                "is_executable",
                "location",
                "dependencies",
                "sql",
            ]
        )

        for result in results:
            for component in result.components:
                writer.writerow(
                    [
                        result.metadata.query_index,
                        component.component_index,
                        component.component_type.value,
                        component.name or "",
                        component.depth,
                        "true" if component.is_executable else "false",
                        component.location,
                        # Dependencies are packed into one cell, ';'-separated.
                        ";".join(component.dependencies),
                        component.sql,
                    ]
                )

        return buffer.getvalue()
|
|
206
|
+
|
|
207
|
+
class OutputWriter:
    """Write formatted output to file or stdout."""

    @staticmethod
    def write(content: str, output_file: Optional[Path] = None) -> None:
        """
        Write content to file or stdout.

        Args:
            content: The content to write
            output_file: Optional file path. If None, writes to stdout.
        """
        if not output_file:
            # No destination given: emit to stdout.
            print(content)
            return
        output_file.write_text(content, encoding="utf-8")
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Pydantic models for SQL dissection results."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ComponentType(str, Enum):
    """Type of SQL component extracted from a query.

    Subclasses ``str`` so member values compare and serialize as plain
    strings; formatters emit ``.value`` directly.
    """

    # NOTE(review): precise extraction rules for each kind live in the
    # dissection analyzer (not shown in this file).
    CTE = "CTE"  # common table expression (WITH clause)
    MAIN_QUERY = "MAIN_QUERY"
    SUBQUERY = "SUBQUERY"
    SCALAR_SUBQUERY = "SCALAR_SUBQUERY"
    TARGET_TABLE = "TARGET_TABLE"
    SOURCE_QUERY = "SOURCE_QUERY"
    UNION_BRANCH = "UNION_BRANCH"
|
|
20
|
+
|
|
21
|
+
class SQLComponent(BaseModel):
    """Represents a single SQL component extracted from a query.

    ``component_index`` reflects extraction order within the query;
    ``parent_index`` and ``depth`` together encode nesting of subqueries.
    Field-level semantics are carried by the ``Field`` descriptions below.
    """

    component_type: ComponentType = Field(
        ..., description="Type of component (CTE, SUBQUERY, etc.)"
    )
    component_index: int = Field(
        ..., description="0-based index within query (order of extraction)"
    )
    name: Optional[str] = Field(
        None,
        description="Name/alias of component (CTE name, subquery alias, target table)",
    )
    sql: str = Field(
        ..., description="Extracted SQL for this component (executable if applicable)"
    )
    parent_index: Optional[int] = Field(
        None, description="Index of parent component (for nested subqueries)"
    )
    depth: int = Field(
        default=0, description="Nesting depth (0 = top-level, 1+ = nested)"
    )
    is_executable: bool = Field(
        default=True, description="Whether this SQL can be executed standalone"
    )
    dependencies: List[str] = Field(
        default_factory=list,
        description="Names of CTEs this component depends on",
    )
    location: str = Field(
        ..., description="Human-readable location context (e.g., 'WITH clause')"
    )
|
54
|
+
|
|
55
|
+
class QueryMetadata(BaseModel):
    """Metadata about a dissected query.

    One instance per statement in a (possibly multi-statement) input;
    ``query_index`` is the statement's position in that input.
    """

    query_index: int = Field(..., description="0-based query index in multi-query file")
    query_preview: str = Field(..., description="First 100 chars of original query")
    statement_type: str = Field(
        ..., description="Type of SQL statement (SELECT, INSERT, CREATE, etc.)"
    )
    total_components: int = Field(
        ..., description="Total number of components extracted"
    )
|
67
|
+
|
|
68
|
+
class QueryDissectionResult(BaseModel):
    """Complete dissection result for a single query."""

    metadata: QueryMetadata
    components: List[SQLComponent] = Field(
        default_factory=list,
        description="All extracted components in order",
    )
    original_sql: str = Field(
        ..., description="Original SQL query for reference/validation"
    )

    def get_component_by_name(self, name: str) -> Optional[SQLComponent]:
        """Find a component by name (case-insensitive).

        Args:
            name: The component name to search for.

        Returns:
            The matching SQLComponent or None if not found.
        """
        target = name.lower()
        # First match wins; unnamed components (name is None) never match.
        return next(
            (
                comp
                for comp in self.components
                if comp.name and comp.name.lower() == target
            ),
            None,
        )

    def get_components_by_type(self, comp_type: ComponentType) -> List[SQLComponent]:
        """Get all components of a specific type.

        Args:
            comp_type: The ComponentType to filter by.

        Returns:
            List of matching SQLComponent objects.
        """
        matches: List[SQLComponent] = []
        for comp in self.components:
            if comp.component_type == comp_type:
                matches.append(comp)
        return matches

    def get_executable_components(self) -> List[SQLComponent]:
        """Get all executable components.

        Returns:
            List of SQLComponent objects that can be executed standalone.
        """
        return list(filter(lambda comp: comp.is_executable, self.components))
|
sqlglider/graph/builder.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Callable, Dict, List, Optional, Set
|
|
|
6
6
|
|
|
7
7
|
import rustworkx as rx
|
|
8
8
|
from rich.console import Console
|
|
9
|
+
from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn
|
|
9
10
|
|
|
10
11
|
from sqlglider.global_models import AnalysisLevel, NodeFormat
|
|
11
12
|
from sqlglider.graph.models import (
|
|
@@ -166,11 +167,8 @@ class GraphBuilder:
|
|
|
166
167
|
else:
|
|
167
168
|
pattern = glob_pattern
|
|
168
169
|
|
|
169
|
-
for
|
|
170
|
-
|
|
171
|
-
self.add_file(sql_file, dialect)
|
|
172
|
-
|
|
173
|
-
return self
|
|
170
|
+
sql_files = [f for f in sorted(dir_path.glob(pattern)) if f.is_file()]
|
|
171
|
+
return self.add_files(sql_files, dialect)
|
|
174
172
|
|
|
175
173
|
def add_manifest(
|
|
176
174
|
self,
|
|
@@ -194,6 +192,8 @@ class GraphBuilder:
|
|
|
194
192
|
manifest = Manifest.from_csv(manifest_path)
|
|
195
193
|
base_dir = manifest_path.parent
|
|
196
194
|
|
|
195
|
+
# Collect files with their dialects
|
|
196
|
+
files_with_dialects: List[tuple[Path, str]] = []
|
|
197
197
|
for entry in manifest.entries:
|
|
198
198
|
# Resolve file path relative to manifest location
|
|
199
199
|
file_path = Path(entry.file_path)
|
|
@@ -202,7 +202,25 @@ class GraphBuilder:
|
|
|
202
202
|
|
|
203
203
|
# Use entry dialect, then CLI dialect, then builder default
|
|
204
204
|
entry_dialect = entry.dialect or dialect or self.dialect
|
|
205
|
-
|
|
205
|
+
files_with_dialects.append((file_path, entry_dialect))
|
|
206
|
+
|
|
207
|
+
# Process with progress
|
|
208
|
+
if files_with_dialects:
|
|
209
|
+
total = len(files_with_dialects)
|
|
210
|
+
with Progress(
|
|
211
|
+
TextColumn("[progress.description]{task.description}"),
|
|
212
|
+
BarColumn(),
|
|
213
|
+
TaskProgressColumn(),
|
|
214
|
+
console=console,
|
|
215
|
+
transient=False,
|
|
216
|
+
) as progress:
|
|
217
|
+
task = progress.add_task("Parsing", total=total)
|
|
218
|
+
for i, (file_path, file_dialect) in enumerate(
|
|
219
|
+
files_with_dialects, start=1
|
|
220
|
+
):
|
|
221
|
+
console.print(f"Parsing file {i}/{total}: {file_path.name}")
|
|
222
|
+
self.add_file(file_path, file_dialect)
|
|
223
|
+
progress.advance(task)
|
|
206
224
|
|
|
207
225
|
return self
|
|
208
226
|
|
|
@@ -210,6 +228,7 @@ class GraphBuilder:
|
|
|
210
228
|
self,
|
|
211
229
|
file_paths: List[Path],
|
|
212
230
|
dialect: Optional[str] = None,
|
|
231
|
+
show_progress: bool = True,
|
|
213
232
|
) -> "GraphBuilder":
|
|
214
233
|
"""
|
|
215
234
|
Add lineage from multiple SQL files.
|
|
@@ -217,12 +236,31 @@ class GraphBuilder:
|
|
|
217
236
|
Args:
|
|
218
237
|
file_paths: List of paths to SQL files
|
|
219
238
|
dialect: SQL dialect (uses builder default if not specified)
|
|
239
|
+
show_progress: Whether to print progress messages
|
|
220
240
|
|
|
221
241
|
Returns:
|
|
222
242
|
self for method chaining
|
|
223
243
|
"""
|
|
224
|
-
|
|
225
|
-
self
|
|
244
|
+
if not file_paths:
|
|
245
|
+
return self
|
|
246
|
+
|
|
247
|
+
if show_progress:
|
|
248
|
+
total = len(file_paths)
|
|
249
|
+
with Progress(
|
|
250
|
+
TextColumn("[progress.description]{task.description}"),
|
|
251
|
+
BarColumn(),
|
|
252
|
+
TaskProgressColumn(),
|
|
253
|
+
console=console,
|
|
254
|
+
transient=False,
|
|
255
|
+
) as progress:
|
|
256
|
+
task = progress.add_task("Parsing", total=total)
|
|
257
|
+
for i, file_path in enumerate(file_paths, start=1):
|
|
258
|
+
console.print(f"Parsing file {i}/{total}: {file_path.name}")
|
|
259
|
+
self.add_file(file_path, dialect)
|
|
260
|
+
progress.advance(task)
|
|
261
|
+
else:
|
|
262
|
+
for file_path in file_paths:
|
|
263
|
+
self.add_file(file_path, dialect)
|
|
226
264
|
return self
|
|
227
265
|
|
|
228
266
|
def _ensure_node(
|