sql-glider 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/METADATA +177 -5
- sql_glider-0.1.4.dist-info/RECORD +34 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/entry_points.txt +3 -0
- sqlglider/_version.py +2 -2
- sqlglider/catalog/__init__.py +30 -0
- sqlglider/catalog/base.py +99 -0
- sqlglider/catalog/databricks.py +255 -0
- sqlglider/catalog/registry.py +121 -0
- sqlglider/cli.py +467 -15
- sqlglider/dissection/__init__.py +17 -0
- sqlglider/dissection/analyzer.py +767 -0
- sqlglider/dissection/formatters.py +222 -0
- sqlglider/dissection/models.py +112 -0
- sqlglider/graph/builder.py +46 -8
- sqlglider/lineage/analyzer.py +281 -13
- sqlglider/utils/config.py +25 -0
- sql_glider-0.1.2.dist-info/RECORD +0 -26
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.2.dist-info → sql_glider-0.1.4.dist-info}/licenses/LICENSE +0 -0
sqlglider/catalog/registry.py
ADDED
@@ -0,0 +1,121 @@
+"""Catalog registry with plugin discovery via entry points.
+
+This module handles discovering and instantiating catalog providers from
+Python entry points, allowing third-party packages to register
+custom catalogs.
+"""
+
+import sys
+from typing import Dict, List, Type
+
+from sqlglider.catalog.base import Catalog, CatalogError
+
+# Cache for discovered catalogs
+_catalog_cache: Dict[str, Type[Catalog]] = {}
+_discovery_done: bool = False
+
+
+def _discover_catalogs() -> None:
+    """Discover catalogs from entry points.
+
+    Uses importlib.metadata to find all registered catalogs
+    in the 'sqlglider.catalogs' entry point group.
+    """
+    global _discovery_done, _catalog_cache
+
+    if _discovery_done:
+        return
+
+    if sys.version_info >= (3, 10):
+        from importlib.metadata import entry_points
+
+        eps = entry_points(group="sqlglider.catalogs")
+    else:
+        from importlib.metadata import entry_points
+
+        all_eps = entry_points()
+        eps = all_eps.get("sqlglider.catalogs", [])
+
+    for ep in eps:
+        try:
+            catalog_class = ep.load()
+            if isinstance(catalog_class, type) and issubclass(catalog_class, Catalog):
+                _catalog_cache[ep.name] = catalog_class
+        except Exception:
+            # Skip catalogs that fail to load
+            # This allows graceful handling of missing optional dependencies
+            pass
+
+    _discovery_done = True
+
+
+def get_catalog(name: str) -> Catalog:
+    """Get a catalog instance by name.
+
+    Args:
+        name: The name of the catalog (e.g., "databricks").
+
+    Returns:
+        An instance of the requested catalog.
+
+    Raises:
+        CatalogError: If the catalog is not found.
+
+    Example:
+        >>> catalog = get_catalog("databricks")
+        >>> ddl = catalog.get_ddl("my_catalog.my_schema.my_table")
+    """
+    _discover_catalogs()
+
+    if name not in _catalog_cache:
+        available = ", ".join(sorted(_catalog_cache.keys()))
+        raise CatalogError(
+            f"Unknown catalog '{name}'. Available catalogs: {available or 'none'}. "
+            f"You may need to install an optional dependency (e.g., pip install sql-glider[databricks])."
+        )
+
+    return _catalog_cache[name]()
+
+
+def list_catalogs() -> List[str]:
+    """List all available catalog names.
+
+    Returns:
+        A sorted list of available catalog names.
+
+    Example:
+        >>> catalogs = list_catalogs()
+        >>> print(catalogs)
+        ['databricks']
+    """
+    _discover_catalogs()
+    return sorted(_catalog_cache.keys())
+
+
+def register_catalog(name: str, catalog_class: Type[Catalog]) -> None:
+    """Register a catalog programmatically.
+
+    This is primarily useful for testing or for registering catalogs
+    that aren't installed via entry points.
+
+    Args:
+        name: The name to register the catalog under.
+        catalog_class: The catalog class to register.
+
+    Raises:
+        ValueError: If catalog_class is not a subclass of Catalog.
+    """
+    if not isinstance(catalog_class, type) or not issubclass(catalog_class, Catalog):
+        raise ValueError(f"{catalog_class} must be a subclass of Catalog")
+
+    _catalog_cache[name] = catalog_class
+
+
+def clear_registry() -> None:
+    """Clear the catalog registry.
+
+    This is primarily useful for testing.
+    """
+    global _discovery_done, _catalog_cache
+    _catalog_cache.clear()
+    _discovery_done = False
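Taken together, the registry exposes a four-function surface (get_catalog, list_catalogs, register_catalog, clear_registry). A minimal sketch of how a plugin might hook into it, assuming Catalog can be satisfied by implementing the methods the CLI calls (configure, get_ddl, get_ddl_batch); MyCatalog and my_pkg are hypothetical, and the full abstract surface of Catalog lives in sqlglider/catalog/base.py, which is not shown in this diff:

```python
# Sketch of a third-party catalog plugging into the registry above.
# MyCatalog and my_pkg are hypothetical; the methods implemented here are
# the ones the CLI calls (configure, get_ddl, get_ddl_batch).
from typing import Dict, List

from sqlglider.catalog.base import Catalog
from sqlglider.catalog.registry import get_catalog, list_catalogs, register_catalog


class MyCatalog(Catalog):
    def configure(self, config: Dict[str, str]) -> None:
        self._config = config

    def get_ddl(self, table: str) -> str:
        return f"-- stub DDL for {table}"

    def get_ddl_batch(self, tables: List[str]) -> Dict[str, str]:
        return {t: self.get_ddl(t) for t in tables}


# Programmatic registration (the testing path). An installed package would
# instead declare an entry point in pyproject.toml:
#
#   [project.entry-points."sqlglider.catalogs"]
#   mycatalog = "my_pkg.catalog:MyCatalog"
register_catalog("mycatalog", MyCatalog)

print(list_catalogs())              # e.g. ['databricks', 'mycatalog']
catalog = get_catalog("mycatalog")  # the registry instantiates the class
print(catalog.get_ddl("my_catalog.my_schema.my_table"))
```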
sqlglider/cli.py
CHANGED
@@ -332,8 +332,16 @@ def lineage(
         raise typer.Exit(1)
 
 
-@app.command()
-def tables(
+# Tables command group
+tables_app = typer.Typer(
+    name="tables",
+    help="Table-related analysis commands.",
+)
+app.add_typer(tables_app, name="tables")
+
+
+@tables_app.command("overview")
+def tables_overview(
     sql_file: Annotated[
         typer.FileText,
         typer.Argument(
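The hunk above converts the flat `tables` command into a Typer command group. A self-contained sketch of that pattern, independent of sqlglider's own code, showing how `add_typer` produces `tables overview` and `tables pull` sub-commands:

```python
# Standalone sketch of the Typer sub-command pattern adopted here: a parent
# app gains a "tables" group, so the old `tables` command becomes
# `tables overview` and new verbs like `tables pull` can live beside it.
import typer

app = typer.Typer()
tables_app = typer.Typer(name="tables", help="Table-related analysis commands.")
app.add_typer(tables_app, name="tables")


@tables_app.command("overview")
def tables_overview(sql_file: str = typer.Argument(...)) -> None:
    """Invoked as: prog tables overview FILE"""
    typer.echo(f"analyzing {sql_file}")


@tables_app.command("pull")
def tables_pull(sql_file: str = typer.Argument(...)) -> None:
    """Invoked as: prog tables pull FILE"""
    typer.echo(f"pulling DDL for tables in {sql_file}")


if __name__ == "__main__":
    app()
```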
@@ -396,22 +404,22 @@ def tables(
     Examples:
 
         # List all tables in a SQL file
-        sqlglider tables query.sql
+        sqlglider tables overview query.sql
 
         # Export to JSON
-        sqlglider tables query.sql --output-format json
+        sqlglider tables overview query.sql --output-format json
 
         # Export to CSV file
-        sqlglider tables query.sql --output-format csv --output-file tables.csv
+        sqlglider tables overview query.sql --output-format csv --output-file tables.csv
 
         # Use different SQL dialect
-        sqlglider tables query.sql --dialect postgres
+        sqlglider tables overview query.sql --dialect postgres
 
         # Filter to queries referencing a specific table
-        sqlglider tables query.sql --table customers
+        sqlglider tables overview query.sql --table customers
 
         # Analyze templated SQL with Jinja2
-        sqlglider tables query.sql --templater jinja --var schema=analytics
+        sqlglider tables overview query.sql --templater jinja --var schema=analytics
     """
     # Load configuration from sqlglider.toml (if it exists)
     config = load_config()
@@ -517,6 +525,263 @@ def tables(
         raise typer.Exit(1)
 
 
+@tables_app.command("pull")
+def tables_pull(
+    sql_file: Annotated[
+        typer.FileText,
+        typer.Argument(
+            default_factory=lambda: sys.stdin,
+            show_default="stdin",
+            help="Path to SQL file to analyze (reads from stdin if not provided)",
+        ),
+    ],
+    catalog_type: Optional[str] = typer.Option(
+        None,
+        "--catalog-type",
+        "-c",
+        help="Catalog provider (e.g., 'databricks'). Required if not in config.",
+    ),
+    ddl_folder: Optional[Path] = typer.Option(
+        None,
+        "--ddl-folder",
+        "-o",
+        help="Output folder for DDL files. If not provided, outputs to stdout.",
+    ),
+    dialect: Optional[str] = typer.Option(
+        None,
+        "--dialect",
+        "-d",
+        help="SQL dialect (default: spark, or from config)",
+    ),
+    templater: Optional[str] = typer.Option(
+        None,
+        "--templater",
+        "-t",
+        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
+    ),
+    var: Optional[List[str]] = typer.Option(
+        None,
+        "--var",
+        "-v",
+        help="Template variable in key=value format (repeatable)",
+    ),
+    vars_file: Optional[Path] = typer.Option(
+        None,
+        "--vars-file",
+        exists=True,
+        help="Path to variables file (JSON or YAML)",
+    ),
+    list_available: bool = typer.Option(
+        False,
+        "--list",
+        "-l",
+        help="List available catalog providers and exit",
+    ),
+) -> None:
+    """
+    Pull DDL definitions from a remote catalog for tables used in SQL.
+
+    Analyzes the SQL file to find referenced tables, then fetches their DDL
+    from the specified catalog provider (e.g., Databricks Unity Catalog).
+
+    CTEs are automatically excluded since they don't exist in remote catalogs.
+
+    Configuration can be set in sqlglider.toml in the current directory.
+    CLI arguments override configuration file values.
+
+    Examples:
+
+        # Pull DDL for tables in a SQL file (output to stdout)
+        sqlglider tables pull query.sql --catalog-type databricks
+
+        # Pull DDL to a folder (one file per table)
+        sqlglider tables pull query.sql -c databricks -o ./ddl/
+
+        # Use config file for catalog settings
+        sqlglider tables pull query.sql
+
+        # With templating
+        sqlglider tables pull query.sql -c databricks --templater jinja --var schema=prod
+
+        # List available catalog providers
+        sqlglider tables pull --list
+    """
+    from sqlglider.catalog import CatalogError, get_catalog, list_catalogs
+    from sqlglider.lineage.analyzer import ObjectType
+
+    # Handle --list option
+    if list_available:
+        available = list_catalogs()
+        if available:
+            console.print("[bold]Available catalog providers:[/bold]")
+            for name in available:
+                console.print(f" - {name}")
+        else:
+            console.print(
+                "[yellow]No catalog providers available.[/yellow]\n"
+                "Install a provider with: pip install sql-glider[databricks]"
+            )
+        raise typer.Exit(0)
+
+    # Load configuration from sqlglider.toml (if it exists)
+    config = load_config()
+
+    # Apply priority resolution: CLI args > config > defaults
+    dialect = dialect or config.dialect or "spark"
+    templater = templater or config.templater  # None means no templating
+    catalog_type = catalog_type or config.catalog_type
+    ddl_folder_str = config.ddl_folder if ddl_folder is None else None
+    if ddl_folder is None and ddl_folder_str:
+        ddl_folder = Path(ddl_folder_str)
+
+    # Validate catalog_type is provided
+    if not catalog_type:
+        err_console.print(
+            "[red]Error:[/red] No catalog provider specified. "
+            "Use --catalog-type or set catalog_type in sqlglider.toml."
+        )
+        raise typer.Exit(1)
+
+    # Check if reading from stdin (cross-platform: name is "<stdin>" on all OS)
+    is_stdin = sql_file.name == "<stdin>"
+
+    try:
+        # Check if stdin is being used without input
+        if is_stdin and sys.stdin.isatty():
+            err_console.print(
+                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
+                "Provide a SQL file path or pipe SQL via stdin."
+            )
+            raise typer.Exit(1)
+
+        # Read SQL from file or stdin
+        sql = sql_file.read()
+
+        # Determine source path for templating (None if stdin)
+        source_path = None if is_stdin else Path(sql_file.name)
+
+        # Apply templating if specified
+        sql = _apply_templating(
+            sql,
+            templater_name=templater,
+            cli_vars=var,
+            vars_file=vars_file,
+            config=config,
+            source_path=source_path,
+        )
+
+        # Create analyzer and extract tables
+        analyzer = LineageAnalyzer(sql, dialect=dialect)
+        table_results = analyzer.analyze_tables()
+
+        # Collect unique table names, excluding CTEs
+        table_names: set[str] = set()
+        for result in table_results:
+            for table_info in result.tables:
+                if table_info.object_type != ObjectType.CTE:
+                    table_names.add(table_info.name)
+
+        if not table_names:
+            console.print("[yellow]No tables found in SQL (CTEs excluded).[/yellow]")
+            raise typer.Exit(0)
+
+        # Get catalog instance and configure it
+        catalog = get_catalog(catalog_type)
+
+        # Build catalog config from config file
+        catalog_config: dict[str, str] = {}
+        if (
+            config.catalog
+            and catalog_type == "databricks"
+            and config.catalog.databricks
+        ):
+            db_config = config.catalog.databricks
+            if db_config.warehouse_id:
+                catalog_config["warehouse_id"] = db_config.warehouse_id
+            if db_config.profile:
+                catalog_config["profile"] = db_config.profile
+            if db_config.host:
+                catalog_config["host"] = db_config.host
+            if db_config.token:
+                catalog_config["token"] = db_config.token
+
+        catalog.configure(catalog_config)
+
+        # Fetch DDL for all tables
+        console.print(
+            f"[dim]Fetching DDL for {len(table_names)} table(s) from {catalog_type}...[/dim]"
+        )
+        ddl_results = catalog.get_ddl_batch(list(table_names))
+
+        # Count successes and failures
+        successes = 0
+        failures = 0
+
+        # Output DDL
+        if ddl_folder:
+            # Create output folder if it doesn't exist
+            ddl_folder.mkdir(parents=True, exist_ok=True)
+
+            for table_name, ddl in ddl_results.items():
+                if ddl.startswith("ERROR:"):
+                    err_console.print(f"[yellow]Warning:[/yellow] {table_name}: {ddl}")
+                    failures += 1
+                else:
+                    # Write DDL to file named by table identifier
+                    file_name = f"{table_name}.sql"
+                    file_path = ddl_folder / file_name
+                    file_path.write_text(ddl, encoding="utf-8")
+                    successes += 1
+
+            console.print(
+                f"[green]Success:[/green] Wrote {successes} DDL file(s) to {ddl_folder}"
+            )
+            if failures > 0:
+                console.print(
+                    f"[yellow]Warning:[/yellow] {failures} table(s) failed to fetch"
+                )
+        else:
+            # Output to stdout
+            for table_name, ddl in ddl_results.items():
+                if ddl.startswith("ERROR:"):
+                    err_console.print(f"[yellow]Warning:[/yellow] {table_name}: {ddl}")
+                    failures += 1
+                else:
+                    print(f"-- Table: {table_name}")
+                    print(ddl)
+                    print()
+                    successes += 1
+
+            if failures > 0:
+                err_console.print(
+                    f"\n[yellow]Warning:[/yellow] {failures} table(s) failed to fetch"
+                )
+
+    except FileNotFoundError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except ParseError as e:
+        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
+        raise typer.Exit(1)
+
+    except TemplaterError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except CatalogError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except ValueError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except Exception as e:
+        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
+        raise typer.Exit(1)
+
+
 @app.command()
 def template(
     sql_file: Annotated[
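Stripped of CLI plumbing, `tables pull` reduces to a small catalog workflow. A condensed sketch of the same flow used programmatically, assuming the `get_catalog`/`configure`/`get_ddl_batch` surface and the `ERROR:` string-prefix convention shown in this diff; the table names and config values are illustrative:

```python
# Programmatic equivalent of `sqlglider tables pull`, condensed. Assumes the
# catalog API shown in this diff (get_catalog, configure, get_ddl_batch) and
# the "ERROR:" prefix convention the CLI uses to detect failed fetches.
from pathlib import Path

from sqlglider.catalog import get_catalog

catalog = get_catalog("databricks")
catalog.configure({"profile": "DEFAULT", "warehouse_id": "abc123"})  # illustrative values

tables = ["main.analytics.orders", "main.analytics.customers"]      # illustrative names
ddl_results = catalog.get_ddl_batch(tables)

out = Path("./ddl")
out.mkdir(parents=True, exist_ok=True)
for table_name, ddl in ddl_results.items():
    if ddl.startswith("ERROR:"):   # failed fetch: report it, write nothing
        print(f"warning: {table_name}: {ddl}")
    else:
        (out / f"{table_name}.sql").write_text(ddl, encoding="utf-8")
```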
@@ -821,21 +1086,21 @@ def graph_build(
     if manifest:
         builder.add_manifest(manifest, dialect=dialect)
 
-    # Process paths
+    # Process paths - collect all files first for progress tracking
     if paths:
+        all_files: list[Path] = []
         for path in paths:
             if path.is_dir():
-
-
-
-                    glob_pattern=glob_pattern,
-                    dialect=dialect,
+                pattern = f"**/{glob_pattern}" if recursive else glob_pattern
+                all_files.extend(
+                    f for f in sorted(path.glob(pattern)) if f.is_file()
                 )
             elif path.is_file():
-
+                all_files.append(path)
             else:
                 err_console.print(f"[red]Error:[/red] Path not found: {path}")
                 raise typer.Exit(1)
+        builder.add_files(all_files, dialect=dialect)
 
     # Build and save graph
     graph = builder.build()
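One detail worth noting in the hunk above: `pathlib.Path.glob` only recurses into subdirectories when the pattern contains `**`, which is why the new code prefixes `**/` when `recursive` is set. A standalone illustration (the `queries/` layout is hypothetical):

```python
# How the recursive flag maps onto pathlib globbing; the directory layout is
# hypothetical. glob("*.sql") matches only the top level of `queries/`,
# while glob("**/*.sql") also walks its subdirectories.
from pathlib import Path

root = Path("queries")
glob_pattern = "*.sql"

for recursive in (False, True):
    pattern = f"**/{glob_pattern}" if recursive else glob_pattern
    files = [f for f in sorted(root.glob(pattern)) if f.is_file()]
    print(recursive, [str(f) for f in files])
```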
@@ -1133,5 +1398,192 @@ def _format_query_result_csv(result) -> None:
     )
 
 
+@app.command()
+def dissect(
+    sql_file: Annotated[
+        typer.FileText,
+        typer.Argument(
+            default_factory=lambda: sys.stdin,
+            show_default="stdin",
+            help="Path to SQL file to dissect (reads from stdin if not provided)",
+        ),
+    ],
+    dialect: Optional[str] = typer.Option(
+        None,
+        "--dialect",
+        "-d",
+        help="SQL dialect (default: spark, or from config)",
+    ),
+    output_format: Optional[str] = typer.Option(
+        None,
+        "--output-format",
+        "-f",
+        help="Output format: 'text', 'json', or 'csv' (default: text, or from config)",
+    ),
+    output_file: Optional[Path] = typer.Option(
+        None,
+        "--output-file",
+        "-o",
+        help="Write output to file instead of stdout",
+    ),
+    templater: Optional[str] = typer.Option(
+        None,
+        "--templater",
+        "-t",
+        help="Templater for SQL preprocessing (e.g., 'jinja', 'none')",
+    ),
+    var: Optional[List[str]] = typer.Option(
+        None,
+        "--var",
+        "-v",
+        help="Template variable in key=value format (repeatable)",
+    ),
+    vars_file: Optional[Path] = typer.Option(
+        None,
+        "--vars-file",
+        exists=True,
+        help="Path to variables file (JSON or YAML)",
+    ),
+) -> None:
+    """
+    Dissect SQL queries into constituent components.
+
+    Extracts CTEs, subqueries, main query, DML targets, source SELECTs,
+    UNION branches, and scalar subqueries for analysis and unit testing.
+
+    Configuration can be set in sqlglider.toml in the current directory.
+    CLI arguments override configuration file values.
+
+    Examples:
+
+        # Dissect a SQL file
+        sqlglider dissect query.sql
+
+        # Export to JSON format
+        sqlglider dissect query.sql --output-format json
+
+        # Export to CSV file
+        sqlglider dissect query.sql --output-format csv --output-file dissected.csv
+
+        # Use different SQL dialect
+        sqlglider dissect query.sql --dialect postgres
+
+        # Dissect templated SQL with Jinja2
+        sqlglider dissect query.sql --templater jinja --var schema=analytics
+    """
+    from sqlglider.dissection.analyzer import DissectionAnalyzer
+    from sqlglider.dissection.formatters import (
+        DissectionCsvFormatter,
+        DissectionJsonFormatter,
+        DissectionTextFormatter,
+    )
+    from sqlglider.dissection.formatters import (
+        OutputWriter as DissectionOutputWriter,
+    )
+
+    # Load configuration from sqlglider.toml (if it exists)
+    config = load_config()
+
+    # Apply priority resolution: CLI args > config > defaults
+    dialect = dialect or config.dialect or "spark"
+    output_format = output_format or config.output_format or "text"
+    templater = templater or config.templater  # None means no templating
+
+    # Validate output format
+    if output_format not in ["text", "json", "csv"]:
+        err_console.print(
+            f"[red]Error:[/red] Invalid output format '{output_format}'. "
+            "Use 'text', 'json', or 'csv'."
+        )
+        raise typer.Exit(1)
+
+    # Check if reading from stdin
+    is_stdin = sql_file.name == "<stdin>"
+
+    try:
+        # Check if stdin is being used without input
+        if is_stdin and sys.stdin.isatty():
+            err_console.print(
+                "[red]Error:[/red] No SQL file provided and stdin is interactive. "
+                "Provide a SQL file path or pipe SQL via stdin."
+            )
+            raise typer.Exit(1)
+
+        # Read SQL from file or stdin
+        sql = sql_file.read()
+
+        # Determine source path for templating (None if stdin)
+        source_path = None if is_stdin else Path(sql_file.name)
+
+        # Apply templating if specified
+        sql = _apply_templating(
+            sql,
+            templater_name=templater,
+            cli_vars=var,
+            vars_file=vars_file,
+            config=config,
+            source_path=source_path,
+        )
+
+        # Create analyzer
+        analyzer = DissectionAnalyzer(sql, dialect=dialect)
+
+        # Dissect queries
+        results = analyzer.dissect_queries()
+
+        # Format and output based on output format
+        if output_format == "text":
+            if output_file:
+                # For file output, use a string-based console to capture output
+                from io import StringIO
+
+                from rich.console import Console as FileConsole
+
+                string_buffer = StringIO()
+                file_console = FileConsole(file=string_buffer, force_terminal=False)
+                DissectionTextFormatter.format(results, file_console)
+                output_file.write_text(string_buffer.getvalue(), encoding="utf-8")
+                console.print(
+                    f"[green]Success:[/green] Dissection written to {output_file}"
+                )
+            else:
+                # Direct console output with Rich formatting
+                DissectionTextFormatter.format(results, console)
+        elif output_format == "json":
+            formatted = DissectionJsonFormatter.format(results)
+            DissectionOutputWriter.write(formatted, output_file)
+            if output_file:
+                console.print(
+                    f"[green]Success:[/green] Dissection written to {output_file}"
+                )
+        else:  # csv
+            formatted = DissectionCsvFormatter.format(results)
+            DissectionOutputWriter.write(formatted, output_file)
+            if output_file:
+                console.print(
+                    f"[green]Success:[/green] Dissection written to {output_file}"
+                )
+
+    except FileNotFoundError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except ParseError as e:
+        err_console.print(f"[red]Error:[/red] Failed to parse SQL: {e}")
+        raise typer.Exit(1)
+
+    except TemplaterError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except ValueError as e:
+        err_console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+
+    except Exception as e:
+        err_console.print(f"[red]Error:[/red] Unexpected error: {e}")
+        raise typer.Exit(1)
+
+
 if __name__ == "__main__":
     app()
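For reference, the `dissect` command is a thin wrapper over the new dissection module. A minimal sketch of driving it directly, assuming only the `DissectionAnalyzer(sql, dialect=...)` constructor and `dissect_queries()` method used above; the result objects are defined in sqlglider/dissection/models.py, so they are just printed here rather than introspected:

```python
# Driving the dissection module without the CLI; assumes the
# DissectionAnalyzer surface used by the `dissect` command above.
from sqlglider.dissection import DissectionAnalyzer

sql = """
WITH recent AS (
    SELECT * FROM orders WHERE order_date > '2024-01-01'
)
SELECT customer_id, COUNT(*) AS n
FROM recent
GROUP BY customer_id
"""

analyzer = DissectionAnalyzer(sql, dialect="spark")
results = analyzer.dissect_queries()  # one dissection result per statement
print(results)                        # CTEs, main query, subqueries, etc.
```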
sqlglider/dissection/__init__.py
ADDED
@@ -0,0 +1,17 @@
+"""SQL query dissection module for decomposing queries into components."""
+
+from sqlglider.dissection.analyzer import DissectionAnalyzer
+from sqlglider.dissection.models import (
+    ComponentType,
+    QueryDissectionResult,
+    QueryMetadata,
+    SQLComponent,
+)
+
+__all__ = [
+    "ComponentType",
+    "DissectionAnalyzer",
+    "QueryDissectionResult",
+    "QueryMetadata",
+    "SQLComponent",
+]