sql-glider 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.12
+Version: 0.1.13
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,6 +1,6 @@
 sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
-sqlglider/_version.py,sha256=cEPXLUpTV7EzqolnyXW8nf8Hr6IVyBji9CzB6Cq_Ar0,706
-sqlglider/cli.py,sha256=qEDLZ1a6yr-BzrtkBsJEHPByMmRERsGKZsYFTn9kaMY,55624
+sqlglider/_version.py,sha256=Xz5RLbyPcCHHXte393JYfUy4Dt7uaeWyrGVw9SmJ0eg,706
+sqlglider/cli.py,sha256=FDTjRmor_cQlcwfiD_uHTrQao2sMf3ev21IUyUSt7Qs,56401
 sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
 sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
 sqlglider/catalog/base.py,sha256=R7htHC43InpH4uRjYk33dMYYji6oylHns7Ye_mgfjJE,3116
@@ -11,14 +11,14 @@ sqlglider/dissection/analyzer.py,sha256=-GD3-lTbfBthq1BW6HiDjvJx2y4LDmnUVHIVIb0H
 sqlglider/dissection/formatters.py,sha256=M7gsmTNljRIeLIRv4D0vHvqJVrTqWSpsg7vem83zSzY,7302
 sqlglider/dissection/models.py,sha256=RRD3RIteqbUBY6e-74skKDvMH3qeAUaqA2sFcrjP5GQ,3618
 sqlglider/graph/__init__.py,sha256=4DDdrPM75CmeQWt7wHdBsjCm1s70BHGLYdijIbaUEKY,871
-sqlglider/graph/builder.py,sha256=o0SnH5eWUUPpzRSdsdCXEva3QTlhLDagJulJ2hRFQqA,19895
+sqlglider/graph/builder.py,sha256=suxc_hymHvHnkgltgXqwwIoxlay7zhy1Enbs6HNC3m8,20107
 sqlglider/graph/formatters.py,sha256=EGgdxTr9Mctz9tTN54XIjoX0KGNcpiSKsW3o27dhMxo,2549
 sqlglider/graph/merge.py,sha256=uUZlm4BN3S9gRL66Cc2mzhbtuh4SVAv2n4cN4eUEQBU,4077
 sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,9330
 sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
 sqlglider/graph/serialization.py,sha256=vMXn7s35jA499e7l90vNVaJE_3QR_VHf3rEfQ9ZlgTQ,2781
 sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
-sqlglider/lineage/analyzer.py,sha256=-LUeVNEsjfEWoKAJ2qVIiJO1noqwae4jQkwkkkVbAT8,75950
+sqlglider/lineage/analyzer.py,sha256=08pFR5aGFFPhSbRW6EqiX2d3mp91v-orcs6dm_T1FJg,76484
 sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
 sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
 sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -29,8 +29,8 @@ sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,2
 sqlglider/utils/config.py,sha256=qx5zE9pjLCCzHQDFVPLVd7LgJ-lghxUa2x-aZOAHByY,4962
 sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
 sqlglider/utils/schema.py,sha256=-0Vd1A3EggBH3reXTiabO0zFeTENROgmDg861X1D7Qs,1867
-sql_glider-0.1.12.dist-info/METADATA,sha256=73yuoWaAE5DKE9wobDXxbERSP2Pq-WpdqCnaswAa9fQ,28446
-sql_glider-0.1.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sql_glider-0.1.12.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
-sql_glider-0.1.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sql_glider-0.1.12.dist-info/RECORD,,
+sql_glider-0.1.13.dist-info/METADATA,sha256=z-utivkULH1BBhygNpLcWN9UdU1DbwfF3EzUhGtWXes,28446
+sql_glider-0.1.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sql_glider-0.1.13.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
+sql_glider-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sql_glider-0.1.13.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.12'
-__version_tuple__ = version_tuple = (0, 1, 12)
+__version__ = version = '0.1.13'
+__version_tuple__ = version_tuple = (0, 1, 13)
 
 __commit_id__ = commit_id = None
sqlglider/cli.py CHANGED
@@ -1166,36 +1166,57 @@ def graph_build(
         strict_schema=strict_schema,
     )
 
-    # Process manifest if provided
-    if manifest:
-        builder.add_manifest(manifest, dialect=dialect)
-
-    # Process paths - collect all files first for progress tracking
+    # Collect file paths for schema extraction
+    path_files: list[Path] = []
     if paths:
-        all_files: list[Path] = []
         for path in paths:
             if path.is_dir():
                 pattern = f"**/{glob_pattern}" if recursive else glob_pattern
-                all_files.extend(
+                path_files.extend(
                     f for f in sorted(path.glob(pattern)) if f.is_file()
                 )
             elif path.is_file():
-                all_files.append(path)
+                path_files.append(path)
             else:
                 err_console.print(f"[red]Error:[/red] Path not found: {path}")
                 raise typer.Exit(1)
-        builder.add_files(all_files, dialect=dialect)
 
-    # Dump resolved schema if requested
-    if dump_schema:
-        from sqlglider.graph.formatters import format_schema
+    manifest_files: list[Path] = []
+    if manifest:
+        from sqlglider.graph.models import Manifest
+
+        manifest_data = Manifest.from_csv(manifest)
+        base_dir = manifest.parent
+        for entry in manifest_data.entries:
+            file_path = Path(entry.file_path)
+            if not file_path.is_absolute():
+                file_path = (base_dir / entry.file_path).resolve()
+            manifest_files.append(file_path)
+
+    # Extract schema upfront if requested, then dump before graph building
+    all_files = manifest_files + path_files
+    if resolve_schema and all_files:
+        builder.extract_schemas(all_files, dialect=dialect)
+
+    if dump_schema:
+        from sqlglider.graph.formatters import format_schema
+
+        schema_content = format_schema(
+            builder.resolved_schema, dump_schema_format
+        )
+        dump_schema.write_text(schema_content, encoding="utf-8")
+        console.print(
+            f"[green]Schema dumped to {dump_schema} "
+            f"({len(builder.resolved_schema)} table(s))[/green]"
+        )
+
+    # Process manifest if provided
+    if manifest:
+        builder.add_manifest(manifest, dialect=dialect)
 
-        schema_content = format_schema(builder.resolved_schema, dump_schema_format)
-        dump_schema.write_text(schema_content, encoding="utf-8")
-        console.print(
-            f"[green]Schema dumped to {dump_schema} "
-            f"({len(builder.resolved_schema)} table(s))[/green]"
-        )
+    # Process path-based files
+    if path_files:
+        builder.add_files(path_files, dialect=dialect)
 
     # Build and save graph
     graph = builder.build()
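
The reordering above changes graph_build's flow: path files and manifest entries are collected first, the schema pass runs once over the combined set, the resolved schema can be dumped, and only then are the manifest and path files handed to the builder. The manifest handling also resolves relative entries against the manifest's own directory. Below is a minimal sketch of that resolution rule using only pathlib; the manifest path and entries are invented for illustration.

    from pathlib import Path

    # Hypothetical manifest location and entries, mirroring the rule in the
    # new graph_build code: relative entries resolve against the manifest's
    # parent directory, absolute entries are kept as-is.
    manifest = Path("/workspace/project/manifest.csv")
    entries = ["models/orders.sql", "/abs/path/customers.sql"]

    base_dir = manifest.parent
    resolved = []
    for entry in entries:
        file_path = Path(entry)
        if not file_path.is_absolute():
            file_path = (base_dir / entry).resolve()
        resolved.append(file_path)

    # Prints /workspace/project/models/orders.sql and /abs/path/customers.sql
    print(resolved)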
sqlglider/graph/builder.py CHANGED
@@ -235,19 +235,10 @@ class GraphBuilder:
         if not files_with_dialects:
             return self
 
-        # Two-pass schema resolution
-        if self.resolve_schema:
-            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
             file_paths_only = [fp for fp, _ in files_with_dialects]
-            self._resolved_schema = self._extract_schemas(file_paths_only, dialect)
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths_only, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+            self.extract_schemas(file_paths_only, dialect)
 
         total = len(files_with_dialects)
         description = "Pass 2: Analyzing lineage" if self.resolve_schema else "Parsing"
@@ -286,18 +277,9 @@
         if not file_paths:
             return self
 
-        # Two-pass schema resolution: extract schema from all files first
-        if self.resolve_schema:
-            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
-            self._resolved_schema = self._extract_schemas(file_paths, dialect)
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
+            self.extract_schemas(file_paths, dialect)
 
         if show_progress:
             total = len(file_paths)
@@ -321,6 +303,34 @@
             self.add_file(file_path, dialect)
         return self
 
+    def extract_schemas(
+        self,
+        file_paths: List[Path],
+        dialect: Optional[str] = None,
+    ) -> Dict[str, Dict[str, str]]:
+        """Run schema extraction pass and optionally fill from catalog.
+
+        Call this before add_files/add_manifest to resolve schema upfront.
+        The resolved schema is stored internally and also returned.
+
+        Args:
+            file_paths: SQL files to extract schema from
+            dialect: SQL dialect override
+
+        Returns:
+            Resolved schema dict
+        """
+        console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        self._resolved_schema = self._extract_schemas(file_paths, dialect)
+        if self.catalog_type:
+            self._resolved_schema = self._fill_schema_from_catalog(
+                self._resolved_schema, file_paths, dialect
+            )
+        console.print(
+            f"[blue]Schema resolved for {len(self._resolved_schema)} table(s)[/blue]"
+        )
+        return self._resolved_schema.copy()
+
     def _extract_schemas(
         self,
         file_paths: List[Path],
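
The new public extract_schemas method is what makes the CLI reordering possible: callers run the schema pass once over the full file set, and add_files/add_manifest then skip their own Pass 1 because _resolved_schema is already populated. A usage sketch follows, assuming a builder constructed with resolve_schema enabled; the constructor arguments shown are illustrative, not the exact signature.

    from pathlib import Path

    from sqlglider.graph.builder import GraphBuilder

    sql_files = sorted(Path("queries").glob("**/*.sql"))

    # Assumed constructor arguments, for illustration only.
    builder = GraphBuilder(resolve_schema=True)

    # Pass 1 runs here, once, over every file the graph will include.
    schema = builder.extract_schemas(sql_files)
    print(f"{len(schema)} table(s) resolved before graph construction")

    # add_files sees the already-populated schema and goes straight to the
    # lineage pass instead of re-extracting; it returns self, so calls chain.
    graph = builder.add_files(sql_files).build()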
sqlglider/lineage/analyzer.py CHANGED
@@ -859,18 +859,31 @@ class LineageAnalyzer:
         else:
             current_query_sql = self.expr.sql(dialect=self.dialect)
 
+        # Prune schema to only tables referenced in this query to avoid
+        # sqlglot.lineage() performance degradation with large schema dicts
+        pruned_schema: Optional[Dict[str, Dict[str, str]]] = None
+        if self._file_schema:
+            referenced = {t.lower() for t in self._get_query_tables()}
+            pruned_schema = {
+                table: cols
+                for table, cols in self._file_schema.items()
+                if table.lower() in referenced
+            }
+            if not pruned_schema:
+                pruned_schema = None
+
         for col in columns_to_analyze:
             try:
                 # Get the column name that lineage expects
                 lineage_col = self._column_mapping.get(col, col)
 
                 # Get lineage tree for this column using current query SQL only
-                # Pass file schema to enable SELECT * expansion for known tables/views
+                # Pass pruned schema to enable SELECT * expansion for known tables/views
                 node = lineage(
                     lineage_col,
                     current_query_sql,
                     dialect=self.dialect,
-                    schema=self._file_schema if self._file_schema else None,
+                    schema=pruned_schema,
                 )
 
                 # Collect all source columns
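
The analyzer change narrows the schema handed to sqlglot's lineage() to just the tables the current query references, since qualification work grows with the size of the schema mapping. The same idea is sketched standalone against sqlglot below; the table names and schema are invented, and plain sqlglot parsing stands in for the analyzer's _get_query_tables helper.

    import sqlglot
    from sqlglot.lineage import lineage

    sql = "SELECT o.id, o.total FROM orders AS o"

    # Invented file-wide schema; in sqlglider this would be self._file_schema.
    file_schema = {
        "orders": {"id": "int", "total": "decimal", "customer_id": "int"},
        "customers": {"id": "int", "name": "text"},      # not referenced
        "payments": {"id": "int", "amount": "decimal"},  # not referenced
    }

    # Keep only tables the query actually touches (case-insensitive).
    referenced = {
        t.name.lower()
        for t in sqlglot.parse_one(sql).find_all(sqlglot.exp.Table)
    }
    pruned = {tbl: cols for tbl, cols in file_schema.items() if tbl.lower() in referenced}

    # lineage() now qualifies against one table instead of the whole schema.
    node = lineage("total", sql, schema=pruned or None, dialect="duckdb")
    for n in node.walk():
        print(n.name)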