sql-glider 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.12
+Version: 0.1.13
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,6 +1,6 @@
 sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
-sqlglider/_version.py,sha256=cEPXLUpTV7EzqolnyXW8nf8Hr6IVyBji9CzB6Cq_Ar0,706
-sqlglider/cli.py,sha256=qEDLZ1a6yr-BzrtkBsJEHPByMmRERsGKZsYFTn9kaMY,55624
+sqlglider/_version.py,sha256=Xz5RLbyPcCHHXte393JYfUy4Dt7uaeWyrGVw9SmJ0eg,706
+sqlglider/cli.py,sha256=FDTjRmor_cQlcwfiD_uHTrQao2sMf3ev21IUyUSt7Qs,56401
 sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
 sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
 sqlglider/catalog/base.py,sha256=R7htHC43InpH4uRjYk33dMYYji6oylHns7Ye_mgfjJE,3116
@@ -11,14 +11,14 @@ sqlglider/dissection/analyzer.py,sha256=-GD3-lTbfBthq1BW6HiDjvJx2y4LDmnUVHIVIb0H
 sqlglider/dissection/formatters.py,sha256=M7gsmTNljRIeLIRv4D0vHvqJVrTqWSpsg7vem83zSzY,7302
 sqlglider/dissection/models.py,sha256=RRD3RIteqbUBY6e-74skKDvMH3qeAUaqA2sFcrjP5GQ,3618
 sqlglider/graph/__init__.py,sha256=4DDdrPM75CmeQWt7wHdBsjCm1s70BHGLYdijIbaUEKY,871
-sqlglider/graph/builder.py,sha256=o0SnH5eWUUPpzRSdsdCXEva3QTlhLDagJulJ2hRFQqA,19895
+sqlglider/graph/builder.py,sha256=suxc_hymHvHnkgltgXqwwIoxlay7zhy1Enbs6HNC3m8,20107
 sqlglider/graph/formatters.py,sha256=EGgdxTr9Mctz9tTN54XIjoX0KGNcpiSKsW3o27dhMxo,2549
 sqlglider/graph/merge.py,sha256=uUZlm4BN3S9gRL66Cc2mzhbtuh4SVAv2n4cN4eUEQBU,4077
 sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,9330
 sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
 sqlglider/graph/serialization.py,sha256=vMXn7s35jA499e7l90vNVaJE_3QR_VHf3rEfQ9ZlgTQ,2781
 sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
-sqlglider/lineage/analyzer.py,sha256=-LUeVNEsjfEWoKAJ2qVIiJO1noqwae4jQkwkkkVbAT8,75950
+sqlglider/lineage/analyzer.py,sha256=08pFR5aGFFPhSbRW6EqiX2d3mp91v-orcs6dm_T1FJg,76484
 sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
 sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
 sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -29,8 +29,8 @@ sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,2
 sqlglider/utils/config.py,sha256=qx5zE9pjLCCzHQDFVPLVd7LgJ-lghxUa2x-aZOAHByY,4962
 sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
 sqlglider/utils/schema.py,sha256=-0Vd1A3EggBH3reXTiabO0zFeTENROgmDg861X1D7Qs,1867
-sql_glider-0.1.12.dist-info/METADATA,sha256=73yuoWaAE5DKE9wobDXxbERSP2Pq-WpdqCnaswAa9fQ,28446
-sql_glider-0.1.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sql_glider-0.1.12.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
-sql_glider-0.1.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sql_glider-0.1.12.dist-info/RECORD,,
+sql_glider-0.1.13.dist-info/METADATA,sha256=z-utivkULH1BBhygNpLcWN9UdU1DbwfF3EzUhGtWXes,28446
+sql_glider-0.1.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sql_glider-0.1.13.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
+sql_glider-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sql_glider-0.1.13.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.12'
-__version_tuple__ = version_tuple = (0, 1, 12)
+__version__ = version = '0.1.13'
+__version_tuple__ = version_tuple = (0, 1, 13)
 
 __commit_id__ = commit_id = None
sqlglider/cli.py CHANGED
@@ -1166,36 +1166,57 @@ def graph_build(
         strict_schema=strict_schema,
     )
 
-    # Process manifest if provided
-    if manifest:
-        builder.add_manifest(manifest, dialect=dialect)
-
-    # Process paths - collect all files first for progress tracking
+    # Collect file paths for schema extraction
+    path_files: list[Path] = []
     if paths:
-        all_files: list[Path] = []
         for path in paths:
             if path.is_dir():
                 pattern = f"**/{glob_pattern}" if recursive else glob_pattern
-                all_files.extend(
+                path_files.extend(
                     f for f in sorted(path.glob(pattern)) if f.is_file()
                 )
             elif path.is_file():
-                all_files.append(path)
+                path_files.append(path)
             else:
                 err_console.print(f"[red]Error:[/red] Path not found: {path}")
                 raise typer.Exit(1)
-        builder.add_files(all_files, dialect=dialect)
 
-    # Dump resolved schema if requested
-    if dump_schema:
-        from sqlglider.graph.formatters import format_schema
+    manifest_files: list[Path] = []
+    if manifest:
+        from sqlglider.graph.models import Manifest
+
+        manifest_data = Manifest.from_csv(manifest)
+        base_dir = manifest.parent
+        for entry in manifest_data.entries:
+            file_path = Path(entry.file_path)
+            if not file_path.is_absolute():
+                file_path = (base_dir / entry.file_path).resolve()
+            manifest_files.append(file_path)
+
+    # Extract schema upfront if requested, then dump before graph building
+    all_files = manifest_files + path_files
+    if resolve_schema and all_files:
+        builder.extract_schemas(all_files, dialect=dialect)
+
+    if dump_schema:
+        from sqlglider.graph.formatters import format_schema
+
+        schema_content = format_schema(
+            builder.resolved_schema, dump_schema_format
+        )
+        dump_schema.write_text(schema_content, encoding="utf-8")
+        console.print(
+            f"[green]Schema dumped to {dump_schema} "
+            f"({len(builder.resolved_schema)} table(s))[/green]"
+        )
+
+    # Process manifest if provided
+    if manifest:
+        builder.add_manifest(manifest, dialect=dialect)
 
-        schema_content = format_schema(builder.resolved_schema, dump_schema_format)
-        dump_schema.write_text(schema_content, encoding="utf-8")
-        console.print(
-            f"[green]Schema dumped to {dump_schema} "
-            f"({len(builder.resolved_schema)} table(s))[/green]"
-        )
+    # Process path-based files
+    if path_files:
+        builder.add_files(path_files, dialect=dialect)
 
     # Build and save graph
     graph = builder.build()
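
The reordering above changes graph_build's flow: path files and manifest entries are collected first, the schema pass runs once over the combined set, the resolved schema can be dumped, and only then are the manifest and path files handed to the builder. The manifest handling also resolves relative entries against the manifest's own directory. Below is a minimal sketch of that resolution rule using only pathlib; the manifest path and entries are invented for illustration.

    from pathlib import Path

    # Hypothetical manifest location and entries, mirroring the rule in the
    # new graph_build code: relative entries resolve against the manifest's
    # parent directory, absolute entries are kept as-is.
    manifest = Path("/workspace/project/manifest.csv")
    entries = ["models/orders.sql", "/abs/path/customers.sql"]

    base_dir = manifest.parent
    resolved = []
    for entry in entries:
        file_path = Path(entry)
        if not file_path.is_absolute():
            file_path = (base_dir / entry).resolve()
        resolved.append(file_path)

    # Prints /workspace/project/models/orders.sql and /abs/path/customers.sql
    print(resolved)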
sqlglider/graph/builder.py CHANGED
@@ -235,19 +235,10 @@ class GraphBuilder:
         if not files_with_dialects:
             return self
 
-        # Two-pass schema resolution
-        if self.resolve_schema:
-            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
             file_paths_only = [fp for fp, _ in files_with_dialects]
-            self._resolved_schema = self._extract_schemas(file_paths_only, dialect)
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths_only, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+            self.extract_schemas(file_paths_only, dialect)
 
         total = len(files_with_dialects)
         description = "Pass 2: Analyzing lineage" if self.resolve_schema else "Parsing"
@@ -286,18 +277,9 @@
         if not file_paths:
             return self
 
-        # Two-pass schema resolution: extract schema from all files first
-        if self.resolve_schema:
-            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
-            self._resolved_schema = self._extract_schemas(file_paths, dialect)
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
+            self.extract_schemas(file_paths, dialect)
 
         if show_progress:
             total = len(file_paths)
@@ -321,6 +303,34 @@
             self.add_file(file_path, dialect)
         return self
 
+    def extract_schemas(
+        self,
+        file_paths: List[Path],
+        dialect: Optional[str] = None,
+    ) -> Dict[str, Dict[str, str]]:
+        """Run schema extraction pass and optionally fill from catalog.
+
+        Call this before add_files/add_manifest to resolve schema upfront.
+        The resolved schema is stored internally and also returned.
+
+        Args:
+            file_paths: SQL files to extract schema from
+            dialect: SQL dialect override
+
+        Returns:
+            Resolved schema dict
+        """
+        console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        self._resolved_schema = self._extract_schemas(file_paths, dialect)
+        if self.catalog_type:
+            self._resolved_schema = self._fill_schema_from_catalog(
+                self._resolved_schema, file_paths, dialect
+            )
+        console.print(
+            f"[blue]Schema resolved for {len(self._resolved_schema)} table(s)[/blue]"
+        )
+        return self._resolved_schema.copy()
+
     def _extract_schemas(
         self,
         file_paths: List[Path],
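
The new public extract_schemas method is what makes the CLI reordering possible: callers run the schema pass once over the full file set, and add_files/add_manifest then skip their own Pass 1 because _resolved_schema is already populated. A usage sketch follows, assuming a builder constructed with resolve_schema enabled; the constructor arguments shown are illustrative, not the exact signature.

    from pathlib import Path

    from sqlglider.graph.builder import GraphBuilder

    sql_files = sorted(Path("queries").glob("**/*.sql"))

    # Assumed constructor arguments, for illustration only.
    builder = GraphBuilder(resolve_schema=True)

    # Pass 1 runs here, once, over every file the graph will include.
    schema = builder.extract_schemas(sql_files)
    print(f"{len(schema)} table(s) resolved before graph construction")

    # add_files sees the already-populated schema and goes straight to the
    # lineage pass instead of re-extracting; it returns self, so calls chain.
    graph = builder.add_files(sql_files).build()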
sqlglider/lineage/analyzer.py CHANGED
@@ -859,18 +859,31 @@ class LineageAnalyzer:
         else:
             current_query_sql = self.expr.sql(dialect=self.dialect)
 
+        # Prune schema to only tables referenced in this query to avoid
+        # sqlglot.lineage() performance degradation with large schema dicts
+        pruned_schema: Optional[Dict[str, Dict[str, str]]] = None
+        if self._file_schema:
+            referenced = {t.lower() for t in self._get_query_tables()}
+            pruned_schema = {
+                table: cols
+                for table, cols in self._file_schema.items()
+                if table.lower() in referenced
+            }
+            if not pruned_schema:
+                pruned_schema = None
+
         for col in columns_to_analyze:
             try:
                 # Get the column name that lineage expects
                 lineage_col = self._column_mapping.get(col, col)
 
                 # Get lineage tree for this column using current query SQL only
-                # Pass file schema to enable SELECT * expansion for known tables/views
+                # Pass pruned schema to enable SELECT * expansion for known tables/views
                 node = lineage(
                     lineage_col,
                     current_query_sql,
                     dialect=self.dialect,
-                    schema=self._file_schema if self._file_schema else None,
+                    schema=pruned_schema,
                 )
 
                 # Collect all source columns
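
The analyzer change narrows the schema handed to sqlglot's lineage() to just the tables the current query references, since qualification work grows with the size of the schema mapping. The same idea is sketched standalone against sqlglot below; the table names and schema are invented, and plain sqlglot parsing stands in for the analyzer's _get_query_tables helper.

    import sqlglot
    from sqlglot.lineage import lineage

    sql = "SELECT o.id, o.total FROM orders AS o"

    # Invented file-wide schema; in sqlglider this would be self._file_schema.
    file_schema = {
        "orders": {"id": "int", "total": "decimal", "customer_id": "int"},
        "customers": {"id": "int", "name": "text"},      # not referenced
        "payments": {"id": "int", "amount": "decimal"},  # not referenced
    }

    # Keep only tables the query actually touches (case-insensitive).
    referenced = {
        t.name.lower()
        for t in sqlglot.parse_one(sql).find_all(sqlglot.exp.Table)
    }
    pruned = {tbl: cols for tbl, cols in file_schema.items() if tbl.lower() in referenced}

    # lineage() now qualifies against one table instead of the whole schema.
    node = lineage("total", sql, schema=pruned or None, dialect="duckdb")
    for n in node.walk():
        print(n.name)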