sql-glider 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/METADATA +1 -1
- {sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/RECORD +9 -9
- sqlglider/_version.py +2 -2
- sqlglider/cli.py +39 -18
- sqlglider/graph/builder.py +34 -24
- sqlglider/lineage/analyzer.py +15 -2
- {sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/entry_points.txt +0 -0
- {sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/licenses/LICENSE +0 -0
{sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.12
+Version: 0.1.13
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
{sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
-sqlglider/_version.py,sha256=
-sqlglider/cli.py,sha256=
+sqlglider/_version.py,sha256=Xz5RLbyPcCHHXte393JYfUy4Dt7uaeWyrGVw9SmJ0eg,706
+sqlglider/cli.py,sha256=FDTjRmor_cQlcwfiD_uHTrQao2sMf3ev21IUyUSt7Qs,56401
 sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
 sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
 sqlglider/catalog/base.py,sha256=R7htHC43InpH4uRjYk33dMYYji6oylHns7Ye_mgfjJE,3116
@@ -11,14 +11,14 @@ sqlglider/dissection/analyzer.py,sha256=-GD3-lTbfBthq1BW6HiDjvJx2y4LDmnUVHIVIb0H
 sqlglider/dissection/formatters.py,sha256=M7gsmTNljRIeLIRv4D0vHvqJVrTqWSpsg7vem83zSzY,7302
 sqlglider/dissection/models.py,sha256=RRD3RIteqbUBY6e-74skKDvMH3qeAUaqA2sFcrjP5GQ,3618
 sqlglider/graph/__init__.py,sha256=4DDdrPM75CmeQWt7wHdBsjCm1s70BHGLYdijIbaUEKY,871
-sqlglider/graph/builder.py,sha256=
+sqlglider/graph/builder.py,sha256=suxc_hymHvHnkgltgXqwwIoxlay7zhy1Enbs6HNC3m8,20107
 sqlglider/graph/formatters.py,sha256=EGgdxTr9Mctz9tTN54XIjoX0KGNcpiSKsW3o27dhMxo,2549
 sqlglider/graph/merge.py,sha256=uUZlm4BN3S9gRL66Cc2mzhbtuh4SVAv2n4cN4eUEQBU,4077
 sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,9330
 sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
 sqlglider/graph/serialization.py,sha256=vMXn7s35jA499e7l90vNVaJE_3QR_VHf3rEfQ9ZlgTQ,2781
 sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
-sqlglider/lineage/analyzer.py,sha256
+sqlglider/lineage/analyzer.py,sha256=08pFR5aGFFPhSbRW6EqiX2d3mp91v-orcs6dm_T1FJg,76484
 sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
 sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
 sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
@@ -29,8 +29,8 @@ sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,2
 sqlglider/utils/config.py,sha256=qx5zE9pjLCCzHQDFVPLVd7LgJ-lghxUa2x-aZOAHByY,4962
 sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
 sqlglider/utils/schema.py,sha256=-0Vd1A3EggBH3reXTiabO0zFeTENROgmDg861X1D7Qs,1867
-sql_glider-0.1.
-sql_glider-0.1.
-sql_glider-0.1.
-sql_glider-0.1.
-sql_glider-0.1.
+sql_glider-0.1.13.dist-info/METADATA,sha256=z-utivkULH1BBhygNpLcWN9UdU1DbwfF3EzUhGtWXes,28446
+sql_glider-0.1.13.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sql_glider-0.1.13.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
+sql_glider-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sql_glider-0.1.13.dist-info/RECORD,,
sqlglider/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.12'
-__version_tuple__ = version_tuple = (0, 1, 12)
+__version__ = version = '0.1.13'
+__version_tuple__ = version_tuple = (0, 1, 13)
 
 __commit_id__ = commit_id = None
sqlglider/cli.py
CHANGED
@@ -1166,36 +1166,57 @@ def graph_build(
         strict_schema=strict_schema,
     )
 
-    #
-
-    builder.add_manifest(manifest, dialect=dialect)
-
-    # Process paths - collect all files first for progress tracking
+    # Collect file paths for schema extraction
+    path_files: list[Path] = []
     if paths:
-        all_files: list[Path] = []
        for path in paths:
            if path.is_dir():
                pattern = f"**/{glob_pattern}" if recursive else glob_pattern
-
+                path_files.extend(
                    f for f in sorted(path.glob(pattern)) if f.is_file()
                )
            elif path.is_file():
-
+                path_files.append(path)
            else:
                err_console.print(f"[red]Error:[/red] Path not found: {path}")
                raise typer.Exit(1)
-        builder.add_files(all_files, dialect=dialect)
 
-
-    if
-        from sqlglider.graph.
+    manifest_files: list[Path] = []
+    if manifest:
+        from sqlglider.graph.models import Manifest
+
+        manifest_data = Manifest.from_csv(manifest)
+        base_dir = manifest.parent
+        for entry in manifest_data.entries:
+            file_path = Path(entry.file_path)
+            if not file_path.is_absolute():
+                file_path = (base_dir / entry.file_path).resolve()
+            manifest_files.append(file_path)
+
+    # Extract schema upfront if requested, then dump before graph building
+    all_files = manifest_files + path_files
+    if resolve_schema and all_files:
+        builder.extract_schemas(all_files, dialect=dialect)
+
+        if dump_schema:
+            from sqlglider.graph.formatters import format_schema
+
+            schema_content = format_schema(
+                builder.resolved_schema, dump_schema_format
+            )
+            dump_schema.write_text(schema_content, encoding="utf-8")
+            console.print(
+                f"[green]Schema dumped to {dump_schema} "
+                f"({len(builder.resolved_schema)} table(s))[/green]"
+            )
+
+    # Process manifest if provided
+    if manifest:
+        builder.add_manifest(manifest, dialect=dialect)
 
-
-
-
-            f"[green]Schema dumped to {dump_schema} "
-            f"({len(builder.resolved_schema)} table(s))[/green]"
-        )
+    # Process path-based files
+    if path_files:
+        builder.add_files(path_files, dialect=dialect)
 
     # Build and save graph
     graph = builder.build()
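Taken together, the cli.py changes separate file discovery from graph construction: path and manifest files are gathered first, the new GraphBuilder.extract_schemas() runs a single schema pass over the combined list, the schema can be dumped before any graph work begins, and only then do add_manifest() and add_files() run. A rough sketch of that ordering, assuming an already-configured GraphBuilder instance; the file lists and manifest path here are placeholders, and only the builder method calls mirror the diff:

    from pathlib import Path

    # `builder` stands in for a GraphBuilder with schema resolution enabled;
    # its constructor is not part of this diff.
    path_files = sorted(Path("queries").glob("*.sql"))    # hypothetical SQL files
    manifest_path = Path("manifest.csv")                  # hypothetical manifest
    manifest_files = [Path("models/orders.sql")]          # paths listed in that manifest

    all_files = manifest_files + path_files
    if all_files:
        builder.extract_schemas(all_files, dialect=None)  # Pass 1 runs exactly once, up front
    builder.add_manifest(manifest_path, dialect=None)     # Pass 2 reuses the stored schema
    builder.add_files(path_files, dialect=None)
    graph = builder.build()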
sqlglider/graph/builder.py
CHANGED
@@ -235,19 +235,10 @@ class GraphBuilder:
         if not files_with_dialects:
             return self
 
-        # Two-pass schema resolution
-        if self.resolve_schema:
-            console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
             file_paths_only = [fp for fp, _ in files_with_dialects]
-            self.
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths_only, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+            self.extract_schemas(file_paths_only, dialect)
 
         total = len(files_with_dialects)
         description = "Pass 2: Analyzing lineage" if self.resolve_schema else "Parsing"
@@ -286,18 +277,9 @@ class GraphBuilder:
         if not file_paths:
             return self
 
-        # Two-pass schema resolution
-        if self.resolve_schema:
-
-            self._resolved_schema = self._extract_schemas(file_paths, dialect)
-            if self.catalog_type:
-                self._resolved_schema = self._fill_schema_from_catalog(
-                    self._resolved_schema, file_paths, dialect
-                )
-            console.print(
-                f"[blue]Schema resolved for "
-                f"{len(self._resolved_schema)} table(s)[/blue]"
-            )
+        # Two-pass schema resolution (skip if already resolved)
+        if self.resolve_schema and not self._resolved_schema:
+            self.extract_schemas(file_paths, dialect)
 
         if show_progress:
             total = len(file_paths)
@@ -321,6 +303,34 @@ class GraphBuilder:
             self.add_file(file_path, dialect)
         return self
 
+    def extract_schemas(
+        self,
+        file_paths: List[Path],
+        dialect: Optional[str] = None,
+    ) -> Dict[str, Dict[str, str]]:
+        """Run schema extraction pass and optionally fill from catalog.
+
+        Call this before add_files/add_manifest to resolve schema upfront.
+        The resolved schema is stored internally and also returned.
+
+        Args:
+            file_paths: SQL files to extract schema from
+            dialect: SQL dialect override
+
+        Returns:
+            Resolved schema dict
+        """
+        console.print("[blue]Pass 1: Extracting schema from files[/blue]")
+        self._resolved_schema = self._extract_schemas(file_paths, dialect)
+        if self.catalog_type:
+            self._resolved_schema = self._fill_schema_from_catalog(
+                self._resolved_schema, file_paths, dialect
+            )
+        console.print(
+            f"[blue]Schema resolved for {len(self._resolved_schema)} table(s)[/blue]"
+        )
+        return self._resolved_schema.copy()
+
     def _extract_schemas(
         self,
         file_paths: List[Path],
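The builder.py side of the change moves the old inline Pass 1 logic into one public extract_schemas() method, and both add_files() entry points now guard on `not self._resolved_schema`, so a schema resolved up front is never recomputed. A hedged usage sketch; the GraphBuilder construction and the file list are placeholders, and only the method names, guard behavior, and returned copy come from the diff:

    from pathlib import Path

    # Assume `builder` is a GraphBuilder created with schema resolution enabled
    # (the diff references a resolve_schema attribute but not the constructor).
    ddl_files = [Path("models/orders.sql"), Path("models/customers.sql")]  # hypothetical

    schema = builder.extract_schemas(ddl_files, dialect=None)  # returns a copy of the resolved schema
    print(f"Resolved {len(schema)} table(s)")

    builder.add_files(ddl_files, dialect=None)  # guard sees _resolved_schema, so Pass 1 is skipped
    graph = builder.build()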
sqlglider/lineage/analyzer.py
CHANGED
@@ -859,18 +859,31 @@ class LineageAnalyzer:
         else:
             current_query_sql = self.expr.sql(dialect=self.dialect)
 
+        # Prune schema to only tables referenced in this query to avoid
+        # sqlglot.lineage() performance degradation with large schema dicts
+        pruned_schema: Optional[Dict[str, Dict[str, str]]] = None
+        if self._file_schema:
+            referenced = {t.lower() for t in self._get_query_tables()}
+            pruned_schema = {
+                table: cols
+                for table, cols in self._file_schema.items()
+                if table.lower() in referenced
+            }
+            if not pruned_schema:
+                pruned_schema = None
+
         for col in columns_to_analyze:
             try:
                 # Get the column name that lineage expects
                 lineage_col = self._column_mapping.get(col, col)
 
                 # Get lineage tree for this column using current query SQL only
-                # Pass
+                # Pass pruned schema to enable SELECT * expansion for known tables/views
                 node = lineage(
                     lineage_col,
                     current_query_sql,
                     dialect=self.dialect,
-                    schema=
+                    schema=pruned_schema,
                 )
 
                 # Collect all source columns
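The analyzer change narrows the schema handed to sqlglot.lineage() to just the tables a given query references, since passing a very large schema mapping slows that call down. A self-contained sketch of the same pruning pattern with made-up tables; only the lineage(..., schema=...) call shape mirrors the diff, while _file_schema and _get_query_tables() are the analyzer's own internals:

    from sqlglot.lineage import lineage

    # Hypothetical stand-in for the analyzer's _file_schema mapping.
    full_schema = {
        "orders": {"id": "int", "customer_id": "int", "amount": "double"},
        "customers": {"id": "int", "name": "text"},
        "unrelated_wide_table": {f"col_{i}": "int" for i in range(500)},
    }
    sql = "SELECT o.amount FROM orders AS o"

    # Keep only tables the query references (the analyzer derives this set via
    # _get_query_tables()); fall back to None if nothing matches.
    referenced = {"orders"}
    pruned = {t: c for t, c in full_schema.items() if t.lower() in referenced} or None

    node = lineage("amount", sql, schema=pruned, dialect=None)
    print(node.source.sql())  # the qualified query the lineage was computed against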
{sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/WHEEL
File without changes
{sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/entry_points.txt
File without changes
{sql_glider-0.1.12.dist-info → sql_glider-0.1.13.dist-info}/licenses/LICENSE
File without changes