sql-glider 0.1.14__py3-none-any.whl → 0.1.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{sql_glider-0.1.14.dist-info → sql_glider-0.1.15.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-glider
-Version: 0.1.14
+Version: 0.1.15
 Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
 Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
 Project-URL: Repository, https://github.com/rycowhi/sql-glider/
{sql_glider-0.1.14.dist-info → sql_glider-0.1.15.dist-info}/RECORD RENAMED
@@ -1,6 +1,6 @@
 sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
-sqlglider/_version.py,sha256=1asLxKIxr0ym19WewGl0URtkfnEuN7mK8ZckB9dZw6Q,706
-sqlglider/cli.py,sha256=UvDaeDhQRu98M1PaUtWsIL_F_LtulOf58kWGn6SxUzE,64175
+sqlglider/_version.py,sha256=HPqQHR9pVxIxlFt4vovkyoe7k6UO3ag2isBN2lHFL8g,706
+sqlglider/cli.py,sha256=9zNMaw3rgcqb6uG05VJTYbLUXmZzdX87gAOJ4Zg3xjY,65319
 sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
 sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
 sqlglider/catalog/base.py,sha256=R7htHC43InpH4uRjYk33dMYYji6oylHns7Ye_mgfjJE,3116
@@ -11,8 +11,8 @@ sqlglider/dissection/analyzer.py,sha256=-GD3-lTbfBthq1BW6HiDjvJx2y4LDmnUVHIVIb0H
 sqlglider/dissection/formatters.py,sha256=M7gsmTNljRIeLIRv4D0vHvqJVrTqWSpsg7vem83zSzY,7302
 sqlglider/dissection/models.py,sha256=RRD3RIteqbUBY6e-74skKDvMH3qeAUaqA2sFcrjP5GQ,3618
 sqlglider/graph/__init__.py,sha256=4DDdrPM75CmeQWt7wHdBsjCm1s70BHGLYdijIbaUEKY,871
-sqlglider/graph/builder.py,sha256=fS6p-73zyjuYIHRzM3uXFTFZ8zyal0s7oBdyO2Fv8vQ,15224
-sqlglider/graph/formatters.py,sha256=EGgdxTr9Mctz9tTN54XIjoX0KGNcpiSKsW3o27dhMxo,2549
+sqlglider/graph/builder.py,sha256=VNBdsDlkiaId3JGvr2G4h6OIFek_9zPsGMIYL9GpJlk,15796
+sqlglider/graph/formatters.py,sha256=p85-WN9oPmEETsAtWSo1sIQELF36w85QoFEJyfBZGoM,4800
 sqlglider/graph/merge.py,sha256=uUZlm4BN3S9gRL66Cc2mzhbtuh4SVAv2n4cN4eUEQBU,4077
 sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,9330
 sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
@@ -21,7 +21,7 @@ sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k
 sqlglider/lineage/analyzer.py,sha256=08pFR5aGFFPhSbRW6EqiX2d3mp91v-orcs6dm_T1FJg,76484
 sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
 sqlglider/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sqlglider/schema/extractor.py,sha256=iOi13ZStR4ngC2GkZGXjB0lsgmDqJ-OYwTRgH72hy1w,7082
+sqlglider/schema/extractor.py,sha256=WW31wbHkL-V749pLb7EAyUOJuziZQK-5hLZVW6f970U,7234
 sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
 sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
 sqlglider/templating/jinja.py,sha256=o01UG72N4G1-tOT5LKK1Wkccv4nJH2VN4VFaMi5c1-g,5220
@@ -31,8 +31,8 @@ sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,2
 sqlglider/utils/config.py,sha256=qx5zE9pjLCCzHQDFVPLVd7LgJ-lghxUa2x-aZOAHByY,4962
 sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
 sqlglider/utils/schema.py,sha256=-0Vd1A3EggBH3reXTiabO0zFeTENROgmDg861X1D7Qs,1867
-sql_glider-0.1.14.dist-info/METADATA,sha256=SdedCDEPwWR2Kqrg8_mMgb0PNmNZj0OExHiapyKZ63A,28446
-sql_glider-0.1.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sql_glider-0.1.14.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
-sql_glider-0.1.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-sql_glider-0.1.14.dist-info/RECORD,,
+sql_glider-0.1.15.dist-info/METADATA,sha256=IF0dZD6rOriyausbDZhHPMfYnhHyRlxyi9v_ihTgCUo,28446
+sql_glider-0.1.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sql_glider-0.1.15.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
+sql_glider-0.1.15.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+sql_glider-0.1.15.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.1.14'
-__version_tuple__ = version_tuple = (0, 1, 14)
+__version__ = version = '0.1.15'
+__version_tuple__ = version_tuple = (0, 1, 15)
 
 __commit_id__ = commit_id = None
sqlglider/cli.py CHANGED
@@ -171,6 +171,12 @@ def lineage(
         "--no-star",
         help="Fail if SELECT * cannot be resolved to actual columns",
     ),
+    provide_schema: Optional[Path] = typer.Option(
+        None,
+        "--provide-schema",
+        exists=True,
+        help="Path to a schema file (JSON, CSV, or text) for star resolution",
+    ),
 ) -> None:
     """
     Analyze column or table lineage for a SQL file.
@@ -266,8 +272,15 @@
         source_path=source_path,
     )
 
+    # Load provided schema if specified
+    schema = None
+    if provide_schema:
+        from sqlglider.graph.formatters import load_schema_file
+
+        schema = load_schema_file(provide_schema)
+
     # Create analyzer
-    analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star)
+    analyzer = LineageAnalyzer(sql, dialect=dialect, no_star=no_star, schema=schema)
 
     # Unified lineage analysis (handles both single and multi-query files)
     results = analyzer.analyze_queries(
@@ -1292,6 +1305,13 @@ def graph_build(
         "--dump-schema-format",
         help="Format for dumped schema: 'text' (default), 'json', or 'csv'",
     ),
+    provide_schema: Optional[Path] = typer.Option(
+        None,
+        "--provide-schema",
+        exists=True,
+        help="Path to a schema file (JSON, CSV, or text) to use for star resolution. "
+        "Can be combined with --resolve-schema to merge file-extracted schema on top.",
+    ),
     strict_schema: bool = typer.Option(
         False,
         "--strict-schema",
@@ -1434,6 +1454,17 @@
         strict_schema=strict_schema,
    )
 
+    # Load provided schema file if specified
+    if provide_schema:
+        from sqlglider.graph.formatters import load_schema_file
+
+        loaded_schema = load_schema_file(provide_schema)
+        builder.set_schema(loaded_schema)
+        console.print(
+            f"[green]Loaded schema from {provide_schema} "
+            f"({len(loaded_schema)} table(s))[/green]"
+        )
+
     # Collect file paths for schema extraction
     manifest_files, path_files = _collect_sql_files(
         paths, manifest, recursive, glob_pattern
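For orientation, the sketch below strings the new pieces together the way the lineage command now does: a schema file is parsed with load_schema_file and handed to the analyzer so unresolved SELECT * columns can be expanded against it. Only the calls visible in the hunks above come from the package; the file names, the dialect value, and the LineageAnalyzer import path are illustrative assumptions.

# Hypothetical usage sketch assembled from the cli.py hunks above; file names,
# the dialect value, and the LineageAnalyzer import location are assumptions.
from pathlib import Path

from sqlglider.graph.formatters import load_schema_file
from sqlglider.lineage.analyzer import LineageAnalyzer  # module exists per RECORD; exact class location assumed

sql = Path("query.sql").read_text(encoding="utf-8")   # example input, not from the diff
schema = load_schema_file(Path("schema.json"))        # table -> {column -> type}

# Mirrors the changed constructor call: the analyzer now receives the provided
# schema alongside the existing dialect/no_star options.
analyzer = LineageAnalyzer(sql, dialect="spark", no_star=False, schema=schema)
# analyzer.analyze_queries(...) then proceeds as before (its arguments are elided in the diff).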
sqlglider/graph/builder.py CHANGED
@@ -303,6 +303,21 @@ class GraphBuilder:
             self.add_file(file_path, dialect)
         return self
 
+    def set_schema(self, schema: Dict[str, Dict[str, str]]) -> "GraphBuilder":
+        """Pre-seed the resolved schema from an external source.
+
+        This allows skipping the schema extraction pass when the schema
+        is already known (e.g., loaded from a file).
+
+        Args:
+            schema: Schema dictionary mapping table names to column dicts.
+
+        Returns:
+            self for method chaining
+        """
+        self._resolved_schema = schema
+        return self
+
     def extract_schemas(
         self,
         file_paths: List[Path],
@@ -325,6 +340,7 @@
             file_paths,
             dialect=file_dialect,
             sql_preprocessor=self.sql_preprocessor,
+            initial_schema=self._resolved_schema if self._resolved_schema else None,
             strict_schema=self.strict_schema,
             catalog_type=self.catalog_type,
             catalog_config=self.catalog_config,
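The same seeding is available programmatically. Below is a minimal sketch of the new set_schema hook; constructing GraphBuilder with no arguments is an assumption, while set_schema and the initial_schema pass-through come straight from the hunks above.

# Minimal sketch of pre-seeding a graph build with an externally provided schema.
# GraphBuilder() with default arguments is an assumption; set_schema is from the diff.
from pathlib import Path

from sqlglider.graph.builder import GraphBuilder
from sqlglider.graph.formatters import load_schema_file

schema = load_schema_file(Path("schema.csv"))   # e.g. {"orders": {"id": "INT", "total": "DECIMAL"}}

builder = GraphBuilder().set_schema(schema)     # returns self, so it chains
# A later extract_schemas(...) call forwards this dict as initial_schema, so
# file-extracted columns are merged on top of the provided schema instead of
# starting from an empty one (per the --provide-schema help text above).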
sqlglider/graph/formatters.py CHANGED
@@ -1,8 +1,9 @@
-"""Output formatters for resolved schema data."""
+"""Output formatters and parsers for resolved schema data."""
 
 import csv
 import json
 from io import StringIO
+from pathlib import Path
 from typing import Dict
 
 SchemaDict = Dict[str, Dict[str, str]]
@@ -96,3 +97,93 @@ def format_schema(schema: SchemaDict, output_format: str = "text") -> str:
         f"Invalid schema format '{output_format}'. Use 'text', 'json', or 'csv'."
     )
     return formatter(schema)
+
+
+def parse_schema_json(content: str) -> SchemaDict:
+    """Parse schema from JSON format.
+
+    Args:
+        content: JSON string with table -> {column -> type} structure.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    return json.loads(content)  # type: ignore[no-any-return]
+
+
+def parse_schema_csv(content: str) -> SchemaDict:
+    """Parse schema from CSV format.
+
+    Expects columns: table, column, type.
+
+    Args:
+        content: CSV string with header row.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    schema: SchemaDict = {}
+    reader = csv.DictReader(StringIO(content))
+    for row in reader:
+        table = row["table"]
+        column = row["column"]
+        col_type = row.get("type", "UNKNOWN")
+        if table not in schema:
+            schema[table] = {}
+        schema[table][column] = col_type
+    return schema
+
+
+def parse_schema_text(content: str) -> SchemaDict:
+    """Parse schema from indented text format.
+
+    Expected format:
+        table_name
+            column1
+            column2
+
+        other_table
+            col_a
+
+    Args:
+        content: Text-formatted schema string.
+
+    Returns:
+        Parsed schema dictionary.
+    """
+    schema: SchemaDict = {}
+    current_table: str | None = None
+    for line in content.splitlines():
+        if not line or not line.strip():
+            continue
+        if line.startswith(" "):
+            if current_table is not None:
+                schema[current_table][line.strip()] = "UNKNOWN"
+        else:
+            current_table = line.strip()
+            schema[current_table] = {}
+    return schema
+
+
+def load_schema_file(path: Path) -> SchemaDict:
+    """Load a schema file, auto-detecting format from extension.
+
+    `.json` → JSON, `.csv` → CSV, otherwise text.
+
+    Args:
+        path: Path to schema file.
+
+    Returns:
+        Parsed schema dictionary.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+    """
+    content = path.read_text(encoding="utf-8")
+    suffix = path.suffix.lower()
+    if suffix == ".json":
+        return parse_schema_json(content)
+    elif suffix == ".csv":
+        return parse_schema_csv(content)
+    else:
+        return parse_schema_text(content)
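To make the accepted inputs concrete, the worked example below exercises the parsers added above. The behaviour follows directly from their code; only the literal file contents and file name are made up.

# Worked example of the schema-file formats accepted by the new parsers.
from pathlib import Path

from sqlglider.graph.formatters import load_schema_file, parse_schema_csv, parse_schema_text

csv_content = "table,column,type\norders,id,INT\norders,total,DECIMAL\ncustomers,id,INT\n"
text_content = "orders\n  id\n  total\n\ncustomers\n  id\n"

# CSV rows carry a type per column; the indented text format does not,
# so every column parsed from text is typed "UNKNOWN".
assert parse_schema_csv(csv_content) == {
    "orders": {"id": "INT", "total": "DECIMAL"},
    "customers": {"id": "INT"},
}
assert parse_schema_text(text_content) == {
    "orders": {"id": "UNKNOWN", "total": "UNKNOWN"},
    "customers": {"id": "UNKNOWN"},
}

# load_schema_file dispatches on the suffix: .json and .csv get their parsers,
# anything else falls back to the text parser.
Path("schema.csv").write_text(csv_content, encoding="utf-8")
assert load_schema_file(Path("schema.csv")) == parse_schema_csv(csv_content)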
sqlglider/schema/extractor.py CHANGED
@@ -153,6 +153,7 @@ def extract_and_resolve_schema(
     file_paths: List[Path],
     dialect: str = "spark",
     sql_preprocessor: Optional[SqlPreprocessor] = None,
+    initial_schema: Optional[SchemaDict] = None,
     strict_schema: bool = False,
     catalog_type: Optional[str] = None,
     catalog_config: Optional[Dict[str, object]] = None,
@@ -167,6 +168,7 @@
         file_paths: SQL files to extract schema from.
         dialect: SQL dialect.
         sql_preprocessor: Optional SQL preprocessor.
+        initial_schema: Optional starting schema to build upon.
         strict_schema: If True, fail on ambiguous column attribution.
         catalog_type: Optional catalog provider name.
         catalog_config: Optional provider-specific configuration dict.
@@ -183,6 +185,7 @@
         file_paths,
         dialect=dialect,
         sql_preprocessor=sql_preprocessor,
+        initial_schema=initial_schema,
         strict_schema=strict_schema,
         console=console,
     )