sql-glider 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_glider-0.1.3.dist-info → sql_glider-0.1.5.dist-info}/METADATA +1 -1
- {sql_glider-0.1.3.dist-info → sql_glider-0.1.5.dist-info}/RECORD +7 -7
- sqlglider/_version.py +2 -2
- sqlglider/lineage/analyzer.py +402 -14
- {sql_glider-0.1.3.dist-info → sql_glider-0.1.5.dist-info}/WHEEL +0 -0
- {sql_glider-0.1.3.dist-info → sql_glider-0.1.5.dist-info}/entry_points.txt +0 -0
- {sql_glider-0.1.3.dist-info → sql_glider-0.1.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-glider
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
|
|
6
6
|
Project-URL: Repository, https://github.com/rycowhi/sql-glider/
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
|
|
2
|
-
sqlglider/_version.py,sha256=
|
|
2
|
+
sqlglider/_version.py,sha256=rdxBMYpwzYxiWk08QbPLHSAxHoDfeKWwyaJIAM0lSic,704
|
|
3
3
|
sqlglider/cli.py,sha256=9sweHRVLk2iBSzCzT2Gcj8y1g1XKzq26iApQsMaFbx4,51786
|
|
4
4
|
sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
|
|
5
5
|
sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
|
|
@@ -17,7 +17,7 @@ sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,933
|
|
|
17
17
|
sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
|
|
18
18
|
sqlglider/graph/serialization.py,sha256=7JJo31rwSlxnDhdqdTJdK4Dr_ZcSYetXfx3_CmndSac,2662
|
|
19
19
|
sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
|
|
20
|
-
sqlglider/lineage/analyzer.py,sha256=
|
|
20
|
+
sqlglider/lineage/analyzer.py,sha256=9Ly93TBcpc2sRP9NuwxB0jCL3YK-yCV1rih93MFM-S0,64313
|
|
21
21
|
sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
|
|
22
22
|
sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
|
|
23
23
|
sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
|
|
@@ -27,8 +27,8 @@ sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_Hf
|
|
|
27
27
|
sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
|
|
28
28
|
sqlglider/utils/config.py,sha256=iNJgSXFw3pmL2MCdvW3SJp4X2T3AQP2QyQuXIXT-6H0,4761
|
|
29
29
|
sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
|
|
30
|
-
sql_glider-0.1.
|
|
31
|
-
sql_glider-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
32
|
-
sql_glider-0.1.3.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
|
|
33
|
-
sql_glider-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
34
|
-
sql_glider-0.1.3.dist-info/RECORD,,
|
|
30
|
+
sql_glider-0.1.5.dist-info/METADATA,sha256=uLFCtpVoKp5F8ORuqJfJQtNZFjAJ3BQLavtRl-Bh0JA,28445
|
|
31
|
+
sql_glider-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
32
|
+
sql_glider-0.1.5.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
|
|
33
|
+
sql_glider-0.1.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
34
|
+
sql_glider-0.1.5.dist-info/RECORD,,
|
sqlglider/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 3)
|
|
31
|
+
__version__ = version = '0.1.5'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 5)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
sqlglider/lineage/analyzer.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Core lineage analysis using SQLGlot."""
|
|
2
2
|
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
|
|
4
|
+
from typing import Callable, Dict, Iterator, List, Optional, Set, Tuple, Union
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
7
|
from sqlglot import exp, parse
|
|
@@ -99,6 +99,9 @@ class LineageAnalyzer:
|
|
|
99
99
|
self.sql = sql
|
|
100
100
|
self.dialect = dialect
|
|
101
101
|
self._skipped_queries: List[SkippedQuery] = []
|
|
102
|
+
# File-scoped schema context for cross-statement lineage
|
|
103
|
+
# Maps table/view names to their column definitions
|
|
104
|
+
self._file_schema: Dict[str, Dict[str, str]] = {}
|
|
102
105
|
|
|
103
106
|
try:
|
|
104
107
|
# Parse all statements in the SQL string
|
|
@@ -156,26 +159,66 @@ class LineageAnalyzer:
|
|
|
156
159
|
# DML/DDL: Use target table for output column qualification
|
|
157
160
|
# The columns are from the SELECT, but qualified with the target table
|
|
158
161
|
projections = self._get_select_projections(select_node)
|
|
162
|
+
first_select = self._get_first_select(select_node)
|
|
163
|
+
|
|
159
164
|
for projection in projections:
|
|
165
|
+
# Handle SELECT * by resolving from file schema
|
|
166
|
+
if isinstance(projection, exp.Star):
|
|
167
|
+
if first_select:
|
|
168
|
+
star_columns = self._resolve_star_columns(first_select)
|
|
169
|
+
for star_col in star_columns:
|
|
170
|
+
qualified_name = f"{target_table}.{star_col}"
|
|
171
|
+
columns.append(qualified_name)
|
|
172
|
+
self._column_mapping[qualified_name] = star_col
|
|
173
|
+
if not columns:
|
|
174
|
+
# Fallback: can't resolve *, use * as column name
|
|
175
|
+
qualified_name = f"{target_table}.*"
|
|
176
|
+
columns.append(qualified_name)
|
|
177
|
+
self._column_mapping[qualified_name] = "*"
|
|
178
|
+
continue
|
|
179
|
+
|
|
160
180
|
# Get the underlying expression (unwrap alias if present)
|
|
161
181
|
if isinstance(projection, exp.Alias):
|
|
162
182
|
# For aliased columns, use the alias as the column name
|
|
163
183
|
column_name = projection.alias
|
|
164
184
|
lineage_name = column_name # SQLGlot lineage uses the alias
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
185
|
+
# Qualify with target table
|
|
186
|
+
qualified_name = f"{target_table}.{column_name}"
|
|
187
|
+
columns.append(qualified_name)
|
|
188
|
+
self._column_mapping[qualified_name] = lineage_name
|
|
189
|
+
elif isinstance(projection, exp.Column):
|
|
190
|
+
# Check if this is a table-qualified star (e.g., t.*)
|
|
191
|
+
if isinstance(projection.this, exp.Star):
|
|
192
|
+
source_table = projection.table
|
|
193
|
+
qualified_star_cols: List[str] = []
|
|
194
|
+
if source_table and first_select:
|
|
195
|
+
qualified_star_cols = self._resolve_qualified_star(
|
|
196
|
+
source_table, first_select
|
|
197
|
+
)
|
|
198
|
+
for col in qualified_star_cols:
|
|
199
|
+
qualified_name = f"{target_table}.{col}"
|
|
200
|
+
columns.append(qualified_name)
|
|
201
|
+
self._column_mapping[qualified_name] = col
|
|
202
|
+
if not qualified_star_cols:
|
|
203
|
+
# Fallback: can't resolve t.*, use * as column name
|
|
204
|
+
qualified_name = f"{target_table}.*"
|
|
205
|
+
columns.append(qualified_name)
|
|
206
|
+
self._column_mapping[qualified_name] = "*"
|
|
170
207
|
else:
|
|
171
|
-
|
|
172
|
-
column_name = source_expr.sql(dialect=self.dialect)
|
|
208
|
+
column_name = projection.name
|
|
173
209
|
lineage_name = column_name
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
210
|
+
# Qualify with target table
|
|
211
|
+
qualified_name = f"{target_table}.{column_name}"
|
|
212
|
+
columns.append(qualified_name)
|
|
213
|
+
self._column_mapping[qualified_name] = lineage_name
|
|
214
|
+
else:
|
|
215
|
+
# For expressions, use the SQL representation
|
|
216
|
+
column_name = projection.sql(dialect=self.dialect)
|
|
217
|
+
lineage_name = column_name
|
|
218
|
+
# Qualify with target table
|
|
219
|
+
qualified_name = f"{target_table}.{column_name}"
|
|
220
|
+
columns.append(qualified_name)
|
|
221
|
+
self._column_mapping[qualified_name] = lineage_name
|
|
179
222
|
|
|
180
223
|
else:
|
|
181
224
|
# DQL (pure SELECT): Use the SELECT columns as output
|
|
@@ -324,6 +367,7 @@ class LineageAnalyzer:
|
|
|
324
367
|
"""
|
|
325
368
|
results = []
|
|
326
369
|
self._skipped_queries = [] # Reset skipped queries for this analysis
|
|
370
|
+
self._file_schema = {} # Reset file schema for this analysis run
|
|
327
371
|
|
|
328
372
|
for query_index, expr, preview in self._iterate_queries(table_filter):
|
|
329
373
|
# Temporarily swap self.expr to analyze this query
|
|
@@ -375,6 +419,9 @@ class LineageAnalyzer:
|
|
|
375
419
|
)
|
|
376
420
|
)
|
|
377
421
|
finally:
|
|
422
|
+
# Extract schema from this statement AFTER analysis
|
|
423
|
+
# This builds up context for subsequent statements to use
|
|
424
|
+
self._extract_schema_from_statement(expr)
|
|
378
425
|
# Restore original expression
|
|
379
426
|
self.expr = original_expr
|
|
380
427
|
|
|
@@ -702,7 +749,13 @@ class LineageAnalyzer:
|
|
|
702
749
|
lineage_col = self._column_mapping.get(col, col)
|
|
703
750
|
|
|
704
751
|
# Get lineage tree for this column using current query SQL only
|
|
705
|
-
|
|
752
|
+
# Pass file schema to enable SELECT * expansion for known tables/views
|
|
753
|
+
node = lineage(
|
|
754
|
+
lineage_col,
|
|
755
|
+
current_query_sql,
|
|
756
|
+
dialect=self.dialect,
|
|
757
|
+
schema=self._file_schema if self._file_schema else None,
|
|
758
|
+
)
|
|
706
759
|
|
|
707
760
|
# Collect all source columns
|
|
708
761
|
sources: Set[str] = set()
|
|
@@ -1235,3 +1288,338 @@ class LineageAnalyzer:
|
|
|
1235
1288
|
preview = self._generate_query_preview(expr)
|
|
1236
1289
|
|
|
1237
1290
|
yield idx, expr, preview
|
|
1291
|
+
|
|
1292
|
+
# -------------------------------------------------------------------------
|
|
1293
|
+
# File-scoped schema context methods
|
|
1294
|
+
# -------------------------------------------------------------------------
|
|
1295
|
+
|
|
1296
|
+
def _extract_schema_from_statement(self, expr: exp.Expression) -> None:
|
|
1297
|
+
"""
|
|
1298
|
+
Extract column definitions from CREATE VIEW/TABLE AS SELECT statements.
|
|
1299
|
+
|
|
1300
|
+
This method builds up file-scoped schema context as statements are processed,
|
|
1301
|
+
enabling SQLGlot to correctly expand SELECT * and trace cross-statement references.
|
|
1302
|
+
|
|
1303
|
+
Args:
|
|
1304
|
+
expr: The SQL expression to extract schema from
|
|
1305
|
+
"""
|
|
1306
|
+
# Only handle CREATE VIEW or CREATE TABLE (AS SELECT)
|
|
1307
|
+
if not isinstance(expr, exp.Create):
|
|
1308
|
+
return
|
|
1309
|
+
if expr.kind not in ("VIEW", "TABLE"):
|
|
1310
|
+
return
|
|
1311
|
+
|
|
1312
|
+
# Get target table/view name
|
|
1313
|
+
target = expr.this
|
|
1314
|
+
if isinstance(target, exp.Schema):
|
|
1315
|
+
target = target.this
|
|
1316
|
+
if not isinstance(target, exp.Table):
|
|
1317
|
+
return
|
|
1318
|
+
|
|
1319
|
+
target_name = self._get_qualified_table_name(target)
|
|
1320
|
+
|
|
1321
|
+
# Get the SELECT node from the CREATE statement
|
|
1322
|
+
select_node = expr.expression
|
|
1323
|
+
if select_node is None:
|
|
1324
|
+
return
|
|
1325
|
+
|
|
1326
|
+
# Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
|
|
1327
|
+
if isinstance(select_node, exp.Subquery):
|
|
1328
|
+
select_node = select_node.this
|
|
1329
|
+
|
|
1330
|
+
if not isinstance(
|
|
1331
|
+
select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
|
|
1332
|
+
):
|
|
1333
|
+
return
|
|
1334
|
+
|
|
1335
|
+
# Extract column names from the SELECT
|
|
1336
|
+
columns = self._extract_columns_from_select(select_node)
|
|
1337
|
+
|
|
1338
|
+
if columns:
|
|
1339
|
+
# Store with UNKNOWN type - SQLGlot only needs column names for expansion
|
|
1340
|
+
self._file_schema[target_name] = {col: "UNKNOWN" for col in columns}
|
|
1341
|
+
|
|
1342
|
+
def _extract_columns_from_select(
|
|
1343
|
+
self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
|
|
1344
|
+
) -> List[str]:
|
|
1345
|
+
"""
|
|
1346
|
+
Extract column names from a SELECT statement.
|
|
1347
|
+
|
|
1348
|
+
Handles aliases, direct column references, and SELECT * by resolving
|
|
1349
|
+
against the known file schema.
|
|
1350
|
+
|
|
1351
|
+
Args:
|
|
1352
|
+
select_node: The SELECT or set operation expression
|
|
1353
|
+
|
|
1354
|
+
Returns:
|
|
1355
|
+
List of column names
|
|
1356
|
+
"""
|
|
1357
|
+
columns: List[str] = []
|
|
1358
|
+
|
|
1359
|
+
# Get projections (for UNION, use first branch)
|
|
1360
|
+
projections = self._get_select_projections(select_node)
|
|
1361
|
+
first_select = self._get_first_select(select_node)
|
|
1362
|
+
|
|
1363
|
+
for projection in projections:
|
|
1364
|
+
if isinstance(projection, exp.Alias):
|
|
1365
|
+
# Use the alias name as the column name
|
|
1366
|
+
columns.append(projection.alias)
|
|
1367
|
+
elif isinstance(projection, exp.Column):
|
|
1368
|
+
# Check if this is a table-qualified star (e.g., t.*)
|
|
1369
|
+
if isinstance(projection.this, exp.Star):
|
|
1370
|
+
# Resolve table-qualified star from known schema
|
|
1371
|
+
table_name = projection.table
|
|
1372
|
+
if table_name and first_select:
|
|
1373
|
+
qualified_star_cols = self._resolve_qualified_star(
|
|
1374
|
+
table_name, first_select
|
|
1375
|
+
)
|
|
1376
|
+
columns.extend(qualified_star_cols)
|
|
1377
|
+
else:
|
|
1378
|
+
# Use the column name
|
|
1379
|
+
columns.append(projection.name)
|
|
1380
|
+
elif isinstance(projection, exp.Star):
|
|
1381
|
+
# Resolve SELECT * from known schema
|
|
1382
|
+
if first_select:
|
|
1383
|
+
star_columns = self._resolve_star_columns(first_select)
|
|
1384
|
+
columns.extend(star_columns)
|
|
1385
|
+
else:
|
|
1386
|
+
# For expressions without alias, use SQL representation
|
|
1387
|
+
col_sql = projection.sql(dialect=self.dialect)
|
|
1388
|
+
columns.append(col_sql)
|
|
1389
|
+
|
|
1390
|
+
return columns
|
|
1391
|
+
|
|
1392
|
+
def _resolve_star_columns(self, select_node: exp.Select) -> List[str]:
|
|
1393
|
+
"""
|
|
1394
|
+
Resolve SELECT * to actual column names from known file schema or CTEs.
|
|
1395
|
+
|
|
1396
|
+
Args:
|
|
1397
|
+
select_node: The SELECT node containing the * reference
|
|
1398
|
+
|
|
1399
|
+
Returns:
|
|
1400
|
+
List of column names if source is known, empty list otherwise
|
|
1401
|
+
"""
|
|
1402
|
+
columns: List[str] = []
|
|
1403
|
+
|
|
1404
|
+
# Get the source table(s) from FROM clause
|
|
1405
|
+
from_clause = select_node.args.get("from")
|
|
1406
|
+
if not from_clause or not isinstance(from_clause, exp.From):
|
|
1407
|
+
return columns
|
|
1408
|
+
|
|
1409
|
+
source = from_clause.this
|
|
1410
|
+
|
|
1411
|
+
# Handle table reference from FROM clause
|
|
1412
|
+
columns.extend(self._resolve_source_columns(source, select_node))
|
|
1413
|
+
|
|
1414
|
+
# Handle JOIN clauses - collect columns from all joined tables
|
|
1415
|
+
joins = select_node.args.get("joins")
|
|
1416
|
+
if joins:
|
|
1417
|
+
for join in joins:
|
|
1418
|
+
if isinstance(join, exp.Join):
|
|
1419
|
+
join_source = join.this
|
|
1420
|
+
columns.extend(
|
|
1421
|
+
self._resolve_source_columns(join_source, select_node)
|
|
1422
|
+
)
|
|
1423
|
+
|
|
1424
|
+
# Handle LATERAL VIEW clauses - collect generated columns
|
|
1425
|
+
laterals = select_node.args.get("laterals")
|
|
1426
|
+
if laterals:
|
|
1427
|
+
for lateral in laterals:
|
|
1428
|
+
if isinstance(lateral, exp.Lateral):
|
|
1429
|
+
lateral_cols = self._resolve_lateral_columns(lateral)
|
|
1430
|
+
columns.extend(lateral_cols)
|
|
1431
|
+
|
|
1432
|
+
return columns
|
|
1433
|
+
|
|
1434
|
+
def _resolve_lateral_columns(self, lateral: exp.Lateral) -> List[str]:
|
|
1435
|
+
"""
|
|
1436
|
+
Extract generated column names from a LATERAL VIEW clause.
|
|
1437
|
+
|
|
1438
|
+
Args:
|
|
1439
|
+
lateral: The Lateral expression node
|
|
1440
|
+
|
|
1441
|
+
Returns:
|
|
1442
|
+
List of generated column names (e.g., ['elem'] for explode,
|
|
1443
|
+
['pos', 'elem'] for posexplode)
|
|
1444
|
+
"""
|
|
1445
|
+
# Use SQLGlot's built-in property to get alias column names
|
|
1446
|
+
return lateral.alias_column_names or []
|
|
1447
|
+
|
|
1448
|
+
def _resolve_source_columns(
|
|
1449
|
+
self, source: exp.Expression, select_node: exp.Select
|
|
1450
|
+
) -> List[str]:
|
|
1451
|
+
"""
|
|
1452
|
+
Resolve columns from a single source (table, subquery, etc.).
|
|
1453
|
+
|
|
1454
|
+
Args:
|
|
1455
|
+
source: The source expression (Table, Subquery, etc.)
|
|
1456
|
+
select_node: The containing SELECT node for CTE resolution
|
|
1457
|
+
|
|
1458
|
+
Returns:
|
|
1459
|
+
List of column names from the source
|
|
1460
|
+
"""
|
|
1461
|
+
columns: List[str] = []
|
|
1462
|
+
|
|
1463
|
+
# Handle table reference
|
|
1464
|
+
if isinstance(source, exp.Table):
|
|
1465
|
+
source_name = self._get_qualified_table_name(source)
|
|
1466
|
+
|
|
1467
|
+
# First check file schema (views/tables from previous statements)
|
|
1468
|
+
if source_name in self._file_schema:
|
|
1469
|
+
columns.extend(self._file_schema[source_name].keys())
|
|
1470
|
+
else:
|
|
1471
|
+
# Check if this is a CTE reference within the same statement
|
|
1472
|
+
cte_columns = self._resolve_cte_columns(source_name, select_node)
|
|
1473
|
+
columns.extend(cte_columns)
|
|
1474
|
+
|
|
1475
|
+
# Handle subquery with alias
|
|
1476
|
+
elif isinstance(source, exp.Subquery):
|
|
1477
|
+
# First check if this subquery alias is in file schema
|
|
1478
|
+
if source.alias and source.alias in self._file_schema:
|
|
1479
|
+
columns.extend(self._file_schema[source.alias].keys())
|
|
1480
|
+
else:
|
|
1481
|
+
# Extract columns from the subquery's SELECT
|
|
1482
|
+
inner_select = source.this
|
|
1483
|
+
if isinstance(inner_select, exp.Select):
|
|
1484
|
+
subquery_cols = self._extract_subquery_columns(inner_select)
|
|
1485
|
+
columns.extend(subquery_cols)
|
|
1486
|
+
|
|
1487
|
+
return columns
|
|
1488
|
+
|
|
1489
|
+
def _resolve_qualified_star(
|
|
1490
|
+
self, table_name: str, select_node: exp.Select
|
|
1491
|
+
) -> List[str]:
|
|
1492
|
+
"""
|
|
1493
|
+
Resolve a table-qualified star (e.g., t.*) to actual column names.
|
|
1494
|
+
|
|
1495
|
+
Args:
|
|
1496
|
+
table_name: The table/alias name qualifying the star
|
|
1497
|
+
select_node: The SELECT node for context
|
|
1498
|
+
|
|
1499
|
+
Returns:
|
|
1500
|
+
List of column names from the specified table
|
|
1501
|
+
"""
|
|
1502
|
+
# First check file schema
|
|
1503
|
+
if table_name in self._file_schema:
|
|
1504
|
+
return list(self._file_schema[table_name].keys())
|
|
1505
|
+
|
|
1506
|
+
# Check if it's a CTE reference
|
|
1507
|
+
cte_columns = self._resolve_cte_columns(table_name, select_node)
|
|
1508
|
+
if cte_columns:
|
|
1509
|
+
return cte_columns
|
|
1510
|
+
|
|
1511
|
+
# Check if the table name is an alias - need to resolve the actual table
|
|
1512
|
+
from_clause = select_node.args.get("from")
|
|
1513
|
+
if from_clause and isinstance(from_clause, exp.From):
|
|
1514
|
+
source = from_clause.this
|
|
1515
|
+
if isinstance(source, exp.Table) and source.alias == table_name:
|
|
1516
|
+
actual_name = self._get_qualified_table_name(source)
|
|
1517
|
+
if actual_name in self._file_schema:
|
|
1518
|
+
return list(self._file_schema[actual_name].keys())
|
|
1519
|
+
|
|
1520
|
+
# Check JOIN clauses for aliased tables
|
|
1521
|
+
joins = select_node.args.get("joins")
|
|
1522
|
+
if joins:
|
|
1523
|
+
for join in joins:
|
|
1524
|
+
if isinstance(join, exp.Join):
|
|
1525
|
+
join_source = join.this
|
|
1526
|
+
if (
|
|
1527
|
+
isinstance(join_source, exp.Table)
|
|
1528
|
+
and join_source.alias == table_name
|
|
1529
|
+
):
|
|
1530
|
+
actual_name = self._get_qualified_table_name(join_source)
|
|
1531
|
+
if actual_name in self._file_schema:
|
|
1532
|
+
return list(self._file_schema[actual_name].keys())
|
|
1533
|
+
|
|
1534
|
+
return []
|
|
1535
|
+
|
|
1536
|
+
def _extract_subquery_columns(self, subquery_select: exp.Select) -> List[str]:
|
|
1537
|
+
"""
|
|
1538
|
+
Extract column names from a subquery's SELECT statement.
|
|
1539
|
+
|
|
1540
|
+
Args:
|
|
1541
|
+
subquery_select: The SELECT expression within the subquery
|
|
1542
|
+
|
|
1543
|
+
Returns:
|
|
1544
|
+
List of column names
|
|
1545
|
+
"""
|
|
1546
|
+
columns: List[str] = []
|
|
1547
|
+
|
|
1548
|
+
for projection in subquery_select.expressions:
|
|
1549
|
+
if isinstance(projection, exp.Alias):
|
|
1550
|
+
columns.append(projection.alias)
|
|
1551
|
+
elif isinstance(projection, exp.Column):
|
|
1552
|
+
# Check for table-qualified star (t.*)
|
|
1553
|
+
if isinstance(projection.this, exp.Star):
|
|
1554
|
+
table_name = projection.table
|
|
1555
|
+
if table_name:
|
|
1556
|
+
qualified_cols = self._resolve_qualified_star(
|
|
1557
|
+
table_name, subquery_select
|
|
1558
|
+
)
|
|
1559
|
+
columns.extend(qualified_cols)
|
|
1560
|
+
else:
|
|
1561
|
+
columns.append(projection.name)
|
|
1562
|
+
elif isinstance(projection, exp.Star):
|
|
1563
|
+
# Resolve SELECT * in subquery
|
|
1564
|
+
star_columns = self._resolve_star_columns(subquery_select)
|
|
1565
|
+
columns.extend(star_columns)
|
|
1566
|
+
else:
|
|
1567
|
+
col_sql = projection.sql(dialect=self.dialect)
|
|
1568
|
+
columns.append(col_sql)
|
|
1569
|
+
|
|
1570
|
+
return columns
|
|
1571
|
+
|
|
1572
|
+
def _resolve_cte_columns(self, cte_name: str, select_node: exp.Select) -> List[str]:
|
|
1573
|
+
"""
|
|
1574
|
+
Resolve columns from a CTE definition within the same statement.
|
|
1575
|
+
|
|
1576
|
+
Args:
|
|
1577
|
+
cte_name: Name of the CTE to resolve
|
|
1578
|
+
select_node: The SELECT node that references the CTE
|
|
1579
|
+
|
|
1580
|
+
Returns:
|
|
1581
|
+
List of column names from the CTE, empty if CTE not found
|
|
1582
|
+
"""
|
|
1583
|
+
# Walk up the tree to find the WITH clause containing this CTE
|
|
1584
|
+
parent = select_node
|
|
1585
|
+
while parent:
|
|
1586
|
+
if hasattr(parent, "args") and parent.args.get("with"):
|
|
1587
|
+
with_clause = parent.args["with"]
|
|
1588
|
+
for cte in with_clause.expressions:
|
|
1589
|
+
if isinstance(cte, exp.CTE) and cte.alias == cte_name:
|
|
1590
|
+
# Found the CTE - extract its columns
|
|
1591
|
+
cte_select = cte.this
|
|
1592
|
+
if isinstance(cte_select, exp.Select):
|
|
1593
|
+
return self._extract_cte_select_columns(cte_select)
|
|
1594
|
+
parent = parent.parent if hasattr(parent, "parent") else None
|
|
1595
|
+
|
|
1596
|
+
return []
|
|
1597
|
+
|
|
1598
|
+
def _extract_cte_select_columns(self, cte_select: exp.Select) -> List[str]:
|
|
1599
|
+
"""
|
|
1600
|
+
Extract column names from a CTE's SELECT statement.
|
|
1601
|
+
|
|
1602
|
+
This handles SELECT * within the CTE by resolving against file schema.
|
|
1603
|
+
|
|
1604
|
+
Args:
|
|
1605
|
+
cte_select: The SELECT expression within the CTE
|
|
1606
|
+
|
|
1607
|
+
Returns:
|
|
1608
|
+
List of column names
|
|
1609
|
+
"""
|
|
1610
|
+
columns: List[str] = []
|
|
1611
|
+
|
|
1612
|
+
for projection in cte_select.expressions:
|
|
1613
|
+
if isinstance(projection, exp.Alias):
|
|
1614
|
+
columns.append(projection.alias)
|
|
1615
|
+
elif isinstance(projection, exp.Column):
|
|
1616
|
+
columns.append(projection.name)
|
|
1617
|
+
elif isinstance(projection, exp.Star):
|
|
1618
|
+
# Resolve SELECT * in CTE from file schema
|
|
1619
|
+
star_columns = self._resolve_star_columns(cte_select)
|
|
1620
|
+
columns.extend(star_columns)
|
|
1621
|
+
else:
|
|
1622
|
+
col_sql = projection.sql(dialect=self.dialect)
|
|
1623
|
+
columns.append(col_sql)
|
|
1624
|
+
|
|
1625
|
+
return columns
|
|
File without changes
|
|
File without changes
|
|
File without changes
|