sql-glider 0.1.4__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. {sql_glider-0.1.4 → sql_glider-0.1.5}/PKG-INFO +1 -1
  2. sql_glider-0.1.5/plans/2026-01-26-file-scoped-schema-context.md +199 -0
  3. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/_version.py +2 -2
  4. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/lineage/analyzer.py +192 -18
  5. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/lineage/test_analyzer.py +217 -0
  6. {sql_glider-0.1.4 → sql_glider-0.1.5}/.github/workflows/ci.yml +0 -0
  7. {sql_glider-0.1.4 → sql_glider-0.1.5}/.github/workflows/publish.yml +0 -0
  8. {sql_glider-0.1.4 → sql_glider-0.1.5}/.gitignore +0 -0
  9. {sql_glider-0.1.4 → sql_glider-0.1.5}/.python-version +0 -0
  10. {sql_glider-0.1.4 → sql_glider-0.1.5}/ARCHITECTURE.md +0 -0
  11. {sql_glider-0.1.4 → sql_glider-0.1.5}/CLAUDE.md +0 -0
  12. {sql_glider-0.1.4 → sql_glider-0.1.5}/LICENSE +0 -0
  13. {sql_glider-0.1.4 → sql_glider-0.1.5}/README.md +0 -0
  14. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-05-column-level-lineage.md +0 -0
  15. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-05-reverse-lineage.md +0 -0
  16. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-06-config-file-support.md +0 -0
  17. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-06-graph-lineage.md +0 -0
  18. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  19. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-07-sample-data-model.md +0 -0
  20. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-07-sql-templating.md +0 -0
  21. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-08-tables-command.md +0 -0
  22. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-09-graph-query-paths.md +0 -0
  23. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-13-dissect-command.md +0 -0
  24. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2025-12-14-tables-pull-command.md +0 -0
  25. {sql_glider-0.1.4 → sql_glider-0.1.5}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  26. {sql_glider-0.1.4 → sql_glider-0.1.5}/pyproject.toml +0 -0
  27. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/README.md +0 -0
  28. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/expire_dim_customer.sql +0 -0
  29. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/load_fact_orders.sql +0 -0
  30. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/load_fact_payments.sql +0 -0
  31. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/merge_dim_customer.sql +0 -0
  32. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/merge_dim_product.sql +0 -0
  33. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  34. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/complex/conditional_merge.sql +0 -0
  35. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/complex/cte_insert.sql +0 -0
  36. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/complex/multi_table_transform.sql +0 -0
  37. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/dim_customer.sql +0 -0
  38. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/dim_product.sql +0 -0
  39. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/fact_orders.sql +0 -0
  40. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/fact_payments.sql +0 -0
  41. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_addresses.sql +0 -0
  42. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_customers.sql +0 -0
  43. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_order_items.sql +0 -0
  44. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_orders.sql +0 -0
  45. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_payments.sql +0 -0
  46. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/raw_products.sql +0 -0
  47. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/stg_customers.sql +0 -0
  48. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/stg_orders.sql +0 -0
  49. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/stg_payments.sql +0 -0
  50. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/ddl/stg_products.sql +0 -0
  51. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  52. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  53. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  54. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  55. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/maintenance/update_product_status.sql +0 -0
  56. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  57. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  58. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  59. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  60. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/staging/load_stg_customers.sql +0 -0
  61. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/staging/load_stg_orders.sql +0 -0
  62. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/staging/load_stg_payments.sql +0 -0
  63. {sql_glider-0.1.4 → sql_glider-0.1.5}/sample_data_model/staging/load_stg_products.sql +0 -0
  64. {sql_glider-0.1.4 → sql_glider-0.1.5}/sqlglider.toml.example +0 -0
  65. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/__init__.py +0 -0
  66. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/catalog/__init__.py +0 -0
  67. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/catalog/base.py +0 -0
  68. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/catalog/databricks.py +0 -0
  69. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/catalog/registry.py +0 -0
  70. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/cli.py +0 -0
  71. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/dissection/__init__.py +0 -0
  72. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/dissection/analyzer.py +0 -0
  73. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/dissection/formatters.py +0 -0
  74. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/dissection/models.py +0 -0
  75. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/global_models.py +0 -0
  76. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/__init__.py +0 -0
  77. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/builder.py +0 -0
  78. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/merge.py +0 -0
  79. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/models.py +0 -0
  80. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/query.py +0 -0
  81. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/graph/serialization.py +0 -0
  82. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/lineage/__init__.py +0 -0
  83. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/lineage/formatters.py +0 -0
  84. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/templating/__init__.py +0 -0
  85. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/templating/base.py +0 -0
  86. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/templating/jinja.py +0 -0
  87. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/templating/registry.py +0 -0
  88. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/templating/variables.py +0 -0
  89. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/utils/__init__.py +0 -0
  90. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/utils/config.py +0 -0
  91. {sql_glider-0.1.4 → sql_glider-0.1.5}/src/sqlglider/utils/file_utils.py +0 -0
  92. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/__init__.py +0 -0
  93. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  94. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  95. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  96. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  97. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  98. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  99. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_cte.sql +0 -0
  100. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  101. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  102. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_multi.sql +0 -0
  103. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  104. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  105. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  106. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_tables.sql +0 -0
  107. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_view.sql +0 -0
  108. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  109. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/fixtures/sample_manifest.csv +0 -0
  110. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/__init__.py +0 -0
  111. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/catalog/__init__.py +0 -0
  112. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/catalog/test_base.py +0 -0
  113. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/catalog/test_databricks.py +0 -0
  114. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/catalog/test_registry.py +0 -0
  115. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/dissection/__init__.py +0 -0
  116. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  117. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/dissection/test_formatters.py +0 -0
  118. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/dissection/test_models.py +0 -0
  119. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/__init__.py +0 -0
  120. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/test_builder.py +0 -0
  121. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/test_merge.py +0 -0
  122. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/test_models.py +0 -0
  123. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/test_query.py +0 -0
  124. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/graph/test_serialization.py +0 -0
  125. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/lineage/__init__.py +0 -0
  126. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/lineage/test_formatters.py +0 -0
  127. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/templating/__init__.py +0 -0
  128. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/templating/test_base.py +0 -0
  129. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/templating/test_jinja.py +0 -0
  130. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/templating/test_registry.py +0 -0
  131. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/templating/test_variables.py +0 -0
  132. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/test_cli.py +0 -0
  133. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/utils/__init__.py +0 -0
  134. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/utils/test_config.py +0 -0
  135. {sql_glider-0.1.4 → sql_glider-0.1.5}/tests/sqlglider/utils/test_file_utils.py +0 -0
  136. {sql_glider-0.1.4 → sql_glider-0.1.5}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -0,0 +1,199 @@
1
+ # Plan: File-Scoped Schema Context for SQL Lineage Analyzer
2
+
3
+ **Status:** Completed
4
+
5
+ ## Summary
6
+
7
+ Add file-scoped schema context to the SQL Glider lineage analyzer so that SQLGlot can correctly expand `SELECT *` and trace cross-statement references when a file contains multiple related statements.
8
+
9
+ ## Problem
10
+
11
+ When analyzing this SQL:
12
+ ```sql
13
+ CREATE TEMPORARY VIEW first_view AS (SELECT a, b, c FROM source_table);
14
+ CREATE TEMPORARY VIEW second_view AS
15
+ WITH first_view_cte AS (
16
+ SELECT *, row_number() OVER (PARTITION BY a ORDER BY b DESC) AS row_num
17
+ FROM first_view
18
+ )
19
+ SELECT * FROM first_view_cte WHERE c = 1;
20
+ ```
21
+
22
+ **Previous output:** `* -> second_view.*` (useless - no column-level lineage)
23
+ **Expected output:** `first_view.a -> second_view.a`, `first_view.b -> second_view.b`, etc.
24
+
25
+ ## Root Cause
26
+
27
+ SQLGlot's `lineage()` function accepts a `schema` parameter that provides table/view column definitions. Without this schema context, SQLGlot cannot expand `SELECT *` to actual column names.
28
+
29
+ ## Solution
30
+
31
+ Build up schema context incrementally as CREATE VIEW/TABLE statements are processed, then pass that schema to subsequent `lineage()` calls.
32
+
33
+ ---
34
+
35
+ ## Implementation Steps
36
+
37
+ ### 1. Add Schema Instance Variable
38
+
39
+ - [x] Add `_file_schema: Dict[str, Dict[str, str]] = {}` to `LineageAnalyzer.__init__()`
40
+
41
+ ### 2. Add Schema Extraction Methods
42
+
43
+ - [x] `_extract_schema_from_statement()` - Extract columns from CREATE VIEW/TABLE AS SELECT
44
+ - [x] `_extract_columns_from_select()` - Extract column names from SELECT projections
45
+ - [x] `_resolve_star_columns()` - Resolve SELECT * from file schema or CTEs
46
+ - [x] `_resolve_source_columns()` - Resolve columns from a single source (table, subquery)
47
+ - [x] `_resolve_qualified_star()` - Resolve table-qualified star (e.g., `t.*`)
48
+ - [x] `_extract_subquery_columns()` - Extract columns from subquery's SELECT
49
+ - [x] `_resolve_cte_columns()` - Resolve columns from CTE definitions
50
+ - [x] `_extract_cte_select_columns()` - Extract columns from CTE's SELECT
51
+
52
+ ### 3. Integrate Schema Building into Analysis Loop
53
+
54
+ - [x] Reset `_file_schema = {}` at start of `analyze_queries()`
55
+ - [x] Call `_extract_schema_from_statement(expr)` in `finally` block AFTER analysis
56
+ - [x] Critical: Schema must be extracted AFTER analysis to avoid confusing SQLGlot
57
+
58
+ ### 4. Pass Schema to lineage() Calls
59
+
60
+ - [x] Modify `_analyze_column_lineage_internal()` to pass schema:
61
+ ```python
62
+ node = lineage(
63
+ lineage_col,
64
+ current_query_sql,
65
+ dialect=self.dialect,
66
+ schema=self._file_schema if self._file_schema else None,
67
+ )
68
+ ```
69
+
70
+ ### 5. Handle SELECT * in get_output_columns()
71
+
72
+ - [x] Handle `exp.Star` projections by resolving from file schema
73
+ - [x] Handle table-qualified stars (`t.*`) represented as `exp.Column` with `exp.Star` as `this`
74
+
75
+ ---
76
+
77
+ ## Edge Cases Handled
78
+
79
+ | Case | Implementation |
80
+ |------|----------------|
81
+ | `SELECT *` from unknown table | Returns empty columns, falls back to `*` behavior |
82
+ | Nested `SELECT *` through CTEs | Resolves CTE source from schema first |
83
+ | UNION in CREATE VIEW | Uses first branch's columns |
84
+ | Expressions without aliases | Uses SQL representation as column name |
85
+ | TEMPORARY VIEW | Treated same as regular VIEW |
86
+ | Multiple JOINs | Collects columns from all joined tables |
87
+ | LEFT/RIGHT/FULL OUTER JOIN | Same handling as INNER JOIN |
88
+ | CROSS JOIN | Same handling as INNER JOIN |
89
+ | Subquery in FROM clause | Extracts columns from inner SELECT |
90
+ | Table aliases (`v1 AS x`) | Resolves alias to actual table name |
91
+ | Schema-qualified names | Handles `schema.table` correctly |
92
+ | CTE referencing earlier CTE | Recursive CTE column resolution |
93
+ | `SELECT *, extra_col` | Combines * expansion with extra columns |
94
+ | Table-qualified `t.*` | Handles `v1.*` style syntax |
95
+ | LATERAL VIEW explode | Collects generated columns from `laterals` clause |
96
+ | LATERAL VIEW posexplode | Collects both position and element columns |
97
+ | Multiple LATERAL VIEWs | Collects columns from all LATERAL VIEWs |
98
+ | LATERAL VIEW OUTER | Same handling as regular LATERAL VIEW |
99
+
100
+ ---
101
+
102
+ ## Files Modified
103
+
104
+ | File | Changes |
105
+ |------|---------|
106
+ | `src/sqlglider/lineage/analyzer.py` | Added `_file_schema` instance variable; Added 9 schema extraction methods (including `_resolve_lateral_columns`); Modified `analyze_queries()` and `_analyze_column_lineage_internal()` and `get_output_columns()` |
107
+ | `tests/sqlglider/lineage/test_analyzer.py` | Added `TestFileSchemaExtraction` (9 tests), `TestCrossStatementLineage` (12 tests), and `TestLateralViewColumnResolution` (5 tests) |
108
+
109
+ ---
110
+
111
+ ## Testing
112
+
113
+ ### Test Classes Added
114
+
115
+ **TestFileSchemaExtraction (9 tests):**
116
+ - `test_extract_schema_from_create_view`
117
+ - `test_extract_schema_from_create_temporary_view`
118
+ - `test_extract_schema_from_create_table_as`
119
+ - `test_extract_schema_with_aliases`
120
+ - `test_extract_schema_select_star_from_known_table`
121
+ - `test_extract_schema_select_star_from_unknown_table`
122
+ - `test_schema_not_extracted_from_pure_select`
123
+ - `test_schema_not_extracted_from_insert`
124
+ - `test_schema_reset_between_analysis_calls`
125
+
126
+ **TestCrossStatementLineage (12 tests):**
127
+ - `test_view_referencing_earlier_view`
128
+ - `test_select_star_expansion_through_view`
129
+ - `test_cte_with_select_star_from_view`
130
+ - `test_window_function_with_select_star`
131
+ - `test_insert_from_view_lineage`
132
+ - `test_multi_hop_view_lineage`
133
+ - `test_original_problem_scenario`
134
+ - `test_select_star_from_join`
135
+ - `test_nested_ctes_and_views_with_select_star`
136
+ - `test_select_star_from_subquery`
137
+ - `test_table_qualified_star`
138
+ - `test_table_qualified_star_with_alias`
139
+
140
+ **TestLateralViewColumnResolution (5 tests):**
141
+ - `test_select_star_with_lateral_view_explode`
142
+ - `test_select_star_with_lateral_view_posexplode`
143
+ - `test_select_star_with_multiple_lateral_views`
144
+ - `test_select_star_with_lateral_view_outer`
145
+ - `test_lateral_view_with_join`
146
+
147
+ ### Verification Commands
148
+
149
+ ```bash
150
+ # Run all tests
151
+ uv run pytest --cov=sqlglider --cov-fail-under=80
152
+
153
+ # Run schema-related tests
154
+ uv run pytest tests/sqlglider/lineage/test_analyzer.py -k "schema or CrossStatement" -v
155
+
156
+ # Test the original problem scenario
157
+ uv run sqlglider graph build test_view_window_cte.sql --dialect spark --output graph.json
158
+ ```
159
+
160
+ ---
161
+
162
+ ## Implementation Notes
163
+
164
+ ### Critical Timing Issue
165
+
166
+ Initially, schema extraction was done BEFORE analysis in the loop, which caused SQLGlot to return unqualified column names (e.g., `customer_id` instead of `orders.customer_id`).
167
+
168
+ **Fix:** Move `_extract_schema_from_statement(expr)` to the `finally` block AFTER analysis completes. This ensures:
169
+ 1. The current statement is analyzed without its own schema (correct behavior)
170
+ 2. The schema is then extracted for use by subsequent statements
171
+
172
+ ### Table-Qualified Star Handling
173
+
174
+ Table-qualified stars (`v1.*`) are represented differently than unqualified stars (`*`):
175
+ - `*` is `exp.Star`
176
+ - `v1.*` is `exp.Column` with `this` being `exp.Star` and `table` being `v1`
177
+
178
+ Both cases needed handling in:
179
+ - `_extract_columns_from_select()` for schema extraction
180
+ - `get_output_columns()` for lineage analysis output
181
+
182
+ ### Subquery Column Resolution
183
+
184
+ For `SELECT * FROM (SELECT * FROM v1) sub`, the code:
185
+ 1. Detects the subquery in `_resolve_source_columns()`
186
+ 2. Extracts columns from the inner SELECT via `_extract_subquery_columns()`
187
+ 3. Recursively resolves any `SELECT *` in the inner query
188
+
189
+ ---
190
+
191
+ ## Lessons Learned
192
+
193
+ 1. **Timing matters:** Schema context must be built AFTER analyzing a statement, not before, to avoid confusing SQLGlot's lineage tracing.
194
+
195
+ 2. **AST structure varies:** Different SQL constructs have different AST representations (e.g., `*` vs `t.*`), requiring multiple code paths.
196
+
197
+ 3. **Recursive resolution:** CTEs and subqueries can reference other CTEs/views, requiring recursive column resolution.
198
+
199
+ 4. **Edge cases compound:** JOINs + aliases + qualified stars can all combine, requiring careful handling of each case.
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.4'
32
- __version_tuple__ = version_tuple = (0, 1, 4)
31
+ __version__ = version = '0.1.5'
32
+ __version_tuple__ = version_tuple = (0, 1, 5)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -182,20 +182,43 @@ class LineageAnalyzer:
182
182
  # For aliased columns, use the alias as the column name
183
183
  column_name = projection.alias
184
184
  lineage_name = column_name # SQLGlot lineage uses the alias
185
- else:
186
- source_expr = projection
187
- if isinstance(source_expr, exp.Column):
188
- column_name = source_expr.name
189
- lineage_name = column_name
185
+ # Qualify with target table
186
+ qualified_name = f"{target_table}.{column_name}"
187
+ columns.append(qualified_name)
188
+ self._column_mapping[qualified_name] = lineage_name
189
+ elif isinstance(projection, exp.Column):
190
+ # Check if this is a table-qualified star (e.g., t.*)
191
+ if isinstance(projection.this, exp.Star):
192
+ source_table = projection.table
193
+ qualified_star_cols: List[str] = []
194
+ if source_table and first_select:
195
+ qualified_star_cols = self._resolve_qualified_star(
196
+ source_table, first_select
197
+ )
198
+ for col in qualified_star_cols:
199
+ qualified_name = f"{target_table}.{col}"
200
+ columns.append(qualified_name)
201
+ self._column_mapping[qualified_name] = col
202
+ if not qualified_star_cols:
203
+ # Fallback: can't resolve t.*, use * as column name
204
+ qualified_name = f"{target_table}.*"
205
+ columns.append(qualified_name)
206
+ self._column_mapping[qualified_name] = "*"
190
207
  else:
191
- # For expressions, use the SQL representation
192
- column_name = source_expr.sql(dialect=self.dialect)
208
+ column_name = projection.name
193
209
  lineage_name = column_name
194
-
195
- # Qualify with target table
196
- qualified_name = f"{target_table}.{column_name}"
197
- columns.append(qualified_name)
198
- self._column_mapping[qualified_name] = lineage_name
210
+ # Qualify with target table
211
+ qualified_name = f"{target_table}.{column_name}"
212
+ columns.append(qualified_name)
213
+ self._column_mapping[qualified_name] = lineage_name
214
+ else:
215
+ # For expressions, use the SQL representation
216
+ column_name = projection.sql(dialect=self.dialect)
217
+ lineage_name = column_name
218
+ # Qualify with target table
219
+ qualified_name = f"{target_table}.{column_name}"
220
+ columns.append(qualified_name)
221
+ self._column_mapping[qualified_name] = lineage_name
199
222
 
200
223
  else:
201
224
  # DQL (pure SELECT): Use the SELECT columns as output
@@ -1342,8 +1365,18 @@ class LineageAnalyzer:
1342
1365
  # Use the alias name as the column name
1343
1366
  columns.append(projection.alias)
1344
1367
  elif isinstance(projection, exp.Column):
1345
- # Use the column name
1346
- columns.append(projection.name)
1368
+ # Check if this is a table-qualified star (e.g., t.*)
1369
+ if isinstance(projection.this, exp.Star):
1370
+ # Resolve table-qualified star from known schema
1371
+ table_name = projection.table
1372
+ if table_name and first_select:
1373
+ qualified_star_cols = self._resolve_qualified_star(
1374
+ table_name, first_select
1375
+ )
1376
+ columns.extend(qualified_star_cols)
1377
+ else:
1378
+ # Use the column name
1379
+ columns.append(projection.name)
1347
1380
  elif isinstance(projection, exp.Star):
1348
1381
  # Resolve SELECT * from known schema
1349
1382
  if first_select:
@@ -1375,6 +1408,58 @@ class LineageAnalyzer:
1375
1408
 
1376
1409
  source = from_clause.this
1377
1410
 
1411
+ # Handle table reference from FROM clause
1412
+ columns.extend(self._resolve_source_columns(source, select_node))
1413
+
1414
+ # Handle JOIN clauses - collect columns from all joined tables
1415
+ joins = select_node.args.get("joins")
1416
+ if joins:
1417
+ for join in joins:
1418
+ if isinstance(join, exp.Join):
1419
+ join_source = join.this
1420
+ columns.extend(
1421
+ self._resolve_source_columns(join_source, select_node)
1422
+ )
1423
+
1424
+ # Handle LATERAL VIEW clauses - collect generated columns
1425
+ laterals = select_node.args.get("laterals")
1426
+ if laterals:
1427
+ for lateral in laterals:
1428
+ if isinstance(lateral, exp.Lateral):
1429
+ lateral_cols = self._resolve_lateral_columns(lateral)
1430
+ columns.extend(lateral_cols)
1431
+
1432
+ return columns
1433
+
1434
+ def _resolve_lateral_columns(self, lateral: exp.Lateral) -> List[str]:
1435
+ """
1436
+ Extract generated column names from a LATERAL VIEW clause.
1437
+
1438
+ Args:
1439
+ lateral: The Lateral expression node
1440
+
1441
+ Returns:
1442
+ List of generated column names (e.g., ['elem'] for explode,
1443
+ ['pos', 'elem'] for posexplode)
1444
+ """
1445
+ # Use SQLGlot's built-in property to get alias column names
1446
+ return lateral.alias_column_names or []
1447
+
1448
+ def _resolve_source_columns(
1449
+ self, source: exp.Expression, select_node: exp.Select
1450
+ ) -> List[str]:
1451
+ """
1452
+ Resolve columns from a single source (table, subquery, etc.).
1453
+
1454
+ Args:
1455
+ source: The source expression (Table, Subquery, etc.)
1456
+ select_node: The containing SELECT node for CTE resolution
1457
+
1458
+ Returns:
1459
+ List of column names from the source
1460
+ """
1461
+ columns: List[str] = []
1462
+
1378
1463
  # Handle table reference
1379
1464
  if isinstance(source, exp.Table):
1380
1465
  source_name = self._get_qualified_table_name(source)
@@ -1387,11 +1472,100 @@ class LineageAnalyzer:
1387
1472
  cte_columns = self._resolve_cte_columns(source_name, select_node)
1388
1473
  columns.extend(cte_columns)
1389
1474
 
1390
- # Handle subquery - can't resolve without deeper analysis
1391
- elif isinstance(source, exp.Subquery) and source.alias:
1392
- # Check if this subquery alias is in file schema (unlikely)
1393
- if source.alias in self._file_schema:
1475
+ # Handle subquery with alias
1476
+ elif isinstance(source, exp.Subquery):
1477
+ # First check if this subquery alias is in file schema
1478
+ if source.alias and source.alias in self._file_schema:
1394
1479
  columns.extend(self._file_schema[source.alias].keys())
1480
+ else:
1481
+ # Extract columns from the subquery's SELECT
1482
+ inner_select = source.this
1483
+ if isinstance(inner_select, exp.Select):
1484
+ subquery_cols = self._extract_subquery_columns(inner_select)
1485
+ columns.extend(subquery_cols)
1486
+
1487
+ return columns
1488
+
1489
+ def _resolve_qualified_star(
1490
+ self, table_name: str, select_node: exp.Select
1491
+ ) -> List[str]:
1492
+ """
1493
+ Resolve a table-qualified star (e.g., t.*) to actual column names.
1494
+
1495
+ Args:
1496
+ table_name: The table/alias name qualifying the star
1497
+ select_node: The SELECT node for context
1498
+
1499
+ Returns:
1500
+ List of column names from the specified table
1501
+ """
1502
+ # First check file schema
1503
+ if table_name in self._file_schema:
1504
+ return list(self._file_schema[table_name].keys())
1505
+
1506
+ # Check if it's a CTE reference
1507
+ cte_columns = self._resolve_cte_columns(table_name, select_node)
1508
+ if cte_columns:
1509
+ return cte_columns
1510
+
1511
+ # Check if the table name is an alias - need to resolve the actual table
1512
+ from_clause = select_node.args.get("from")
1513
+ if from_clause and isinstance(from_clause, exp.From):
1514
+ source = from_clause.this
1515
+ if isinstance(source, exp.Table) and source.alias == table_name:
1516
+ actual_name = self._get_qualified_table_name(source)
1517
+ if actual_name in self._file_schema:
1518
+ return list(self._file_schema[actual_name].keys())
1519
+
1520
+ # Check JOIN clauses for aliased tables
1521
+ joins = select_node.args.get("joins")
1522
+ if joins:
1523
+ for join in joins:
1524
+ if isinstance(join, exp.Join):
1525
+ join_source = join.this
1526
+ if (
1527
+ isinstance(join_source, exp.Table)
1528
+ and join_source.alias == table_name
1529
+ ):
1530
+ actual_name = self._get_qualified_table_name(join_source)
1531
+ if actual_name in self._file_schema:
1532
+ return list(self._file_schema[actual_name].keys())
1533
+
1534
+ return []
1535
+
1536
+ def _extract_subquery_columns(self, subquery_select: exp.Select) -> List[str]:
1537
+ """
1538
+ Extract column names from a subquery's SELECT statement.
1539
+
1540
+ Args:
1541
+ subquery_select: The SELECT expression within the subquery
1542
+
1543
+ Returns:
1544
+ List of column names
1545
+ """
1546
+ columns: List[str] = []
1547
+
1548
+ for projection in subquery_select.expressions:
1549
+ if isinstance(projection, exp.Alias):
1550
+ columns.append(projection.alias)
1551
+ elif isinstance(projection, exp.Column):
1552
+ # Check for table-qualified star (t.*)
1553
+ if isinstance(projection.this, exp.Star):
1554
+ table_name = projection.table
1555
+ if table_name:
1556
+ qualified_cols = self._resolve_qualified_star(
1557
+ table_name, subquery_select
1558
+ )
1559
+ columns.extend(qualified_cols)
1560
+ else:
1561
+ columns.append(projection.name)
1562
+ elif isinstance(projection, exp.Star):
1563
+ # Resolve SELECT * in subquery
1564
+ star_columns = self._resolve_star_columns(subquery_select)
1565
+ columns.extend(star_columns)
1566
+ else:
1567
+ col_sql = projection.sql(dialect=self.dialect)
1568
+ columns.append(col_sql)
1395
1569
 
1396
1570
  return columns
1397
1571
 
@@ -2549,3 +2549,220 @@ class TestCrossStatementLineage:
2549
2549
  assert "output_table.b" in third_outputs
2550
2550
  assert "output_table.c" in third_outputs
2551
2551
  assert "output_table.row_num" in third_outputs
2552
+
2553
def test_select_star_from_join(self):
    """A star over a JOIN must expose the columns of every joined view."""
    sql = """
    CREATE VIEW v1 AS SELECT a, b FROM t1;
    CREATE VIEW v2 AS SELECT c, d FROM t2;
    CREATE VIEW v3 AS SELECT * FROM v1 JOIN v2 ON v1.a = v2.c;
    """
    analyzer = LineageAnalyzer(sql, dialect="spark")
    results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
    assert len(results) == 3

    v3_items = results[2].lineage_items

    # Outputs of v3 cover the columns of both v1 and v2.
    produced = {entry.output_name for entry in v3_items}
    for expected in ("v3.a", "v3.b", "v3.c", "v3.d"):
        assert expected in produced

    # Each output traces back to the view that defined it.
    consumed = {entry.source_name for entry in v3_items}
    for expected in ("v1.a", "v1.b", "v2.c", "v2.d"):
        assert expected in consumed
2579
+
2580
def test_nested_ctes_and_views_with_select_star(self):
    """Stars chained through views and CTEs resolve to concrete columns."""
    sql = """
    CREATE VIEW v1 AS SELECT a, b FROM t1;
    CREATE VIEW v2 AS SELECT c, d FROM t2;
    CREATE VIEW v3 AS
    WITH cte1 AS (SELECT * FROM v1)
    SELECT * FROM cte1;
    CREATE VIEW v4 AS
    SELECT * FROM v3 JOIN v2 ON v3.a = v2.c;
    CREATE VIEW v5 AS
    WITH
    cte1 AS (SELECT * FROM v4),
    cte2 AS (SELECT * FROM cte1)
    SELECT * FROM cte2;
    """
    analyzer = LineageAnalyzer(sql, dialect="spark")
    results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
    assert len(results) == 5

    # Expected file-scoped schema, checked in definition order.
    expected_schema = {
        "v1": {"a", "b"},
        "v2": {"c", "d"},
        "v3": {"a", "b"},
        "v4": {"a", "b", "c", "d"},
        "v5": {"a", "b", "c", "d"},
    }
    for view_name, view_columns in expected_schema.items():
        assert view_name in analyzer._file_schema
        assert set(analyzer._file_schema[view_name].keys()) == view_columns

    # The last view still exposes every column after two CTE hops.
    v5_outputs = {entry.output_name for entry in results[4].lineage_items}
    for expected in ("v5.a", "v5.b", "v5.c", "v5.d"):
        assert expected in v5_outputs
2624
+
2625
def test_select_star_from_subquery(self):
    """A star over a derived table takes its columns from the inner SELECT."""
    sql = """
    CREATE VIEW v1 AS SELECT a, b FROM t1;
    CREATE VIEW v2 AS SELECT * FROM (SELECT * FROM v1) sub;
    """
    analyzer = LineageAnalyzer(sql, dialect="spark")
    results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
    assert len(results) == 2

    # The outer view must expose the columns projected by the subquery.
    v2_outputs = {entry.output_name for entry in results[1].lineage_items}
    for expected in ("v2.a", "v2.b"):
        assert expected in v2_outputs

    # The recorded schema for v2 matches as well.
    assert set(analyzer._file_schema["v2"].keys()) == {"a", "b"}
2644
+
2645
def test_table_qualified_star(self):
    """``v1.*`` expands to the columns of the named view."""
    sql = """
    CREATE VIEW v1 AS SELECT a, b FROM t1;
    CREATE VIEW v2 AS SELECT c FROM t2;
    CREATE VIEW v3 AS SELECT v1.*, v2.c FROM v1 JOIN v2 ON v1.a = v2.c;
    """
    analyzer = LineageAnalyzer(sql, dialect="spark")
    results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
    assert len(results) == 3

    # v3 carries v1's expanded columns plus the explicit v2.c.
    v3_outputs = {entry.output_name for entry in results[2].lineage_items}
    for expected in ("v3.a", "v3.b", "v3.c"):
        assert expected in v3_outputs

    # The recorded schema for v3 holds exactly those three columns.
    assert set(analyzer._file_schema["v3"].keys()) == {"a", "b", "c"}
2666
+
2667
def test_table_qualified_star_with_alias(self):
    """``x.*`` expands correctly when ``x`` is an alias for a view."""
    sql = """
    CREATE VIEW v1 AS SELECT a, b FROM t1;
    CREATE VIEW v2 AS SELECT c FROM t2;
    CREATE VIEW v3 AS SELECT x.*, y.c FROM v1 AS x JOIN v2 AS y ON x.a = y.c;
    """
    analyzer = LineageAnalyzer(sql, dialect="spark")
    results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
    assert len(results) == 3

    # The alias x resolves to v1, so v3 gains a and b alongside y.c.
    v3_outputs = {entry.output_name for entry in results[2].lineage_items}
    for expected in ("v3.a", "v3.b", "v3.c"):
        assert expected in v3_outputs

    # The recorded schema for v3 holds exactly those three columns.
    assert set(analyzer._file_schema["v3"].keys()) == {"a", "b", "c"}
2688
+
2689
+
2690
class TestLateralViewColumnResolution:
    """Checks that SELECT * picks up columns generated by LATERAL VIEW."""

    @staticmethod
    def _file_schema_after_analysis(sql):
        """Run column-level analysis on ``sql`` and return the file schema."""
        analyzer = LineageAnalyzer(sql, dialect="spark")
        analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
        return analyzer._file_schema

    def test_select_star_with_lateral_view_explode(self):
        """explode() adds its aliased element column to the star expansion."""
        sql = """
        CREATE VIEW v1 AS SELECT arr FROM t1;
        CREATE VIEW v2 AS SELECT * FROM v1 LATERAL VIEW explode(arr) t AS elem;
        """
        schema = self._file_schema_after_analysis(sql)

        # v2 exposes the base column and the exploded element.
        assert "v2" in schema
        assert set(schema["v2"].keys()) == {"arr", "elem"}

    def test_select_star_with_lateral_view_posexplode(self):
        """posexplode() adds both its position and element columns."""
        sql = """
        CREATE VIEW v1 AS SELECT arr FROM t1;
        CREATE VIEW v2 AS SELECT * FROM v1 LATERAL VIEW posexplode(arr) t AS pos, elem;
        """
        schema = self._file_schema_after_analysis(sql)

        # v2 exposes arr plus the two generated columns.
        assert "v2" in schema
        assert set(schema["v2"].keys()) == {"arr", "pos", "elem"}

    def test_select_star_with_multiple_lateral_views(self):
        """Every LATERAL VIEW in the chain contributes its columns."""
        sql = """
        CREATE VIEW v1 AS SELECT arr1, arr2 FROM t1;
        CREATE VIEW v2 AS
        SELECT * FROM v1
        LATERAL VIEW explode(arr1) t1 AS elem1
        LATERAL VIEW explode(arr2) t2 AS elem2;
        """
        schema = self._file_schema_after_analysis(sql)

        # v2 exposes both base columns and both exploded elements.
        expected_columns = {"arr1", "arr2", "elem1", "elem2"}
        assert "v2" in schema
        assert set(schema["v2"].keys()) == expected_columns

    def test_select_star_with_lateral_view_outer(self):
        """LATERAL VIEW OUTER yields the same columns as the plain form."""
        sql = """
        CREATE VIEW v1 AS SELECT arr FROM t1;
        CREATE VIEW v2 AS SELECT * FROM v1 LATERAL VIEW OUTER explode(arr) t AS elem;
        """
        schema = self._file_schema_after_analysis(sql)

        # v2 exposes the base column and the exploded element.
        assert "v2" in schema
        assert set(schema["v2"].keys()) == {"arr", "elem"}

    def test_lateral_view_with_join(self):
        """A JOIN followed by a LATERAL VIEW resolves columns from all three."""
        sql = """
        CREATE VIEW v1 AS SELECT id, arr FROM t1;
        CREATE VIEW v2 AS SELECT name FROM t2;
        CREATE VIEW v3 AS
        SELECT * FROM v1
        JOIN v2 ON v1.id = v2.name
        LATERAL VIEW explode(arr) t AS elem;
        """
        schema = self._file_schema_after_analysis(sql)

        # v3 exposes v1's columns, v2's column, and the exploded element.
        assert "v3" in schema
        assert set(schema["v3"].keys()) == {"id", "arr", "name", "elem"}
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes