sql-glider 0.1.16__tar.gz → 0.1.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {sql_glider-0.1.16 → sql_glider-0.1.19}/PKG-INFO +1 -1
  2. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/_version.py +2 -2
  3. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/lineage/analyzer.py +34 -23
  4. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/schema/extractor.py +13 -2
  5. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/utils/schema.py +2 -2
  6. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/lineage/test_analyzer.py +72 -16
  7. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/schema/test_extractor.py +52 -0
  8. {sql_glider-0.1.16 → sql_glider-0.1.19}/.github/workflows/ci.yml +0 -0
  9. {sql_glider-0.1.16 → sql_glider-0.1.19}/.github/workflows/publish.yml +0 -0
  10. {sql_glider-0.1.16 → sql_glider-0.1.19}/.gitignore +0 -0
  11. {sql_glider-0.1.16 → sql_glider-0.1.19}/.python-version +0 -0
  12. {sql_glider-0.1.16 → sql_glider-0.1.19}/ARCHITECTURE.md +0 -0
  13. {sql_glider-0.1.16 → sql_glider-0.1.19}/CLAUDE.md +0 -0
  14. {sql_glider-0.1.16 → sql_glider-0.1.19}/LICENSE +0 -0
  15. {sql_glider-0.1.16 → sql_glider-0.1.19}/README.md +0 -0
  16. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-05-column-level-lineage.md +0 -0
  17. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-05-reverse-lineage.md +0 -0
  18. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-06-config-file-support.md +0 -0
  19. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-06-graph-lineage.md +0 -0
  20. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-06-unify-single-multi-query.md +0 -0
  21. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-07-sample-data-model.md +0 -0
  22. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-07-sql-templating.md +0 -0
  23. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-08-tables-command.md +0 -0
  24. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-09-graph-query-paths.md +0 -0
  25. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-13-dissect-command.md +0 -0
  26. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2025-12-14-tables-pull-command.md +0 -0
  27. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-25-fix-union-lineage-chain.md +0 -0
  28. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-26-file-scoped-schema-context.md +0 -0
  29. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-28-sparksql-table-extraction.md +0 -0
  30. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-29-no-star-flag.md +0 -0
  31. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-29-resolve-schema.md +0 -0
  32. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-29-schema-pruning-optimization.md +0 -0
  33. {sql_glider-0.1.16 → sql_glider-0.1.19}/plans/2026-01-29-tables-scrape-command.md +0 -0
  34. {sql_glider-0.1.16 → sql_glider-0.1.19}/pyproject.toml +0 -0
  35. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/README.md +0 -0
  36. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/expire_dim_customer.sql +0 -0
  37. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/load_fact_orders.sql +0 -0
  38. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/load_fact_payments.sql +0 -0
  39. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/merge_dim_customer.sql +0 -0
  40. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/merge_dim_product.sql +0 -0
  41. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/business/update_dim_customer_metrics.sql +0 -0
  42. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/complex/conditional_merge.sql +0 -0
  43. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/complex/cte_insert.sql +0 -0
  44. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/complex/multi_table_transform.sql +0 -0
  45. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/dim_customer.sql +0 -0
  46. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/dim_product.sql +0 -0
  47. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/fact_orders.sql +0 -0
  48. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/fact_payments.sql +0 -0
  49. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_addresses.sql +0 -0
  50. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_customers.sql +0 -0
  51. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_order_items.sql +0 -0
  52. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_orders.sql +0 -0
  53. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_payments.sql +0 -0
  54. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/raw_products.sql +0 -0
  55. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/stg_customers.sql +0 -0
  56. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/stg_orders.sql +0 -0
  57. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/stg_payments.sql +0 -0
  58. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/ddl/stg_products.sql +0 -0
  59. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/incremental/incr_fact_orders.sql +0 -0
  60. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/incremental/incr_fact_payments.sql +0 -0
  61. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/incremental/incr_pres_sales_summary.sql +0 -0
  62. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/maintenance/delete_expired_customers.sql +0 -0
  63. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/maintenance/update_product_status.sql +0 -0
  64. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/presentation/load_pres_customer_360.sql +0 -0
  65. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/presentation/load_pres_customer_cohort.sql +0 -0
  66. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/presentation/load_pres_product_performance.sql +0 -0
  67. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/presentation/load_pres_sales_summary.sql +0 -0
  68. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/staging/load_stg_customers.sql +0 -0
  69. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/staging/load_stg_orders.sql +0 -0
  70. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/staging/load_stg_payments.sql +0 -0
  71. {sql_glider-0.1.16 → sql_glider-0.1.19}/sample_data_model/staging/load_stg_products.sql +0 -0
  72. {sql_glider-0.1.16 → sql_glider-0.1.19}/sqlglider.toml.example +0 -0
  73. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/__init__.py +0 -0
  74. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/catalog/__init__.py +0 -0
  75. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/catalog/base.py +0 -0
  76. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/catalog/databricks.py +0 -0
  77. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/catalog/registry.py +0 -0
  78. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/cli.py +0 -0
  79. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/dissection/__init__.py +0 -0
  80. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/dissection/analyzer.py +0 -0
  81. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/dissection/formatters.py +0 -0
  82. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/dissection/models.py +0 -0
  83. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/global_models.py +0 -0
  84. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/__init__.py +0 -0
  85. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/builder.py +0 -0
  86. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/formatters.py +0 -0
  87. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/merge.py +0 -0
  88. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/models.py +0 -0
  89. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/query.py +0 -0
  90. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/graph/serialization.py +0 -0
  91. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/lineage/__init__.py +0 -0
  92. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/lineage/formatters.py +0 -0
  93. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/schema/__init__.py +0 -0
  94. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/templating/__init__.py +0 -0
  95. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/templating/base.py +0 -0
  96. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/templating/jinja.py +0 -0
  97. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/templating/registry.py +0 -0
  98. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/templating/variables.py +0 -0
  99. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/utils/__init__.py +0 -0
  100. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/utils/config.py +0 -0
  101. {sql_glider-0.1.16 → sql_glider-0.1.19}/src/sqlglider/utils/file_utils.py +0 -0
  102. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/__init__.py +0 -0
  103. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/analytics_pipeline.sql +0 -0
  104. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/analytics_pipeline_union_merge.sql +0 -0
  105. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/customers.sql +0 -0
  106. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/orders.sql +0 -0
  107. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/reports.sql +0 -0
  108. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/multi_file_queries/view_based_merge.sql +0 -0
  109. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_cte.sql +0 -0
  110. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_cte_query.sql +0 -0
  111. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_cte_view_star.sql +0 -0
  112. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_generated_column_query.sql +0 -0
  113. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_multi.sql +0 -0
  114. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_multi_query.sql +0 -0
  115. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_single_query.sql +0 -0
  116. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_subquery.sql +0 -0
  117. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_tables.sql +0 -0
  118. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_view.sql +0 -0
  119. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/original_queries/test_view_window_cte.sql +0 -0
  120. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/fixtures/sample_manifest.csv +0 -0
  121. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/__init__.py +0 -0
  122. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/catalog/__init__.py +0 -0
  123. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/catalog/test_base.py +0 -0
  124. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/catalog/test_databricks.py +0 -0
  125. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/catalog/test_registry.py +0 -0
  126. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/dissection/__init__.py +0 -0
  127. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/dissection/test_analyzer.py +0 -0
  128. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/dissection/test_formatters.py +0 -0
  129. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/dissection/test_models.py +0 -0
  130. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/__init__.py +0 -0
  131. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_builder.py +0 -0
  132. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_formatters.py +0 -0
  133. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_merge.py +0 -0
  134. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_models.py +0 -0
  135. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_query.py +0 -0
  136. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/graph/test_serialization.py +0 -0
  137. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/lineage/__init__.py +0 -0
  138. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/lineage/test_formatters.py +0 -0
  139. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/schema/__init__.py +0 -0
  140. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/templating/__init__.py +0 -0
  141. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/templating/test_base.py +0 -0
  142. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/templating/test_jinja.py +0 -0
  143. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/templating/test_registry.py +0 -0
  144. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/templating/test_variables.py +0 -0
  145. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/test_cli.py +0 -0
  146. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/utils/__init__.py +0 -0
  147. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/utils/test_config.py +0 -0
  148. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/utils/test_file_utils.py +0 -0
  149. {sql_glider-0.1.16 → sql_glider-0.1.19}/tests/sqlglider/utils/test_schema.py +0 -0
  150. {sql_glider-0.1.16 → sql_glider-0.1.19}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.16
3
+ Version: 0.1.19
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.16'
32
- __version_tuple__ = version_tuple = (0, 1, 16)
31
+ __version__ = version = '0.1.19'
32
+ __version_tuple__ = version_tuple = (0, 1, 19)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1177,7 +1177,7 @@ class LineageAnalyzer:
1177
1177
  if table.db:
1178
1178
  parts.append(table.db)
1179
1179
  parts.append(table.name)
1180
- return ".".join(parts)
1180
+ return ".".join(parts).lower()
1181
1181
 
1182
1182
  def _resolve_table_reference(self, ref: str, select_node: exp.Select) -> str:
1183
1183
  """
@@ -1480,7 +1480,7 @@ class LineageAnalyzer:
1480
1480
 
1481
1481
  def _extract_schema_from_statement(self, expr: exp.Expression) -> None:
1482
1482
  """
1483
- Extract column definitions from CREATE VIEW/TABLE AS SELECT statements.
1483
+ Extract column definitions from CREATE VIEW/TABLE AS SELECT and CACHE TABLE statements.
1484
1484
 
1485
1485
  This method builds up file-scoped schema context as statements are processed,
1486
1486
  enabling SQLGlot to correctly expand SELECT * and trace cross-statement references.
@@ -1488,29 +1488,39 @@ class LineageAnalyzer:
1488
1488
  Args:
1489
1489
  expr: The SQL expression to extract schema from
1490
1490
  """
1491
- # Only handle CREATE VIEW or CREATE TABLE (AS SELECT)
1492
- if not isinstance(expr, exp.Create):
1493
- return
1494
- if expr.kind not in ("VIEW", "TABLE"):
1495
- return
1491
+ target_name: str | None = None
1492
+ select_node: exp.Expression | None = None
1496
1493
 
1497
- # Get target table/view name
1498
- target = expr.this
1499
- if isinstance(target, exp.Schema):
1500
- target = target.this
1501
- if not isinstance(target, exp.Table):
1502
- return
1494
+ if isinstance(expr, exp.Create):
1495
+ if expr.kind not in ("VIEW", "TABLE"):
1496
+ return
1497
+
1498
+ target = expr.this
1499
+ if isinstance(target, exp.Schema):
1500
+ target = target.this
1501
+ if not isinstance(target, exp.Table):
1502
+ return
1503
1503
 
1504
- target_name = self._get_qualified_table_name(target)
1504
+ target_name = self._get_qualified_table_name(target)
1505
+ select_node = expr.expression
1505
1506
 
1506
- # Get the SELECT node from the CREATE statement
1507
- select_node = expr.expression
1508
- if select_node is None:
1507
+ # Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
1508
+ if isinstance(select_node, exp.Subquery):
1509
+ select_node = select_node.this
1510
+
1511
+ elif isinstance(expr, exp.Cache):
1512
+ target = expr.this
1513
+ if not isinstance(target, exp.Table):
1514
+ return
1515
+
1516
+ target_name = self._get_qualified_table_name(target)
1517
+ select_node = expr.expression
1518
+
1519
+ else:
1509
1520
  return
1510
1521
 
1511
- # Handle Subquery wrapper (e.g., CREATE VIEW AS (SELECT ...))
1512
- if isinstance(select_node, exp.Subquery):
1513
- select_node = select_node.this
1522
+ if target_name is None or select_node is None:
1523
+ return
1514
1524
 
1515
1525
  if not isinstance(
1516
1526
  select_node, (exp.Select, exp.Union, exp.Intersect, exp.Except)
@@ -1522,7 +1532,7 @@ class LineageAnalyzer:
1522
1532
 
1523
1533
  if columns:
1524
1534
  # Store with UNKNOWN type - SQLGlot only needs column names for expansion
1525
- self._file_schema[target_name] = {col: "UNKNOWN" for col in columns}
1535
+ self._file_schema[target_name] = {col.lower(): "UNKNOWN" for col in columns}
1526
1536
 
1527
1537
  def _extract_schema_from_dql(self, expr: exp.Expression) -> None:
1528
1538
  """Infer table schemas from column references in DQL.
@@ -1634,8 +1644,9 @@ class LineageAnalyzer:
1634
1644
 
1635
1645
  if actual_table not in self._file_schema:
1636
1646
  self._file_schema[actual_table] = {}
1637
- if col_name not in self._file_schema[actual_table]:
1638
- self._file_schema[actual_table][col_name] = "UNKNOWN"
1647
+ col_lower = col_name.lower()
1648
+ if col_lower not in self._file_schema[actual_table]:
1649
+ self._file_schema[actual_table][col_lower] = "UNKNOWN"
1639
1650
 
1640
1651
  def _extract_columns_from_select(
1641
1652
  self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
@@ -41,7 +41,14 @@ def extract_schemas_from_files(
41
41
  if console is None:
42
42
  console = Console(stderr=True)
43
43
 
44
- schema: SchemaDict = dict(initial_schema) if initial_schema else {}
44
+ schema: SchemaDict = (
45
+ {
46
+ k.lower(): {c.lower(): v for c, v in cols.items()}
47
+ for k, cols in initial_schema.items()
48
+ }
49
+ if initial_schema
50
+ else {}
51
+ )
45
52
  total = len(file_paths)
46
53
 
47
54
  with Progress(
@@ -65,7 +72,11 @@ def extract_schemas_from_files(
65
72
  strict_schema=strict_schema,
66
73
  )
67
74
  file_schema = analyzer.extract_schema_only()
68
- schema.update(file_schema)
75
+ for table_name, columns in file_schema.items():
76
+ if table_name in schema:
77
+ schema[table_name].update(columns)
78
+ else:
79
+ schema[table_name] = columns
69
80
  except SchemaResolutionError:
70
81
  raise
71
82
  except Exception:
@@ -46,7 +46,7 @@ def parse_ddl_to_schema(ddl: str, dialect: str = "spark") -> Dict[str, Dict[str,
46
46
  table_name = _get_qualified_name(target)
47
47
 
48
48
  if columns:
49
- schema[table_name] = {col: "UNKNOWN" for col in columns}
49
+ schema[table_name] = {col.lower(): "UNKNOWN" for col in columns}
50
50
 
51
51
  return schema
52
52
 
@@ -59,4 +59,4 @@ def _get_qualified_name(table: exp.Table) -> str:
59
59
  if table.db:
60
60
  parts.append(table.db)
61
61
  parts.append(table.name)
62
- return ".".join(parts)
62
+ return ".".join(parts).lower()
@@ -119,39 +119,39 @@ class TestCaseInsensitiveForwardLineage:
119
119
  # Lowercase
120
120
  (
121
121
  "target_table.customer_name",
122
- "TARGET_TABLE.customer_name",
122
+ "target_table.customer_name",
123
123
  ["customers.customer_name"],
124
124
  ),
125
125
  (
126
126
  "target_table.region",
127
- "TARGET_TABLE.region",
127
+ "target_table.region",
128
128
  ["customers.region"],
129
129
  ),
130
130
  (
131
131
  "target_table.total_amount",
132
- "TARGET_TABLE.total_amount",
132
+ "target_table.total_amount",
133
133
  ["orders.order_amount"],
134
134
  ),
135
135
  # Uppercase
136
136
  (
137
- "TARGET_TABLE.CUSTOMER_NAME",
138
- "TARGET_TABLE.customer_name",
137
+ "target_table.CUSTOMER_NAME",
138
+ "target_table.customer_name",
139
139
  ["customers.customer_name"],
140
140
  ),
141
141
  (
142
- "TARGET_TABLE.REGION",
143
- "TARGET_TABLE.region",
142
+ "target_table.REGION",
143
+ "target_table.region",
144
144
  ["customers.region"],
145
145
  ),
146
146
  # Mixed case
147
147
  (
148
148
  "TaRgEt_TaBlE.CuStOmEr_NaMe",
149
- "TARGET_TABLE.customer_name",
149
+ "target_table.customer_name",
150
150
  ["customers.customer_name"],
151
151
  ),
152
152
  (
153
153
  "target_TABLE.REGION",
154
- "TARGET_TABLE.region",
154
+ "target_table.region",
155
155
  ["customers.region"],
156
156
  ),
157
157
  ],
@@ -346,39 +346,39 @@ class TestCaseInsensitiveReverseLineage:
346
346
  (
347
347
  "customers.customer_name",
348
348
  "customers.customer_name",
349
- ["TARGET_TABLE.customer_name"],
349
+ ["target_table.customer_name"],
350
350
  ),
351
351
  (
352
352
  "customers.region",
353
353
  "customers.region",
354
- ["TARGET_TABLE.region"],
354
+ ["target_table.region"],
355
355
  ),
356
356
  (
357
357
  "orders.order_amount",
358
358
  "orders.order_amount",
359
- ["TARGET_TABLE.segment", "TARGET_TABLE.total_amount"],
359
+ ["target_table.segment", "target_table.total_amount"],
360
360
  ),
361
361
  # Uppercase
362
362
  (
363
363
  "CUSTOMERS.CUSTOMER_NAME",
364
364
  "customers.customer_name",
365
- ["TARGET_TABLE.customer_name"],
365
+ ["target_table.customer_name"],
366
366
  ),
367
367
  (
368
368
  "CUSTOMERS.REGION",
369
369
  "customers.region",
370
- ["TARGET_TABLE.region"],
370
+ ["target_table.region"],
371
371
  ),
372
372
  # Mixed case
373
373
  (
374
374
  "CuStOmErS.CuStOmEr_NaMe",
375
375
  "customers.customer_name",
376
- ["TARGET_TABLE.customer_name"],
376
+ ["target_table.customer_name"],
377
377
  ),
378
378
  (
379
379
  "cUsToMeRs.ReGiOn",
380
380
  "customers.region",
381
- ["TARGET_TABLE.region"],
381
+ ["target_table.region"],
382
382
  ),
383
383
  ],
384
384
  )
@@ -2949,6 +2949,62 @@ class TestCacheTableStatements:
2949
2949
  assert len(skipped) == 1
2950
2950
  assert "DELETE" in skipped[0].statement_type
2951
2951
 
2952
+ def test_cache_table_star_resolution_in_subsequent_query(self):
2953
+ """SELECT * FROM a cached table should resolve columns from the CACHE statement."""
2954
+ sql = """
2955
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
2956
+ SELECT * FROM cached_orders;
2957
+ """
2958
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2959
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2960
+
2961
+ assert len(results) == 2
2962
+ # Second query should have resolved the star
2963
+ star_result = results[1]
2964
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2965
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2966
+ assert output_names == ["customer_id", "order_total"]
2967
+
2968
+ def test_cache_table_qualified_star_resolution(self):
2969
+ """table.* on a cached table should resolve columns."""
2970
+ sql = """
2971
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
2972
+ SELECT cached_orders.* FROM cached_orders;
2973
+ """
2974
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2975
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2976
+
2977
+ assert len(results) == 2
2978
+ star_result = results[1]
2979
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2980
+ assert output_names == ["customer_id", "order_total"]
2981
+
2982
+ def test_cache_lazy_table_star_resolution(self):
2983
+ """CACHE LAZY TABLE should also register schema for star resolution."""
2984
+ sql = """
2985
+ CACHE LAZY TABLE cached_users AS SELECT id, name, email FROM users;
2986
+ SELECT * FROM cached_users;
2987
+ """
2988
+ analyzer = LineageAnalyzer(sql, dialect="spark")
2989
+ results = analyzer.analyze_queries(level=AnalysisLevel.COLUMN)
2990
+
2991
+ assert len(results) == 2
2992
+ star_result = results[1]
2993
+ output_names = sorted(item.output_name for item in star_result.lineage_items)
2994
+ assert output_names == ["email", "id", "name"]
2995
+
2996
+ def test_cache_table_schema_registered(self):
2997
+ """Cached table schema should be registered for downstream star resolution."""
2998
+ sql = """
2999
+ CACHE TABLE cached_orders AS SELECT customer_id, order_total FROM orders;
3000
+ SELECT * FROM cached_orders;
3001
+ """
3002
+ analyzer = LineageAnalyzer(sql, dialect="spark")
3003
+ schema = analyzer.extract_schema_only()
3004
+ assert "cached_orders" in schema
3005
+ assert "customer_id" in schema["cached_orders"]
3006
+ assert "order_total" in schema["cached_orders"]
3007
+
2952
3008
 
2953
3009
  class TestNoStar:
2954
3010
  """Tests for the --no-star flag that fails on unresolvable SELECT *."""
@@ -81,6 +81,58 @@ class TestExtractSchemasFromFiles:
81
81
  assert "id" in schema["customers"]
82
82
  assert "order_id" in schema["orders"]
83
83
 
84
+ def test_merges_columns_for_same_table(self, tmp_path, console):
85
+ """Test that columns are merged when the same table appears in multiple files."""
86
+ file1 = tmp_path / "a.sql"
87
+ file1.write_text("SELECT c.id, c.name FROM customers c;")
88
+
89
+ file2 = tmp_path / "b.sql"
90
+ file2.write_text("SELECT c.id, c.age FROM customers c;")
91
+
92
+ schema = extract_schemas_from_files(
93
+ [file1, file2], dialect="spark", console=console
94
+ )
95
+
96
+ assert "customers" in schema
97
+ assert "id" in schema["customers"]
98
+ assert "name" in schema["customers"]
99
+ assert "age" in schema["customers"]
100
+
101
+ def test_merges_columns_case_insensitive(self, tmp_path, console):
102
+ """Test that tables with different casing are merged into one entry."""
103
+ file1 = tmp_path / "a.sql"
104
+ file1.write_text("SELECT c.id, c.name FROM Customers c;")
105
+
106
+ file2 = tmp_path / "b.sql"
107
+ file2.write_text("SELECT c.id, c.AGE FROM customers c;")
108
+
109
+ schema = extract_schemas_from_files(
110
+ [file1, file2], dialect="spark", console=console
111
+ )
112
+
113
+ assert "customers" in schema
114
+ assert len([k for k in schema if k.lower() == "customers"]) == 1
115
+ assert "id" in schema["customers"]
116
+ assert "name" in schema["customers"]
117
+ assert "age" in schema["customers"]
118
+
119
+ def test_initial_schema_normalized(self, tmp_path, console):
120
+ """Test that initial schema keys are normalized to lowercase."""
121
+ sql_file = tmp_path / "query.sql"
122
+ sql_file.write_text("SELECT o.id FROM orders o;")
123
+
124
+ initial = {"Existing_Table": {"Col1": "UNKNOWN"}}
125
+ schema = extract_schemas_from_files(
126
+ [sql_file],
127
+ dialect="spark",
128
+ initial_schema=initial,
129
+ console=console,
130
+ )
131
+
132
+ assert "existing_table" in schema
133
+ assert "col1" in schema["existing_table"]
134
+ assert "orders" in schema
135
+
84
136
  def test_initial_schema_preserved(self, tmp_path, console):
85
137
  """Test that initial schema is included in result."""
86
138
  sql_file = tmp_path / "query.sql"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes