sql-glider 0.1.15__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-glider
3
- Version: 0.1.15
3
+ Version: 0.1.18
4
4
  Summary: SQL Utility Toolkit for better understanding, use, and governance of your queries in a native environment.
5
5
  Project-URL: Homepage, https://github.com/rycowhi/sql-glider/
6
6
  Project-URL: Repository, https://github.com/rycowhi/sql-glider/
@@ -1,5 +1,5 @@
1
1
  sqlglider/__init__.py,sha256=gDf7s52dMcX7JuCZ1SLawcB1vb3U0yJCohu9RQAATBY,125
2
- sqlglider/_version.py,sha256=HPqQHR9pVxIxlFt4vovkyoe7k6UO3ag2isBN2lHFL8g,706
2
+ sqlglider/_version.py,sha256=Nx3lULyklTDQB2p2ofjQ59zAxYunJHGjMIsvHePGZsI,706
3
3
  sqlglider/cli.py,sha256=9zNMaw3rgcqb6uG05VJTYbLUXmZzdX87gAOJ4Zg3xjY,65319
4
4
  sqlglider/global_models.py,sha256=2vyJXAuXOsXQpE-D3F0ejj7eR9z0nDWFjTkielhzM8k,356
5
5
  sqlglider/catalog/__init__.py,sha256=2PqFPyzFXJ14FpSUcBmVK2L-a_ypWQHAbHFHxLDk_LE,814
@@ -18,10 +18,10 @@ sqlglider/graph/models.py,sha256=EYmjv_WzDSNp_WfhJ6H-qBIOkAcoNKS7GRUryfKrHuY,933
18
18
  sqlglider/graph/query.py,sha256=LHU8Cvn7ZPPSEnqdDn2pF8f1_LQjIvNIrZqs8cFlb6U,9433
19
19
  sqlglider/graph/serialization.py,sha256=vMXn7s35jA499e7l90vNVaJE_3QR_VHf3rEfQ9ZlgTQ,2781
20
20
  sqlglider/lineage/__init__.py,sha256=llXMeI5_PIZaiBo8tKk3-wOubF4m_6QBHbn1FtWxT7k,256
21
- sqlglider/lineage/analyzer.py,sha256=08pFR5aGFFPhSbRW6EqiX2d3mp91v-orcs6dm_T1FJg,76484
21
+ sqlglider/lineage/analyzer.py,sha256=46VjvTpC4v50dwDT_SDZWtnb3b0VdbxEOlZJZL1-cBg,78169
22
22
  sqlglider/lineage/formatters.py,sha256=_Y9wcTX4JXn1vVnZ1xI656g1FF2rMjcAVc-GHjbd9QA,10389
23
23
  sqlglider/schema/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- sqlglider/schema/extractor.py,sha256=WW31wbHkL-V749pLb7EAyUOJuziZQK-5hLZVW6f970U,7234
24
+ sqlglider/schema/extractor.py,sha256=P7sVCmBACQ55D8wHS7munbTg0dvJ3-NnUzXf9ZgCAOA,7574
25
25
  sqlglider/templating/__init__.py,sha256=g3_wb6rSDI0usq2UUMDpn-J5kVwlAw3NtLdwbxL6UHs,1435
26
26
  sqlglider/templating/base.py,sha256=y5bWAW7qXl_4pPyo5KycfHwNVvt1-7slZ63DAsvTE1s,2902
27
27
  sqlglider/templating/jinja.py,sha256=o01UG72N4G1-tOT5LKK1Wkccv4nJH2VN4VFaMi5c1-g,5220
@@ -30,9 +30,9 @@ sqlglider/templating/variables.py,sha256=5593PtLBcOxsnMCSRm2pGAD5I0Y9f__VV3_J_Hf
30
30
  sqlglider/utils/__init__.py,sha256=KGp9-UzKz_OFBOTFoSy-g-NXDZsvyWXG_9-1zcC6ePE,276
31
31
  sqlglider/utils/config.py,sha256=qx5zE9pjLCCzHQDFVPLVd7LgJ-lghxUa2x-aZOAHByY,4962
32
32
  sqlglider/utils/file_utils.py,sha256=5_ff28E0r1R7emZzsOnRuHd-7zIX6873eyr1SuPEr4E,1093
33
- sqlglider/utils/schema.py,sha256=-0Vd1A3EggBH3reXTiabO0zFeTENROgmDg861X1D7Qs,1867
34
- sql_glider-0.1.15.dist-info/METADATA,sha256=IF0dZD6rOriyausbDZhHPMfYnhHyRlxyi9v_ihTgCUo,28446
35
- sql_glider-0.1.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
36
- sql_glider-0.1.15.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
37
- sql_glider-0.1.15.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
- sql_glider-0.1.15.dist-info/RECORD,,
33
+ sqlglider/utils/schema.py,sha256=LiWrYDunXKJdoSlpKmIaIQ2hLSaIN1iQHqkXjMpGzRE,1883
34
+ sql_glider-0.1.18.dist-info/METADATA,sha256=lPAHSo1k6J-j2wgGSJ3dAHtZBUouCRiW2L_sxuM1ewo,28446
35
+ sql_glider-0.1.18.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
36
+ sql_glider-0.1.18.dist-info/entry_points.txt,sha256=HDuakHqHS5C0HFKsMIxMYmDU7-BLBGrnIJcYaVRu-s0,251
37
+ sql_glider-0.1.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
+ sql_glider-0.1.18.dist-info/RECORD,,
sqlglider/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.15'
32
- __version_tuple__ = version_tuple = (0, 1, 15)
31
+ __version__ = version = '0.1.18'
32
+ __version_tuple__ = version_tuple = (0, 1, 18)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -11,6 +11,48 @@ from sqlglot.lineage import Node, lineage
11
11
  from sqlglider.global_models import AnalysisLevel
12
12
 
13
13
 
14
+ def _flat_schema_to_nested(
15
+ schema: Dict[str, Dict[str, str]],
16
+ ) -> Dict[str, object]:
17
+ """Convert flat dot-notation schema keys to the nested dict structure sqlglot expects.
18
+
19
+ sqlglot's MappingSchema requires consistent nesting depth across all tables.
20
+ Flat keys like ``"db.table"`` are split on dots and nested accordingly.
21
+ Shorter keys are padded with empty-string prefixes to match the max depth.
22
+
23
+ Examples::
24
+
25
+ {"users": {"id": "UNKNOWN"}}
26
+ → {"users": {"id": "UNKNOWN"}} (depth 1, no change)
27
+
28
+ {"db.users": {"id": "UNKNOWN"}, "my_view": {"x": "UNKNOWN"}}
29
+ → {"db": {"users": {"id": "UNKNOWN"}}, "": {"my_view": {"x": "UNKNOWN"}}}
30
+ """
31
+ if not schema:
32
+ return {}
33
+
34
+ # Split all keys into parts
35
+ entries = [(key.split("."), cols) for key, cols in schema.items()]
36
+ max_depth = max(len(parts) for parts, _ in entries)
37
+
38
+ # If all keys are single-part (unqualified), return as-is
39
+ if max_depth == 1:
40
+ return schema # type: ignore[return-value]
41
+
42
+ # Pad shorter keys with empty-string prefixes to match max depth
43
+ nested: Dict[str, object] = {}
44
+ for parts, cols in entries:
45
+ while len(parts) < max_depth:
46
+ parts.insert(0, "")
47
+ d: Dict[str, object] = nested
48
+ for part in parts[:-1]:
49
+ if part not in d:
50
+ d[part] = {}
51
+ d = d[part] # type: ignore[assignment]
52
+ d[parts[-1]] = cols
53
+ return nested
54
+
55
+
14
56
  class StarResolutionError(Exception):
15
57
  """Raised when SELECT * cannot be resolved and no_star mode is enabled."""
16
58
 
@@ -860,8 +902,10 @@ class LineageAnalyzer:
860
902
  current_query_sql = self.expr.sql(dialect=self.dialect)
861
903
 
862
904
  # Prune schema to only tables referenced in this query to avoid
863
- # sqlglot.lineage() performance degradation with large schema dicts
864
- pruned_schema: Optional[Dict[str, Dict[str, str]]] = None
905
+ # sqlglot.lineage() performance degradation with large schema dicts.
906
+ # Then convert from flat dot-notation keys to the nested dict structure
907
+ # that sqlglot's MappingSchema expects.
908
+ lineage_schema: Optional[Dict[str, object]] = None
865
909
  if self._file_schema:
866
910
  referenced = {t.lower() for t in self._get_query_tables()}
867
911
  pruned_schema = {
@@ -869,8 +913,8 @@ class LineageAnalyzer:
869
913
  for table, cols in self._file_schema.items()
870
914
  if table.lower() in referenced
871
915
  }
872
- if not pruned_schema:
873
- pruned_schema = None
916
+ if pruned_schema:
917
+ lineage_schema = _flat_schema_to_nested(pruned_schema)
874
918
 
875
919
  for col in columns_to_analyze:
876
920
  try:
@@ -883,7 +927,7 @@ class LineageAnalyzer:
883
927
  lineage_col,
884
928
  current_query_sql,
885
929
  dialect=self.dialect,
886
- schema=pruned_schema,
930
+ schema=lineage_schema,
887
931
  )
888
932
 
889
933
  # Collect all source columns
@@ -1133,7 +1177,7 @@ class LineageAnalyzer:
1133
1177
  if table.db:
1134
1178
  parts.append(table.db)
1135
1179
  parts.append(table.name)
1136
- return ".".join(parts)
1180
+ return ".".join(parts).lower()
1137
1181
 
1138
1182
  def _resolve_table_reference(self, ref: str, select_node: exp.Select) -> str:
1139
1183
  """
@@ -1478,7 +1522,7 @@ class LineageAnalyzer:
1478
1522
 
1479
1523
  if columns:
1480
1524
  # Store with UNKNOWN type - SQLGlot only needs column names for expansion
1481
- self._file_schema[target_name] = {col: "UNKNOWN" for col in columns}
1525
+ self._file_schema[target_name] = {col.lower(): "UNKNOWN" for col in columns}
1482
1526
 
1483
1527
  def _extract_schema_from_dql(self, expr: exp.Expression) -> None:
1484
1528
  """Infer table schemas from column references in DQL.
@@ -1590,8 +1634,9 @@ class LineageAnalyzer:
1590
1634
 
1591
1635
  if actual_table not in self._file_schema:
1592
1636
  self._file_schema[actual_table] = {}
1593
- if col_name not in self._file_schema[actual_table]:
1594
- self._file_schema[actual_table][col_name] = "UNKNOWN"
1637
+ col_lower = col_name.lower()
1638
+ if col_lower not in self._file_schema[actual_table]:
1639
+ self._file_schema[actual_table][col_lower] = "UNKNOWN"
1595
1640
 
1596
1641
  def _extract_columns_from_select(
1597
1642
  self, select_node: Union[exp.Select, exp.Union, exp.Intersect, exp.Except]
@@ -41,7 +41,14 @@ def extract_schemas_from_files(
41
41
  if console is None:
42
42
  console = Console(stderr=True)
43
43
 
44
- schema: SchemaDict = dict(initial_schema) if initial_schema else {}
44
+ schema: SchemaDict = (
45
+ {
46
+ k.lower(): {c.lower(): v for c, v in cols.items()}
47
+ for k, cols in initial_schema.items()
48
+ }
49
+ if initial_schema
50
+ else {}
51
+ )
45
52
  total = len(file_paths)
46
53
 
47
54
  with Progress(
@@ -65,7 +72,11 @@ def extract_schemas_from_files(
65
72
  strict_schema=strict_schema,
66
73
  )
67
74
  file_schema = analyzer.extract_schema_only()
68
- schema.update(file_schema)
75
+ for table_name, columns in file_schema.items():
76
+ if table_name in schema:
77
+ schema[table_name].update(columns)
78
+ else:
79
+ schema[table_name] = columns
69
80
  except SchemaResolutionError:
70
81
  raise
71
82
  except Exception:
sqlglider/utils/schema.py CHANGED
@@ -46,7 +46,7 @@ def parse_ddl_to_schema(ddl: str, dialect: str = "spark") -> Dict[str, Dict[str,
46
46
  table_name = _get_qualified_name(target)
47
47
 
48
48
  if columns:
49
- schema[table_name] = {col: "UNKNOWN" for col in columns}
49
+ schema[table_name] = {col.lower(): "UNKNOWN" for col in columns}
50
50
 
51
51
  return schema
52
52
 
@@ -59,4 +59,4 @@ def _get_qualified_name(table: exp.Table) -> str:
59
59
  if table.db:
60
60
  parts.append(table.db)
61
61
  parts.append(table.name)
62
- return ".".join(parts)
62
+ return ".".join(parts).lower()