sql-code-graph 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.4.0
3
+ Version: 1.4.1
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -1,9 +1,9 @@
1
- sqlcg/__init__.py,sha256=spIaM-bJwyRvB5fw8VYDKkrB5Lz7cKAgRuvBBgKYX3g,115
1
+ sqlcg/__init__.py,sha256=Q9RK3IHj_JNrfASKHyNaH76PD2u74uMssIwE5Mi7F7Q,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=YG5N1iYG-ierDgAJ36mFpQ4BMf2yJ4YF-UV7Rx-bmYA,11793
6
+ sqlcg/cli/commands/analyze.py,sha256=_aC5ML3w7YdLi7DL3TFS9OiCEIipuNZxWR6S4peTcn4,12154
7
7
  sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
8
8
  sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
9
9
  sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
@@ -41,7 +41,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
41
41
  sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
42
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
43
43
  sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
44
- sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
44
+ sqlcg/parsers/base.py,sha256=N6uqQWHO2lpzTIWgPqzewAXmij0ikZdNmNVJkIQ8Mr0,54889
45
45
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
46
46
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
47
47
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
@@ -61,7 +61,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
61
61
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
62
62
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
63
63
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
64
- sql_code_graph-1.4.0.dist-info/METADATA,sha256=rjzA7ons9zPtU1rsKtlhdx3z3qJUrVHL7tYfdg1HcrE,14085
65
- sql_code_graph-1.4.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
- sql_code_graph-1.4.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
- sql_code_graph-1.4.0.dist-info/RECORD,,
64
+ sql_code_graph-1.4.1.dist-info/METADATA,sha256=7psrSu1Vh3GiZ7PK7FED45QzazeUrHrltrRla4twAVQ,14085
65
+ sql_code_graph-1.4.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
+ sql_code_graph-1.4.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
+ sql_code_graph-1.4.1.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.4.0"
3
+ __version__ = "1.4.1"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -139,6 +139,7 @@ def upstream( # noqa: B008
139
139
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
140
140
  raise typer.Exit(1)
141
141
 
142
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
142
143
  sql = _upstream_sql(depth, include_intermediate)
143
144
  results = run_read_routed(sql, {"ref": ref})
144
145
  if not results and len(ref.split(".")) >= 3:
@@ -175,6 +176,7 @@ def downstream( # noqa: B008
175
176
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
176
177
  raise typer.Exit(1)
177
178
 
179
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
178
180
  sql = _downstream_sql(depth, include_intermediate)
179
181
  results = run_read_routed(sql, {"ref": ref})
180
182
  if not results and len(ref.split(".")) >= 3:
@@ -291,7 +293,12 @@ def unused(
291
293
 
292
294
 
293
295
  def _bare_ref(ref: str) -> str:
294
- """Strip schema prefix from a ref string, keeping table.column."""
296
+ """Strip schema prefix from a ref string, keeping table.column.
297
+
298
+ Lowercases defensively so this is safe to call even if the caller did not
299
+ first fold the ref — graph keys are lowercased at index time (C2 normalization).
300
+ """
301
+ ref = ref.lower()
295
302
  parts = ref.split(".")
296
303
  if len(parts) >= 3:
297
304
  return ".".join(parts[1:])
sqlcg/parsers/base.py CHANGED
@@ -515,6 +515,32 @@ class SqlParser(ABC):
515
515
  _walk(root)
516
516
  return edges
517
517
 
518
+ def _table_node_to_ref(self, table_node: Any) -> "TableRef | None":
519
+ """Convert a sqlglot exp.Table AST node to a TableRef.
520
+
521
+ Used by the #49 mis-bind override to enumerate candidate source tables
522
+ from a CTE body's FROM/JOIN once per CTE body (before the per-projection
523
+ loop). Does NOT call qualify/build_scope — preserves the O(1)-per-body
524
+ perf invariant.
525
+
526
+ Schema aliases are applied via _apply_table_alias so the emitted edges
527
+ carry the canonical (post-alias) table identity.
528
+
529
+ Args:
530
+ table_node: sqlglot.expressions.Table AST node
531
+
532
+ Returns:
533
+ TableRef with catalog/db/name extracted and alias-resolved, or None
534
+ if the alias resolution returns None (treated as an unresolvable ref).
535
+ """
536
+ return self._apply_table_alias(
537
+ TableRef(
538
+ catalog=table_node.catalog if table_node.catalog else None,
539
+ db=table_node.db if table_node.db else None,
540
+ name=table_node.name if table_node.name else str(table_node),
541
+ )
542
+ )
543
+
518
544
  def _lineage_node_to_table_ref(self, node: Any) -> "TableRef | None":
519
545
  """Extract a TableRef from a sqlglot LineageNode's source attribute.
520
546
 
@@ -987,6 +1013,20 @@ class SqlParser(ABC):
987
1013
  # string for every column rather than re-serializing O(N_cols) times.
988
1014
  cte_body_sql = cte_body.sql(dialect=self.DIALECT)
989
1015
 
1016
+ # Compute the candidate source-table set ONCE per CTE body
1017
+ # (before the per-projection loop) — never inside it.
1018
+ # Uses find_all(exp.Table) on the already-built AST; does NOT
1019
+ # call qualify/build_scope, preserving O(1) per CTE body.
1020
+ # This set is reused across all projections to detect ambiguity
1021
+ # (#49 mis-bind override).
1022
+ cte_source_tables: list[TableRef] = [
1023
+ ref
1024
+ for t in cte_body.find_all(exp.Table)
1025
+ if t.name # skip anonymous / subquery placeholders
1026
+ for ref in (self._table_node_to_ref(t),)
1027
+ if ref is not None
1028
+ ]
1029
+
990
1030
  # For each projection in the CTE, extract lineage.
991
1031
  # Only iterate projections from left branch for column names, but pass
992
1032
  # entire Union body to sg_lineage so sqlglot resolves both branches.
@@ -1010,6 +1050,48 @@ class SqlParser(ABC):
1010
1050
  )
1011
1051
  if not cte_col_name or cte_col_name == "*":
1012
1052
  continue
1053
+
1054
+ # #49 mis-bind override: detect bare (unqualified) columns
1055
+ # in a ≥2-table CTE body.
1056
+ #
1057
+ # sqlglot's sg_lineage (called with no schema and no scope)
1058
+ # re-qualifies the CTE body internally and mis-binds bare
1059
+ # columns to the last-joined table — emitting a confident
1060
+ # WRONG edge (confirmed live: bare `m` from fact_daily was
1061
+ # bound to dim_time). This is not a missing edge; it is an
1062
+ # incorrect one.
1063
+ #
1064
+ # Override: when any bare column appears inside the projection
1065
+ # expression AND the CTE body has ≥2 source tables, skip
1066
+ # sg_lineage for this projection and instead emit one
1067
+ # CTE_PROJECTION_AMBIGUOUS edge per candidate source table
1068
+ # (confidence=0.5). Over-attribution is the safe failure mode
1069
+ # for impact analysis; a wrong single edge is not acceptable.
1070
+ #
1071
+ # Single-table bodies: no ambiguity; existing path unchanged.
1072
+ # Qualified columns in any body: no bare columns; existing path.
1073
+ bare_cols_in_expr = [
1074
+ c
1075
+ for c in cte_col_expr.find_all(exp.Column)
1076
+ if not c.table # bare = no qualifier
1077
+ ]
1078
+ if bare_cols_in_expr and len(cte_source_tables) >= 2:
1079
+ # Emit one ambiguous edge per candidate source table.
1080
+ # bare_col.name is the column name to attribute;
1081
+ # for aggregates/CASE the bare col name is the leaf.
1082
+ bare_col_name = bare_cols_in_expr[0].name or cte_col_name
1083
+ dst_col_ref = ColumnRef(cte_dst_table, cte_col_name)
1084
+ for src_tbl in cte_source_tables:
1085
+ edges.append(
1086
+ LineageEdge(
1087
+ src=ColumnRef(src_tbl, bare_col_name),
1088
+ dst=dst_col_ref,
1089
+ transform="CTE_PROJECTION_AMBIGUOUS",
1090
+ confidence=0.5,
1091
+ )
1092
+ )
1093
+ continue # skip sg_lineage for this projection
1094
+
1013
1095
  try:
1014
1096
  # No schema: resolver.as_dict() {table:[cols]} triggers
1015
1097
  # sqlglot nesting-level errors on fresh string parses.