sql-code-graph 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.4.0.dist-info → sql_code_graph-1.4.1.dist-info}/METADATA +1 -1
- {sql_code_graph-1.4.0.dist-info → sql_code_graph-1.4.1.dist-info}/RECORD +7 -7
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +8 -1
- sqlcg/parsers/base.py +82 -0
- {sql_code_graph-1.4.0.dist-info → sql_code_graph-1.4.1.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.4.0.dist-info → sql_code_graph-1.4.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.1
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=Q9RK3IHj_JNrfASKHyNaH76PD2u74uMssIwE5Mi7F7Q,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=_aC5ML3w7YdLi7DL3TFS9OiCEIipuNZxWR6S4peTcn4,12154
|
|
7
7
|
sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
|
|
8
8
|
sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
|
|
9
9
|
sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
|
|
@@ -41,7 +41,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
|
41
41
|
sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
|
|
42
42
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
43
43
|
sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
|
|
44
|
-
sqlcg/parsers/base.py,sha256=
|
|
44
|
+
sqlcg/parsers/base.py,sha256=N6uqQWHO2lpzTIWgPqzewAXmij0ikZdNmNVJkIQ8Mr0,54889
|
|
45
45
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
46
46
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
47
47
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -61,7 +61,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
|
61
61
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
62
62
|
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
63
63
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
64
|
-
sql_code_graph-1.4.
|
|
65
|
-
sql_code_graph-1.4.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
66
|
-
sql_code_graph-1.4.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
67
|
-
sql_code_graph-1.4.0.dist-info/RECORD,,
|
|
64
|
+
sql_code_graph-1.4.1.dist-info/METADATA,sha256=7psrSu1Vh3GiZ7PK7FED45QzazeUrHrltrRla4twAVQ,14085
|
|
65
|
+
sql_code_graph-1.4.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
66
|
+
sql_code_graph-1.4.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
67
|
+
sql_code_graph-1.4.1.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -139,6 +139,7 @@ def upstream( # noqa: B008
|
|
|
139
139
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
140
140
|
raise typer.Exit(1)
|
|
141
141
|
|
|
142
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
142
143
|
sql = _upstream_sql(depth, include_intermediate)
|
|
143
144
|
results = run_read_routed(sql, {"ref": ref})
|
|
144
145
|
if not results and len(ref.split(".")) >= 3:
|
|
@@ -175,6 +176,7 @@ def downstream( # noqa: B008
|
|
|
175
176
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
176
177
|
raise typer.Exit(1)
|
|
177
178
|
|
|
179
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
178
180
|
sql = _downstream_sql(depth, include_intermediate)
|
|
179
181
|
results = run_read_routed(sql, {"ref": ref})
|
|
180
182
|
if not results and len(ref.split(".")) >= 3:
|
|
@@ -291,7 +293,12 @@ def unused(
|
|
|
291
293
|
|
|
292
294
|
|
|
293
295
|
def _bare_ref(ref: str) -> str:
|
|
294
|
-
"""Strip schema prefix from a ref string, keeping table.column."""
|
|
296
|
+
"""Strip schema prefix from a ref string, keeping table.column.
|
|
297
|
+
|
|
298
|
+
Lowercases defensively so this is safe to call even if the caller did not
|
|
299
|
+
first fold the ref — graph keys are lowercased at index time (C2 normalization).
|
|
300
|
+
"""
|
|
301
|
+
ref = ref.lower()
|
|
295
302
|
parts = ref.split(".")
|
|
296
303
|
if len(parts) >= 3:
|
|
297
304
|
return ".".join(parts[1:])
|
sqlcg/parsers/base.py
CHANGED
|
@@ -515,6 +515,32 @@ class SqlParser(ABC):
|
|
|
515
515
|
_walk(root)
|
|
516
516
|
return edges
|
|
517
517
|
|
|
518
|
+
def _table_node_to_ref(self, table_node: Any) -> "TableRef | None":
|
|
519
|
+
"""Convert a sqlglot exp.Table AST node to a TableRef.
|
|
520
|
+
|
|
521
|
+
Used by the #49 mis-bind override to enumerate candidate source tables
|
|
522
|
+
from a CTE body's FROM/JOIN once per CTE body (before the per-projection
|
|
523
|
+
loop). Does NOT call qualify/build_scope — preserves the O(1)-per-body
|
|
524
|
+
perf invariant.
|
|
525
|
+
|
|
526
|
+
Schema aliases are applied via _apply_table_alias so the emitted edges
|
|
527
|
+
carry the canonical (post-alias) table identity.
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
table_node: sqlglot.expressions.Table AST node
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
TableRef with catalog/db/name extracted and alias-resolved, or None
|
|
534
|
+
if the alias resolution returns None (treated as an unresolvable ref).
|
|
535
|
+
"""
|
|
536
|
+
return self._apply_table_alias(
|
|
537
|
+
TableRef(
|
|
538
|
+
catalog=table_node.catalog if table_node.catalog else None,
|
|
539
|
+
db=table_node.db if table_node.db else None,
|
|
540
|
+
name=table_node.name if table_node.name else str(table_node),
|
|
541
|
+
)
|
|
542
|
+
)
|
|
543
|
+
|
|
518
544
|
def _lineage_node_to_table_ref(self, node: Any) -> "TableRef | None":
|
|
519
545
|
"""Extract a TableRef from a sqlglot LineageNode's source attribute.
|
|
520
546
|
|
|
@@ -987,6 +1013,20 @@ class SqlParser(ABC):
|
|
|
987
1013
|
# string for every column rather than re-serializing O(N_cols) times.
|
|
988
1014
|
cte_body_sql = cte_body.sql(dialect=self.DIALECT)
|
|
989
1015
|
|
|
1016
|
+
# Compute the candidate source-table set ONCE per CTE body
|
|
1017
|
+
# (before the per-projection loop) — never inside it.
|
|
1018
|
+
# Uses find_all(exp.Table) on the already-built AST; does NOT
|
|
1019
|
+
# call qualify/build_scope, preserving O(1) per CTE body.
|
|
1020
|
+
# This set is reused across all projections to detect ambiguity
|
|
1021
|
+
# (#49 mis-bind override).
|
|
1022
|
+
cte_source_tables: list[TableRef] = [
|
|
1023
|
+
ref
|
|
1024
|
+
for t in cte_body.find_all(exp.Table)
|
|
1025
|
+
if t.name # skip anonymous / subquery placeholders
|
|
1026
|
+
for ref in (self._table_node_to_ref(t),)
|
|
1027
|
+
if ref is not None
|
|
1028
|
+
]
|
|
1029
|
+
|
|
990
1030
|
# For each projection in the CTE, extract lineage.
|
|
991
1031
|
# Only iterate projections from left branch for column names, but pass
|
|
992
1032
|
# entire Union body to sg_lineage so sqlglot resolves both branches.
|
|
@@ -1010,6 +1050,48 @@ class SqlParser(ABC):
|
|
|
1010
1050
|
)
|
|
1011
1051
|
if not cte_col_name or cte_col_name == "*":
|
|
1012
1052
|
continue
|
|
1053
|
+
|
|
1054
|
+
# #49 mis-bind override: detect bare (unqualified) columns
|
|
1055
|
+
# in a ≥2-table CTE body.
|
|
1056
|
+
#
|
|
1057
|
+
# sqlglot's sg_lineage (called with no schema and no scope)
|
|
1058
|
+
# re-qualifies the CTE body internally and mis-binds bare
|
|
1059
|
+
# columns to the last-joined table — emitting a confident
|
|
1060
|
+
# WRONG edge (confirmed live: bare `m` from fact_daily was
|
|
1061
|
+
# bound to dim_time). This is not a missing edge; it is an
|
|
1062
|
+
# incorrect one.
|
|
1063
|
+
#
|
|
1064
|
+
# Override: when any bare column appears inside the projection
|
|
1065
|
+
# expression AND the CTE body has ≥2 source tables, skip
|
|
1066
|
+
# sg_lineage for this projection and instead emit one
|
|
1067
|
+
# CTE_PROJECTION_AMBIGUOUS edge per candidate source table
|
|
1068
|
+
# (confidence=0.5). Over-attribution is the safe failure mode
|
|
1069
|
+
# for impact analysis; a wrong single edge is not acceptable.
|
|
1070
|
+
#
|
|
1071
|
+
# Single-table bodies: no ambiguity; existing path unchanged.
|
|
1072
|
+
# Qualified columns in any body: no bare columns; existing path.
|
|
1073
|
+
bare_cols_in_expr = [
|
|
1074
|
+
c
|
|
1075
|
+
for c in cte_col_expr.find_all(exp.Column)
|
|
1076
|
+
if not c.table # bare = no qualifier
|
|
1077
|
+
]
|
|
1078
|
+
if bare_cols_in_expr and len(cte_source_tables) >= 2:
|
|
1079
|
+
# Emit one ambiguous edge per candidate source table.
|
|
1080
|
+
# bare_col.name is the column name to attribute;
|
|
1081
|
+
# for aggregates/CASE the bare col name is the leaf.
|
|
1082
|
+
bare_col_name = bare_cols_in_expr[0].name or cte_col_name
|
|
1083
|
+
dst_col_ref = ColumnRef(cte_dst_table, cte_col_name)
|
|
1084
|
+
for src_tbl in cte_source_tables:
|
|
1085
|
+
edges.append(
|
|
1086
|
+
LineageEdge(
|
|
1087
|
+
src=ColumnRef(src_tbl, bare_col_name),
|
|
1088
|
+
dst=dst_col_ref,
|
|
1089
|
+
transform="CTE_PROJECTION_AMBIGUOUS",
|
|
1090
|
+
confidence=0.5,
|
|
1091
|
+
)
|
|
1092
|
+
)
|
|
1093
|
+
continue # skip sg_lineage for this projection
|
|
1094
|
+
|
|
1013
1095
|
try:
|
|
1014
1096
|
# No schema: resolver.as_dict() {table:[cols]} triggers
|
|
1015
1097
|
# sqlglot nesting-level errors on fresh string parses.
|
|
File without changes
|
|
File without changes
|