sql-code-graph 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.3.0
3
+ Version: 1.4.1
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Database
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
20
  Requires-Python: >=3.12
21
- Requires-Dist: kuzu==0.11.3
21
+ Requires-Dist: duckdb<2.0,>=1.0.0
22
22
  Requires-Dist: mcp<2.0,>=1.27.0
23
23
  Requires-Dist: pathspec>=0.12.1
24
24
  Requires-Dist: pydantic>=2.0
@@ -30,8 +30,6 @@ Requires-Dist: typer>=0.9.0
30
30
  Requires-Dist: watchdog>=3.0.0
31
31
  Provides-Extra: dbt
32
32
  Requires-Dist: dbt-core>=1.7; extra == 'dbt'
33
- Provides-Extra: neo4j
34
- Requires-Dist: neo4j>=5.15.0; extra == 'neo4j'
35
33
  Provides-Extra: snowflake
36
34
  Requires-Dist: acryl-datahub<0.15.0,>=0.14.0; extra == 'snowflake'
37
35
  Description-Content-Type: text/markdown
@@ -1,36 +1,36 @@
1
- sqlcg/__init__.py,sha256=Zwww9eU1OV0KsLzksvKQE65bNgEFPPwTUb0XCSW4JIE,115
1
+ sqlcg/__init__.py,sha256=Q9RK3IHj_JNrfASKHyNaH76PD2u74uMssIwE5Mi7F7Q,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=xr3RHmO4eFTP4VKZn4DAx3BJzSi60_DIZmdE-QLfsHI,13601
7
- sqlcg/cli/commands/db.py,sha256=paW096LE8fMpxPNvoW9zHmZ9xjb-dEbwVmfHR1bcg7U,8676
8
- sqlcg/cli/commands/find.py,sha256=zTYN9goILalYq4R9x6lIR6MmNcydDbR17UXkx1gPRsI,2913
9
- sqlcg/cli/commands/gain.py,sha256=Kws76u1na2XxmbWN_YWrPaYHYmYBLC6DDDf7xqnltqc,9126
6
+ sqlcg/cli/commands/analyze.py,sha256=_aC5ML3w7YdLi7DL3TFS9OiCEIipuNZxWR6S4peTcn4,12154
7
+ sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
8
+ sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
9
+ sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
10
10
  sqlcg/cli/commands/git.py,sha256=9a8T2FVxcAHq1H6Cslaq34t10w9fBGf4T2reiLk33t8,6135
11
- sqlcg/cli/commands/index.py,sha256=1ElHRPkn-CPprIz869A__98aSyf-P5E56PVA0lq7xBw,17462
11
+ sqlcg/cli/commands/index.py,sha256=RPEpT9aXmK0rixRuHgMcmAn9FLR4JtIbZ1LM4RLkMvY,17050
12
12
  sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
13
  sqlcg/cli/commands/mcp.py,sha256=QYaupf69lLpYzIoqsPJoCPiAggLVkYBzwpuOLRzxJDU,9140
14
- sqlcg/cli/commands/reindex.py,sha256=Ki5BHbI_nuM6ML0-w7qnVZqSAhELSpsFaUo6BVqzhRo,13812
14
+ sqlcg/cli/commands/reindex.py,sha256=FqXxvQ0UrxwDS0q2V3gzYgarN3NPh9muaIZ3rgvTmHs,13810
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
- sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
16
+ sqlcg/cli/commands/uninstall.py,sha256=WrA1FnINxnd6mmE4-_QBK0aHBnibstJeAT8swnQKG4M,8962
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
- sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=qNR-yXkfYfS8Y8WX4Qo6Zkq8PPP_ZiTrvX0DLmEZkGY,14821
20
- sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
- sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
18
+ sqlcg/core/__init__.py,sha256=dXvLWpbQ72f5CM6sKSvBDnEGqGuIZaN5MmHyD8Vf1aA,154
19
+ sqlcg/core/config.py,sha256=rMTYt1QsZVOyhT8W7oE65XQh5LKqdOeXCQ00sMFny4U,12375
20
+ sqlcg/core/duckdb_backend.py,sha256=nnOGbjmjHGXR_dlrzJQpSw5otLescCW1WtltZthoURQ,29522
21
+ sqlcg/core/freshness.py,sha256=F9jWn2cbFs60jA9ta8KrT6MghD0mzI7SqqKs_Op9AeU,4577
22
+ sqlcg/core/graph_db.py,sha256=eLYdmiqPWrZHtKLFcKgD2aiWZHFU3wrwz6Y0A3d9NcE,8633
22
23
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
23
- sqlcg/core/kuzu_backend.py,sha256=PHW7VqI7oCLKsHnm4OoBoNnE2XT19ohxUpQMMIJnjlY,17038
24
- sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
25
24
  sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
26
- sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
25
+ sqlcg/core/queries.py,sha256=jtZR6caLpF0WqqkjncYTlf5L1GN6DZweoM5dNOAB8OY,3043
26
+ sqlcg/core/queries.sql,sha256=75EHoHA5hKz-Xs8g-CCtDXbf3lYFEw_fxghjYU48gUM,7794
27
27
  sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
- sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
28
+ sqlcg/core/schema.py,sha256=7fKf314ueIV7-tIkQQUS4O6H-OhlFKFVFFvGFqw_5Xk,1476
29
29
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
30
30
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
31
31
  sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
32
32
  sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
33
- sqlcg/indexer/indexer.py,sha256=7TCgBLl3ml3mF8Z2q4YHZJ6HdxSuLH-rPJTibnUJUe4,66658
33
+ sqlcg/indexer/indexer.py,sha256=NCOWwXmE7e6m42sraFJpImmtWFss-V95RzEuI-eDzb0,66400
34
34
  sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
35
35
  sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
36
36
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
@@ -38,30 +38,30 @@ sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
38
38
  sqlcg/lineage/aggregator.py,sha256=LVyNcmvLBHWbh8SrDsJJBKd7sLg3-2NhEWwEndG7Jbc,4144
39
39
  sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
40
40
  sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
41
- sqlcg/metrics/store.py,sha256=O1UoBu4dIZYIgNBqLWIyL3vLAnSgWrJinOgSLhQigHM,10596
41
+ sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
42
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
43
43
  sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
44
- sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
44
+ sqlcg/parsers/base.py,sha256=N6uqQWHO2lpzTIWgPqzewAXmij0ikZdNmNVJkIQ8Mr0,54889
45
45
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
46
46
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
47
47
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
48
48
  sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
49
49
  sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
50
50
  sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
51
- sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
51
+ sqlcg/server/control.py,sha256=qUcztb_aDhL-_X_Nq4q6uGx17cUlbLnI6vUpoZsEjbo,4506
52
52
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
53
53
  sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
54
- sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
55
- sqlcg/server/read_client.py,sha256=ncoJK7UckGhWtN9bv1CgViNMNtac96zBUE7RPYQ8_WI,7783
56
- sqlcg/server/server.py,sha256=9SilAu18cHTZUQvSo8S8e9CxSM6CUlG8rX2OnHBUh1Y,24178
57
- sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
58
- sqlcg/server/tools.py,sha256=DTaDwZQmL6jzNF8vgJeNhVMGRoIrrpcxNiXOMpWvx-A,72401
59
- sqlcg/server/writer.py,sha256=gagS6EVG8A4OKpf0GAb--MUielnaiIULwVVik58pT6k,24693
54
+ sqlcg/server/noise_filter.py,sha256=Ats2FFzmzFKqvQPWvlUzK8mY9pzlUhL4m1s8P_HNnvI,6335
55
+ sqlcg/server/read_client.py,sha256=4v1OOl12HCHp6J14HrcWQApSixvJ0wwE8UIIZ0pzLyw,7808
56
+ sqlcg/server/server.py,sha256=A1soT-hYnAX78X5j97nIjw9oIZCxBejjV6p_tRDIK38,24335
57
+ sqlcg/server/skill.py,sha256=EDvmEgl-LQwCxZ9Lca4lt8zCtkk2cCgh9GLYlGgDA64,12828
58
+ sqlcg/server/tools.py,sha256=Rdl5SXAuePydk8Z12JVx-lNQeAqm3k9R-z0Q4DstCkw,69743
59
+ sqlcg/server/writer.py,sha256=HH8pW6k1c2pbY4b6g2N87Tx4gNCezYqTLNTlAmsHg2Y,18522
60
60
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
61
61
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
62
62
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
63
63
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
64
- sql_code_graph-1.3.0.dist-info/METADATA,sha256=2QJqn9Q606zlPKDgeYrNzq0d1QW35jIRCxmvDCvvNIE,14148
65
- sql_code_graph-1.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
- sql_code_graph-1.3.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
- sql_code_graph-1.3.0.dist-info/RECORD,,
64
+ sql_code_graph-1.4.1.dist-info/METADATA,sha256=7psrSu1Vh3GiZ7PK7FED45QzazeUrHrltrRla4twAVQ,14085
65
+ sql_code_graph-1.4.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
+ sql_code_graph-1.4.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
+ sql_code_graph-1.4.1.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.3.0"
3
+ __version__ = "1.4.1"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -9,7 +9,6 @@ from rich.console import Console
9
9
  from rich.table import Table
10
10
 
11
11
  from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
12
- from sqlcg.core.schema import NodeLabel, RelType
13
12
  from sqlcg.server.read_client import run_read_routed
14
13
 
15
14
  if TYPE_CHECKING:
@@ -19,37 +18,111 @@ app = typer.Typer(help="Lineage analysis")
19
18
  console = Console()
20
19
 
21
20
 
22
- def _kind_filter(node_alias: str, *, include_intermediate: bool) -> str:
23
- """Build the SqlTable kind-filter clause for upstream/downstream queries.
21
+ def _upstream_sql(depth: int, include_intermediate: bool) -> str:
22
+ """Build the upstream recursive-CTE SQL query.
24
23
 
25
- Uses an OPTIONAL MATCH + explicit ``WITH … WHERE t.kind IS NULL OR …`` form
26
- (Half B of the #38/#39 fix) so that a node-less physical source — one whose
27
- SqlTable node is absent because it was only seen inside a CTE body before
28
- re-index — is KEPT, not dropped. CTE/derived intermediates carry
29
- ``kind='cte'`` or ``kind='derived'`` and are excluded by the
30
- ``kind IN [...]`` guard; ``IS NULL`` matches the absent-node case (t = NULL →
31
- t.kind = NULL → IS NULL is TRUE).
32
-
33
- The ``WITH {node_alias}, t WHERE`` clause is required: an OPTIONAL MATCH
34
- WHERE clause in KuzuDB applies to the match attempt and does not filter the
35
- surrounding row. The subsequent WITH WHERE is the actual row filter.
36
-
37
- Args:
38
- node_alias: The Cypher alias for the column node whose table is filtered
39
- (``"src"`` for upstream, ``"dst"`` for downstream).
40
- include_intermediate: When True, return an empty string (no filtering);
41
- all intermediates including CTE nodes are kept.
42
-
43
- Returns:
44
- A Cypher fragment string (with trailing space) to embed directly in the
45
- query, or an empty string when include_intermediate is True.
24
+ Traverses COLUMN_LINEAGE edges from dst→src (upstream direction).
25
+ Applies kind-filter (LEFT JOIN SqlTable) to exclude CTE/derived intermediates
26
+ unless include_intermediate=True. The kind-guard mirrors the Cypher
27
+ OPTIONAL MATCH + WITH WHERE t.kind IS NULL OR t.kind IN ['table','external']
28
+ semantics (#38/#40/19.2).
29
+ """
30
+ kind_filter = (
31
+ ""
32
+ if include_intermediate
33
+ else (
34
+ ' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
35
+ " WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
36
+ )
37
+ )
38
+ return f"""
39
+ WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
40
+ SELECT
41
+ cl.src_key AS id,
42
+ c_src.table_qualified,
43
+ 1 AS depth,
44
+ ARRAY[cl.dst_key, cl.src_key] AS path
45
+ FROM "COLUMN_LINEAGE" cl
46
+ JOIN "SqlColumn" c_src ON c_src.id = cl.src_key
47
+ WHERE cl.dst_key = ?
48
+ UNION ALL
49
+ SELECT
50
+ cl2.src_key,
51
+ c2.table_qualified,
52
+ reach.depth + 1,
53
+ array_append(reach.path, cl2.src_key)
54
+ FROM reach
55
+ JOIN "COLUMN_LINEAGE" cl2 ON cl2.dst_key = reach.id
56
+ JOIN "SqlColumn" c2 ON c2.id = cl2.src_key
57
+ WHERE reach.depth < {depth}
58
+ AND NOT cl2.src_key = ANY(reach.path)
59
+ ),
60
+ distinct_reach AS (
61
+ SELECT DISTINCT id, table_qualified FROM reach
62
+ )
63
+ SELECT
64
+ dr.id,
65
+ dr.table_qualified,
66
+ min(q.file_path) AS file,
67
+ min(q.start_line) AS line
68
+ FROM distinct_reach dr
69
+ LEFT JOIN "COLUMN_LINEAGE" src_edge ON src_edge.src_key = dr.id
70
+ LEFT JOIN "SqlQuery" q ON q.id = src_edge.query_id
71
+ {kind_filter}GROUP BY dr.id, dr.table_qualified
72
+ LIMIT 100
73
+ """
74
+
75
+
76
+ def _downstream_sql(depth: int, include_intermediate: bool) -> str:
77
+ """Build the downstream recursive-CTE SQL query.
78
+
79
+ Traverses COLUMN_LINEAGE edges from src→dst (downstream direction).
80
+ Kind-filter mirrors upstream: LEFT JOIN SqlTable + IS NULL OR 'table'/'external'.
46
81
  """
47
- if include_intermediate:
48
- return ""
49
- return (
50
- f"OPTIONAL MATCH (t:SqlTable {{qualified: {node_alias}.table_qualified}}) "
51
- f"WITH {node_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
82
+ kind_filter = (
83
+ ""
84
+ if include_intermediate
85
+ else (
86
+ ' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
87
+ " WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
88
+ )
52
89
  )
90
+ return f"""
91
+ WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
92
+ SELECT
93
+ cl.dst_key AS id,
94
+ c_dst.table_qualified,
95
+ 1 AS depth,
96
+ ARRAY[cl.src_key, cl.dst_key] AS path
97
+ FROM "COLUMN_LINEAGE" cl
98
+ JOIN "SqlColumn" c_dst ON c_dst.id = cl.dst_key
99
+ WHERE cl.src_key = ?
100
+ UNION ALL
101
+ SELECT
102
+ cl2.dst_key,
103
+ c2.table_qualified,
104
+ reach.depth + 1,
105
+ array_append(reach.path, cl2.dst_key)
106
+ FROM reach
107
+ JOIN "COLUMN_LINEAGE" cl2 ON cl2.src_key = reach.id
108
+ JOIN "SqlColumn" c2 ON c2.id = cl2.dst_key
109
+ WHERE reach.depth < {depth}
110
+ AND NOT cl2.dst_key = ANY(reach.path)
111
+ ),
112
+ distinct_reach AS (
113
+ SELECT DISTINCT id, table_qualified FROM reach
114
+ )
115
+ SELECT
116
+ dr.id,
117
+ dr.table_qualified,
118
+ min(q.file_path) AS file,
119
+ min(q.start_line) AS line
120
+ FROM distinct_reach dr
121
+ LEFT JOIN "COLUMN_LINEAGE" dst_edge ON dst_edge.dst_key = dr.id
122
+ LEFT JOIN "SqlQuery" q ON q.id = dst_edge.query_id
123
+ {kind_filter}GROUP BY dr.id, dr.table_qualified
124
+ LIMIT 100
125
+ """
53
126
 
54
127
 
55
128
  @app.command("upstream")
@@ -62,36 +135,16 @@ def upstream( # noqa: B008
62
135
  ),
63
136
  ) -> None:
64
137
  """Trace upstream column lineage."""
65
- # Bounds check for depth to prevent performance DoS
66
138
  if depth < 1 or depth > 100:
67
139
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
68
140
  raise typer.Exit(1)
69
141
 
70
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
71
- kf = _kind_filter("src", include_intermediate=include_intermediate)
72
-
73
- results = run_read_routed(
74
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
75
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
76
- f"{kf}"
77
- f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
78
- "OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
79
- "WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
80
- "RETURN src.id AS id, file AS file, line AS line LIMIT 100",
81
- {"ref": ref},
82
- )
142
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
143
+ sql = _upstream_sql(depth, include_intermediate)
144
+ results = run_read_routed(sql, {"ref": ref})
83
145
  if not results and len(ref.split(".")) >= 3:
84
146
  bare = _bare_ref(ref)
85
- fallback_results = run_read_routed(
86
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
87
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
88
- f"{kf}"
89
- f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
90
- "OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
91
- "WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
92
- "RETURN src.id AS id, file AS file, line AS line LIMIT 100",
93
- {"bare": bare},
94
- )
147
+ fallback_results = run_read_routed(sql, {"bare": bare})
95
148
  if fallback_results:
96
149
  console.print(
97
150
  f"[yellow]Hint:[/yellow] No results for '{ref}'. "
@@ -104,7 +157,7 @@ def upstream( # noqa: B008
104
157
  if not raw:
105
158
  from sqlcg.server.noise_filter import NoiseFilter
106
159
 
107
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
160
+ nf = NoiseFilter.from_config()
108
161
  results = _filter_column_results(results, nf)
109
162
  _print_table(_add_file_line_col(results), ["id", "file:line"])
110
163
 
@@ -119,36 +172,16 @@ def downstream( # noqa: B008
119
172
  ),
120
173
  ) -> None:
121
174
  """Trace downstream column lineage."""
122
- # Bounds check for depth to prevent performance DoS
123
175
  if depth < 1 or depth > 100:
124
176
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
125
177
  raise typer.Exit(1)
126
178
 
127
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
128
- kf = _kind_filter("dst", include_intermediate=include_intermediate)
129
-
130
- results = run_read_routed(
131
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
132
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
133
- f"{kf}"
134
- f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
135
- "OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
136
- "WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
137
- "RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
138
- {"ref": ref},
139
- )
179
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
180
+ sql = _downstream_sql(depth, include_intermediate)
181
+ results = run_read_routed(sql, {"ref": ref})
140
182
  if not results and len(ref.split(".")) >= 3:
141
183
  bare = _bare_ref(ref)
142
- fallback_results = run_read_routed(
143
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
144
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
145
- f"{kf}"
146
- f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
147
- "OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
148
- "WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
149
- "RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
150
- {"bare": bare},
151
- )
184
+ fallback_results = run_read_routed(sql, {"bare": bare})
152
185
  if fallback_results:
153
186
  console.print(
154
187
  f"[yellow]Hint:[/yellow] No results for '{ref}'. "
@@ -161,18 +194,16 @@ def downstream( # noqa: B008
161
194
  if not raw:
162
195
  from sqlcg.server.noise_filter import NoiseFilter
163
196
 
164
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
197
+ nf = NoiseFilter.from_config()
165
198
  results = _filter_column_results(results, nf)
166
199
  _print_table(_add_file_line_col(results), ["id", "file:line"])
167
200
 
168
201
  # Append external consumer rows for terminal tables (scalar query, one per terminal).
169
- # Resolve terminal tables from the column results; fall back to the root column's table.
170
202
  terminal_tables: set[str] = set()
171
203
  for r in results:
172
204
  tbl = _col_id_to_table(r["id"])
173
205
  if tbl:
174
206
  terminal_tables.add(tbl)
175
- # Also check the root column's table (in case no downstream columns were found).
176
207
  root_parts = ref.rsplit(".", 1)
177
208
  if len(root_parts) == 2:
178
209
  terminal_tables.add(root_parts[0])
@@ -197,9 +228,11 @@ def impact( # noqa: B008
197
228
  ) -> None:
198
229
  """Show all queries impacted by a table."""
199
230
  results = run_read_routed(
200
- f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
201
- f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
202
- "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
231
+ "SELECT DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target"
232
+ ' FROM "SqlTable" t'
233
+ ' JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified'
234
+ ' JOIN "SqlQuery" q ON q.id = sf.src_key'
235
+ " WHERE t.qualified = ? LIMIT 100",
203
236
  {"t": table},
204
237
  )
205
238
  if not raw:
@@ -222,22 +255,20 @@ def failures(
222
255
  ),
223
256
  limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
224
257
  ) -> None:
225
- """List files that failed to parse, with their dominant cause (E-code bucket).
226
-
227
- Valid --cause buckets (from highest to lowest severity):
228
- timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
229
-
230
- Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
231
- with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
232
- an earlier version.
233
- """
234
- cypher = (
235
- f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
236
- "AND ($cause IS NULL OR f.parse_cause = $cause) "
237
- "RETURN f.path AS path, f.parse_cause AS cause "
238
- f"ORDER BY f.parse_cause LIMIT {limit}"
239
- )
240
- rows = run_read_routed(cypher, {"cause": cause})
258
+ """List files that failed to parse, with their dominant cause (E-code bucket)."""
259
+ if cause is not None:
260
+ rows = run_read_routed(
261
+ 'SELECT path, parse_cause AS cause FROM "File"'
262
+ " WHERE parse_failed = true AND parse_cause = ?"
263
+ f" ORDER BY parse_cause LIMIT {limit}",
264
+ {"cause": cause},
265
+ )
266
+ else:
267
+ rows = run_read_routed(
268
+ 'SELECT path, parse_cause AS cause FROM "File"'
269
+ f" WHERE parse_failed = true ORDER BY parse_cause LIMIT {limit}",
270
+ {},
271
+ )
241
272
  _print_table(rows, ["path", "cause"])
242
273
 
243
274
 
@@ -248,8 +279,9 @@ def unused(
248
279
  ) -> None:
249
280
  """Find tables with no query references."""
250
281
  results = run_read_routed(
251
- f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
252
- "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
282
+ 'SELECT DISTINCT qualified FROM "SqlTable"'
283
+ ' WHERE qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")'
284
+ " LIMIT 100",
253
285
  {},
254
286
  )
255
287
  if not raw:
@@ -263,28 +295,18 @@ def unused(
263
295
  def _bare_ref(ref: str) -> str:
264
296
  """Strip schema prefix from a ref string, keeping table.column.
265
297
 
266
- For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
267
- For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
268
- Never uses rsplit — that would yield only the column name for 3-part refs.
298
+ Lowercases defensively so this is safe to call even if the caller did not
299
+ first fold the ref — graph keys are lowercased at index time (C2 normalization).
269
300
  """
301
+ ref = ref.lower()
270
302
  parts = ref.split(".")
271
303
  if len(parts) >= 3:
272
- return ".".join(parts[1:]) # drop schema, keep table.column
273
- return ref # already bare (no schema prefix)
304
+ return ".".join(parts[1:])
305
+ return ref
274
306
 
275
307
 
276
308
  def _col_id_to_table(col_id: str) -> str:
277
- """Extract the table-qualified part from a column ID (schema.table.col → schema.table).
278
-
279
- Column IDs follow the format: schema.table.column or table.column.
280
- The table part is everything except the last component.
281
-
282
- Args:
283
- col_id: A column ID string from the graph.
284
-
285
- Returns:
286
- The table-qualified portion (all but the last dotted component).
287
- """
309
+ """Extract the table-qualified part from a column ID."""
288
310
  parts = col_id.rsplit(".", 1)
289
311
  return parts[0] if len(parts) == 2 else col_id
290
312
 
@@ -293,16 +315,12 @@ def _filter_column_results(
293
315
  results: list[dict],
294
316
  nf: NoiseFilter, # type: ignore[name-defined]
295
317
  ) -> list[dict]:
296
- """Filter column-ID result rows by NoiseFilter, dropping rows whose table is noise."""
318
+ """Filter column-ID result rows by NoiseFilter."""
297
319
  return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
298
320
 
299
321
 
300
322
  def _add_file_line_col(rows: list[dict]) -> list[dict]:
301
- """Add a 'file:line' composite column from 'file' and 'line' fields.
302
-
303
- Formats as 'path/to/file.sql:N' when both are present, or '?' when either
304
- is absent (multi-hop upstream where file/line is not available).
305
- """
323
+ """Add a 'file:line' composite column from 'file' and 'line' fields."""
306
324
  result = []
307
325
  for row in rows:
308
326
  new_row = dict(row)