sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.3.0
3
+ Version: 1.4.0
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
18
18
  Classifier: Topic :: Database
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
20
  Requires-Python: >=3.12
21
- Requires-Dist: kuzu==0.11.3
21
+ Requires-Dist: duckdb<2.0,>=1.0.0
22
22
  Requires-Dist: mcp<2.0,>=1.27.0
23
23
  Requires-Dist: pathspec>=0.12.1
24
24
  Requires-Dist: pydantic>=2.0
@@ -30,8 +30,6 @@ Requires-Dist: typer>=0.9.0
30
30
  Requires-Dist: watchdog>=3.0.0
31
31
  Provides-Extra: dbt
32
32
  Requires-Dist: dbt-core>=1.7; extra == 'dbt'
33
- Provides-Extra: neo4j
34
- Requires-Dist: neo4j>=5.15.0; extra == 'neo4j'
35
33
  Provides-Extra: snowflake
36
34
  Requires-Dist: acryl-datahub<0.15.0,>=0.14.0; extra == 'snowflake'
37
35
  Description-Content-Type: text/markdown
@@ -1,36 +1,36 @@
1
- sqlcg/__init__.py,sha256=Zwww9eU1OV0KsLzksvKQE65bNgEFPPwTUb0XCSW4JIE,115
1
+ sqlcg/__init__.py,sha256=spIaM-bJwyRvB5fw8VYDKkrB5Lz7cKAgRuvBBgKYX3g,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=xr3RHmO4eFTP4VKZn4DAx3BJzSi60_DIZmdE-QLfsHI,13601
7
- sqlcg/cli/commands/db.py,sha256=paW096LE8fMpxPNvoW9zHmZ9xjb-dEbwVmfHR1bcg7U,8676
8
- sqlcg/cli/commands/find.py,sha256=zTYN9goILalYq4R9x6lIR6MmNcydDbR17UXkx1gPRsI,2913
9
- sqlcg/cli/commands/gain.py,sha256=Kws76u1na2XxmbWN_YWrPaYHYmYBLC6DDDf7xqnltqc,9126
6
+ sqlcg/cli/commands/analyze.py,sha256=YG5N1iYG-ierDgAJ36mFpQ4BMf2yJ4YF-UV7Rx-bmYA,11793
7
+ sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
8
+ sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
9
+ sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
10
10
  sqlcg/cli/commands/git.py,sha256=9a8T2FVxcAHq1H6Cslaq34t10w9fBGf4T2reiLk33t8,6135
11
- sqlcg/cli/commands/index.py,sha256=1ElHRPkn-CPprIz869A__98aSyf-P5E56PVA0lq7xBw,17462
11
+ sqlcg/cli/commands/index.py,sha256=RPEpT9aXmK0rixRuHgMcmAn9FLR4JtIbZ1LM4RLkMvY,17050
12
12
  sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
13
  sqlcg/cli/commands/mcp.py,sha256=QYaupf69lLpYzIoqsPJoCPiAggLVkYBzwpuOLRzxJDU,9140
14
- sqlcg/cli/commands/reindex.py,sha256=Ki5BHbI_nuM6ML0-w7qnVZqSAhELSpsFaUo6BVqzhRo,13812
14
+ sqlcg/cli/commands/reindex.py,sha256=FqXxvQ0UrxwDS0q2V3gzYgarN3NPh9muaIZ3rgvTmHs,13810
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
- sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
16
+ sqlcg/cli/commands/uninstall.py,sha256=WrA1FnINxnd6mmE4-_QBK0aHBnibstJeAT8swnQKG4M,8962
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
- sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=qNR-yXkfYfS8Y8WX4Qo6Zkq8PPP_ZiTrvX0DLmEZkGY,14821
20
- sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
- sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
18
+ sqlcg/core/__init__.py,sha256=dXvLWpbQ72f5CM6sKSvBDnEGqGuIZaN5MmHyD8Vf1aA,154
19
+ sqlcg/core/config.py,sha256=rMTYt1QsZVOyhT8W7oE65XQh5LKqdOeXCQ00sMFny4U,12375
20
+ sqlcg/core/duckdb_backend.py,sha256=nnOGbjmjHGXR_dlrzJQpSw5otLescCW1WtltZthoURQ,29522
21
+ sqlcg/core/freshness.py,sha256=F9jWn2cbFs60jA9ta8KrT6MghD0mzI7SqqKs_Op9AeU,4577
22
+ sqlcg/core/graph_db.py,sha256=eLYdmiqPWrZHtKLFcKgD2aiWZHFU3wrwz6Y0A3d9NcE,8633
22
23
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
23
- sqlcg/core/kuzu_backend.py,sha256=PHW7VqI7oCLKsHnm4OoBoNnE2XT19ohxUpQMMIJnjlY,17038
24
- sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
25
24
  sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
26
- sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
25
+ sqlcg/core/queries.py,sha256=jtZR6caLpF0WqqkjncYTlf5L1GN6DZweoM5dNOAB8OY,3043
26
+ sqlcg/core/queries.sql,sha256=75EHoHA5hKz-Xs8g-CCtDXbf3lYFEw_fxghjYU48gUM,7794
27
27
  sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
- sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
28
+ sqlcg/core/schema.py,sha256=7fKf314ueIV7-tIkQQUS4O6H-OhlFKFVFFvGFqw_5Xk,1476
29
29
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
30
30
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
31
31
  sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
32
32
  sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
33
- sqlcg/indexer/indexer.py,sha256=7TCgBLl3ml3mF8Z2q4YHZJ6HdxSuLH-rPJTibnUJUe4,66658
33
+ sqlcg/indexer/indexer.py,sha256=NCOWwXmE7e6m42sraFJpImmtWFss-V95RzEuI-eDzb0,66400
34
34
  sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
35
35
  sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
36
36
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
@@ -38,7 +38,7 @@ sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
38
38
  sqlcg/lineage/aggregator.py,sha256=LVyNcmvLBHWbh8SrDsJJBKd7sLg3-2NhEWwEndG7Jbc,4144
39
39
  sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
40
40
  sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
41
- sqlcg/metrics/store.py,sha256=O1UoBu4dIZYIgNBqLWIyL3vLAnSgWrJinOgSLhQigHM,10596
41
+ sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
42
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
43
43
  sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
44
44
  sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
@@ -48,20 +48,20 @@ sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,149
48
48
  sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
49
49
  sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
50
50
  sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
51
- sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
51
+ sqlcg/server/control.py,sha256=qUcztb_aDhL-_X_Nq4q6uGx17cUlbLnI6vUpoZsEjbo,4506
52
52
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
53
53
  sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
54
- sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
55
- sqlcg/server/read_client.py,sha256=ncoJK7UckGhWtN9bv1CgViNMNtac96zBUE7RPYQ8_WI,7783
56
- sqlcg/server/server.py,sha256=9SilAu18cHTZUQvSo8S8e9CxSM6CUlG8rX2OnHBUh1Y,24178
57
- sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
58
- sqlcg/server/tools.py,sha256=DTaDwZQmL6jzNF8vgJeNhVMGRoIrrpcxNiXOMpWvx-A,72401
59
- sqlcg/server/writer.py,sha256=gagS6EVG8A4OKpf0GAb--MUielnaiIULwVVik58pT6k,24693
54
+ sqlcg/server/noise_filter.py,sha256=Ats2FFzmzFKqvQPWvlUzK8mY9pzlUhL4m1s8P_HNnvI,6335
55
+ sqlcg/server/read_client.py,sha256=4v1OOl12HCHp6J14HrcWQApSixvJ0wwE8UIIZ0pzLyw,7808
56
+ sqlcg/server/server.py,sha256=A1soT-hYnAX78X5j97nIjw9oIZCxBejjV6p_tRDIK38,24335
57
+ sqlcg/server/skill.py,sha256=EDvmEgl-LQwCxZ9Lca4lt8zCtkk2cCgh9GLYlGgDA64,12828
58
+ sqlcg/server/tools.py,sha256=Rdl5SXAuePydk8Z12JVx-lNQeAqm3k9R-z0Q4DstCkw,69743
59
+ sqlcg/server/writer.py,sha256=HH8pW6k1c2pbY4b6g2N87Tx4gNCezYqTLNTlAmsHg2Y,18522
60
60
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
61
61
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
62
62
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
63
63
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
64
- sql_code_graph-1.3.0.dist-info/METADATA,sha256=2QJqn9Q606zlPKDgeYrNzq0d1QW35jIRCxmvDCvvNIE,14148
65
- sql_code_graph-1.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
- sql_code_graph-1.3.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
- sql_code_graph-1.3.0.dist-info/RECORD,,
64
+ sql_code_graph-1.4.0.dist-info/METADATA,sha256=rjzA7ons9zPtU1rsKtlhdx3z3qJUrVHL7tYfdg1HcrE,14085
65
+ sql_code_graph-1.4.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
66
+ sql_code_graph-1.4.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
67
+ sql_code_graph-1.4.0.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.3.0"
3
+ __version__ = "1.4.0"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -9,7 +9,6 @@ from rich.console import Console
9
9
  from rich.table import Table
10
10
 
11
11
  from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
12
- from sqlcg.core.schema import NodeLabel, RelType
13
12
  from sqlcg.server.read_client import run_read_routed
14
13
 
15
14
  if TYPE_CHECKING:
@@ -19,37 +18,111 @@ app = typer.Typer(help="Lineage analysis")
19
18
  console = Console()
20
19
 
21
20
 
22
- def _kind_filter(node_alias: str, *, include_intermediate: bool) -> str:
23
- """Build the SqlTable kind-filter clause for upstream/downstream queries.
21
+ def _upstream_sql(depth: int, include_intermediate: bool) -> str:
22
+ """Build the upstream recursive-CTE SQL query.
24
23
 
25
- Uses an OPTIONAL MATCH + explicit ``WITH … WHERE t.kind IS NULL OR …`` form
26
- (Half B of the #38/#39 fix) so that a node-less physical source — one whose
27
- SqlTable node is absent because it was only seen inside a CTE body before
28
- re-index is KEPT, not dropped. CTE/derived intermediates carry
29
- ``kind='cte'`` or ``kind='derived'`` and are excluded by the
30
- ``kind IN [...]`` guard; ``IS NULL`` matches the absent-node case (t = NULL
31
- t.kind = NULL → IS NULL is TRUE).
32
-
33
- The ``WITH {node_alias}, t WHERE`` clause is required: an OPTIONAL MATCH
34
- WHERE clause in KuzuDB applies to the match attempt and does not filter the
35
- surrounding row. The subsequent WITH WHERE is the actual row filter.
36
-
37
- Args:
38
- node_alias: The Cypher alias for the column node whose table is filtered
39
- (``"src"`` for upstream, ``"dst"`` for downstream).
40
- include_intermediate: When True, return an empty string (no filtering);
41
- all intermediates including CTE nodes are kept.
42
-
43
- Returns:
44
- A Cypher fragment string (with trailing space) to embed directly in the
45
- query, or an empty string when include_intermediate is True.
24
+ Traverses COLUMN_LINEAGE edges from dst→src (upstream direction).
25
+ Applies kind-filter (LEFT JOIN SqlTable) to exclude CTE/derived intermediates
26
+ unless include_intermediate=True. The kind-guard mirrors the Cypher
27
+ OPTIONAL MATCH + WITH WHERE t.kind IS NULL OR t.kind IN ['table','external']
28
+ semantics (#38/#40/19.2).
29
+ """
30
+ kind_filter = (
31
+ ""
32
+ if include_intermediate
33
+ else (
34
+ ' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
35
+ " WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
36
+ )
37
+ )
38
+ return f"""
39
+ WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
40
+ SELECT
41
+ cl.src_key AS id,
42
+ c_src.table_qualified,
43
+ 1 AS depth,
44
+ ARRAY[cl.dst_key, cl.src_key] AS path
45
+ FROM "COLUMN_LINEAGE" cl
46
+ JOIN "SqlColumn" c_src ON c_src.id = cl.src_key
47
+ WHERE cl.dst_key = ?
48
+ UNION ALL
49
+ SELECT
50
+ cl2.src_key,
51
+ c2.table_qualified,
52
+ reach.depth + 1,
53
+ array_append(reach.path, cl2.src_key)
54
+ FROM reach
55
+ JOIN "COLUMN_LINEAGE" cl2 ON cl2.dst_key = reach.id
56
+ JOIN "SqlColumn" c2 ON c2.id = cl2.src_key
57
+ WHERE reach.depth < {depth}
58
+ AND NOT cl2.src_key = ANY(reach.path)
59
+ ),
60
+ distinct_reach AS (
61
+ SELECT DISTINCT id, table_qualified FROM reach
62
+ )
63
+ SELECT
64
+ dr.id,
65
+ dr.table_qualified,
66
+ min(q.file_path) AS file,
67
+ min(q.start_line) AS line
68
+ FROM distinct_reach dr
69
+ LEFT JOIN "COLUMN_LINEAGE" src_edge ON src_edge.src_key = dr.id
70
+ LEFT JOIN "SqlQuery" q ON q.id = src_edge.query_id
71
+ {kind_filter}GROUP BY dr.id, dr.table_qualified
72
+ LIMIT 100
73
+ """
74
+
75
+
76
+ def _downstream_sql(depth: int, include_intermediate: bool) -> str:
77
+ """Build the downstream recursive-CTE SQL query.
78
+
79
+ Traverses COLUMN_LINEAGE edges from src→dst (downstream direction).
80
+ Kind-filter mirrors upstream: LEFT JOIN SqlTable + IS NULL OR 'table'/'external'.
46
81
  """
47
- if include_intermediate:
48
- return ""
49
- return (
50
- f"OPTIONAL MATCH (t:SqlTable {{qualified: {node_alias}.table_qualified}}) "
51
- f"WITH {node_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
82
+ kind_filter = (
83
+ ""
84
+ if include_intermediate
85
+ else (
86
+ ' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
87
+ " WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
88
+ )
52
89
  )
90
+ return f"""
91
+ WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
92
+ SELECT
93
+ cl.dst_key AS id,
94
+ c_dst.table_qualified,
95
+ 1 AS depth,
96
+ ARRAY[cl.src_key, cl.dst_key] AS path
97
+ FROM "COLUMN_LINEAGE" cl
98
+ JOIN "SqlColumn" c_dst ON c_dst.id = cl.dst_key
99
+ WHERE cl.src_key = ?
100
+ UNION ALL
101
+ SELECT
102
+ cl2.dst_key,
103
+ c2.table_qualified,
104
+ reach.depth + 1,
105
+ array_append(reach.path, cl2.dst_key)
106
+ FROM reach
107
+ JOIN "COLUMN_LINEAGE" cl2 ON cl2.src_key = reach.id
108
+ JOIN "SqlColumn" c2 ON c2.id = cl2.dst_key
109
+ WHERE reach.depth < {depth}
110
+ AND NOT cl2.dst_key = ANY(reach.path)
111
+ ),
112
+ distinct_reach AS (
113
+ SELECT DISTINCT id, table_qualified FROM reach
114
+ )
115
+ SELECT
116
+ dr.id,
117
+ dr.table_qualified,
118
+ min(q.file_path) AS file,
119
+ min(q.start_line) AS line
120
+ FROM distinct_reach dr
121
+ LEFT JOIN "COLUMN_LINEAGE" dst_edge ON dst_edge.dst_key = dr.id
122
+ LEFT JOIN "SqlQuery" q ON q.id = dst_edge.query_id
123
+ {kind_filter}GROUP BY dr.id, dr.table_qualified
124
+ LIMIT 100
125
+ """
53
126
 
54
127
 
55
128
  @app.command("upstream")
@@ -62,36 +135,15 @@ def upstream( # noqa: B008
62
135
  ),
63
136
  ) -> None:
64
137
  """Trace upstream column lineage."""
65
- # Bounds check for depth to prevent performance DoS
66
138
  if depth < 1 or depth > 100:
67
139
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
68
140
  raise typer.Exit(1)
69
141
 
70
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
71
- kf = _kind_filter("src", include_intermediate=include_intermediate)
72
-
73
- results = run_read_routed(
74
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
75
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
76
- f"{kf}"
77
- f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
78
- "OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
79
- "WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
80
- "RETURN src.id AS id, file AS file, line AS line LIMIT 100",
81
- {"ref": ref},
82
- )
142
+ sql = _upstream_sql(depth, include_intermediate)
143
+ results = run_read_routed(sql, {"ref": ref})
83
144
  if not results and len(ref.split(".")) >= 3:
84
145
  bare = _bare_ref(ref)
85
- fallback_results = run_read_routed(
86
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
87
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
88
- f"{kf}"
89
- f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
90
- "OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
91
- "WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
92
- "RETURN src.id AS id, file AS file, line AS line LIMIT 100",
93
- {"bare": bare},
94
- )
146
+ fallback_results = run_read_routed(sql, {"bare": bare})
95
147
  if fallback_results:
96
148
  console.print(
97
149
  f"[yellow]Hint:[/yellow] No results for '{ref}'. "
@@ -104,7 +156,7 @@ def upstream( # noqa: B008
104
156
  if not raw:
105
157
  from sqlcg.server.noise_filter import NoiseFilter
106
158
 
107
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
159
+ nf = NoiseFilter.from_config()
108
160
  results = _filter_column_results(results, nf)
109
161
  _print_table(_add_file_line_col(results), ["id", "file:line"])
110
162
 
@@ -119,36 +171,15 @@ def downstream( # noqa: B008
119
171
  ),
120
172
  ) -> None:
121
173
  """Trace downstream column lineage."""
122
- # Bounds check for depth to prevent performance DoS
123
174
  if depth < 1 or depth > 100:
124
175
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
125
176
  raise typer.Exit(1)
126
177
 
127
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
128
- kf = _kind_filter("dst", include_intermediate=include_intermediate)
129
-
130
- results = run_read_routed(
131
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
132
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
133
- f"{kf}"
134
- f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
135
- "OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
136
- "WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
137
- "RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
138
- {"ref": ref},
139
- )
178
+ sql = _downstream_sql(depth, include_intermediate)
179
+ results = run_read_routed(sql, {"ref": ref})
140
180
  if not results and len(ref.split(".")) >= 3:
141
181
  bare = _bare_ref(ref)
142
- fallback_results = run_read_routed(
143
- f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
144
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
145
- f"{kf}"
146
- f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
147
- "OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
148
- "WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
149
- "RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
150
- {"bare": bare},
151
- )
182
+ fallback_results = run_read_routed(sql, {"bare": bare})
152
183
  if fallback_results:
153
184
  console.print(
154
185
  f"[yellow]Hint:[/yellow] No results for '{ref}'. "
@@ -161,18 +192,16 @@ def downstream( # noqa: B008
161
192
  if not raw:
162
193
  from sqlcg.server.noise_filter import NoiseFilter
163
194
 
164
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
195
+ nf = NoiseFilter.from_config()
165
196
  results = _filter_column_results(results, nf)
166
197
  _print_table(_add_file_line_col(results), ["id", "file:line"])
167
198
 
168
199
  # Append external consumer rows for terminal tables (scalar query, one per terminal).
169
- # Resolve terminal tables from the column results; fall back to the root column's table.
170
200
  terminal_tables: set[str] = set()
171
201
  for r in results:
172
202
  tbl = _col_id_to_table(r["id"])
173
203
  if tbl:
174
204
  terminal_tables.add(tbl)
175
- # Also check the root column's table (in case no downstream columns were found).
176
205
  root_parts = ref.rsplit(".", 1)
177
206
  if len(root_parts) == 2:
178
207
  terminal_tables.add(root_parts[0])
@@ -197,9 +226,11 @@ def impact( # noqa: B008
197
226
  ) -> None:
198
227
  """Show all queries impacted by a table."""
199
228
  results = run_read_routed(
200
- f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
201
- f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
202
- "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
229
+ "SELECT DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target"
230
+ ' FROM "SqlTable" t'
231
+ ' JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified'
232
+ ' JOIN "SqlQuery" q ON q.id = sf.src_key'
233
+ " WHERE t.qualified = ? LIMIT 100",
203
234
  {"t": table},
204
235
  )
205
236
  if not raw:
@@ -222,22 +253,20 @@ def failures(
222
253
  ),
223
254
  limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
224
255
  ) -> None:
225
- """List files that failed to parse, with their dominant cause (E-code bucket).
226
-
227
- Valid --cause buckets (from highest to lowest severity):
228
- timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
229
-
230
- Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
231
- with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
232
- an earlier version.
233
- """
234
- cypher = (
235
- f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
236
- "AND ($cause IS NULL OR f.parse_cause = $cause) "
237
- "RETURN f.path AS path, f.parse_cause AS cause "
238
- f"ORDER BY f.parse_cause LIMIT {limit}"
239
- )
240
- rows = run_read_routed(cypher, {"cause": cause})
256
+ """List files that failed to parse, with their dominant cause (E-code bucket)."""
257
+ if cause is not None:
258
+ rows = run_read_routed(
259
+ 'SELECT path, parse_cause AS cause FROM "File"'
260
+ " WHERE parse_failed = true AND parse_cause = ?"
261
+ f" ORDER BY parse_cause LIMIT {limit}",
262
+ {"cause": cause},
263
+ )
264
+ else:
265
+ rows = run_read_routed(
266
+ 'SELECT path, parse_cause AS cause FROM "File"'
267
+ f" WHERE parse_failed = true ORDER BY parse_cause LIMIT {limit}",
268
+ {},
269
+ )
241
270
  _print_table(rows, ["path", "cause"])
242
271
 
243
272
 
@@ -248,8 +277,9 @@ def unused(
248
277
  ) -> None:
249
278
  """Find tables with no query references."""
250
279
  results = run_read_routed(
251
- f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
252
- "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
280
+ 'SELECT DISTINCT qualified FROM "SqlTable"'
281
+ ' WHERE qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")'
282
+ " LIMIT 100",
253
283
  {},
254
284
  )
255
285
  if not raw:
@@ -261,30 +291,15 @@ def unused(
261
291
 
262
292
 
263
293
  def _bare_ref(ref: str) -> str:
264
- """Strip schema prefix from a ref string, keeping table.column.
265
-
266
- For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
267
- For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
268
- Never uses rsplit — that would yield only the column name for 3-part refs.
269
- """
294
+ """Strip schema prefix from a ref string, keeping table.column."""
270
295
  parts = ref.split(".")
271
296
  if len(parts) >= 3:
272
- return ".".join(parts[1:]) # drop schema, keep table.column
273
- return ref # already bare (no schema prefix)
297
+ return ".".join(parts[1:])
298
+ return ref
274
299
 
275
300
 
276
301
  def _col_id_to_table(col_id: str) -> str:
277
- """Extract the table-qualified part from a column ID (schema.table.col → schema.table).
278
-
279
- Column IDs follow the format: schema.table.column or table.column.
280
- The table part is everything except the last component.
281
-
282
- Args:
283
- col_id: A column ID string from the graph.
284
-
285
- Returns:
286
- The table-qualified portion (all but the last dotted component).
287
- """
302
+ """Extract the table-qualified part from a column ID."""
288
303
  parts = col_id.rsplit(".", 1)
289
304
  return parts[0] if len(parts) == 2 else col_id
290
305
 
@@ -293,16 +308,12 @@ def _filter_column_results(
293
308
  results: list[dict],
294
309
  nf: NoiseFilter, # type: ignore[name-defined]
295
310
  ) -> list[dict]:
296
- """Filter column-ID result rows by NoiseFilter, dropping rows whose table is noise."""
311
+ """Filter column-ID result rows by NoiseFilter."""
297
312
  return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
298
313
 
299
314
 
300
315
  def _add_file_line_col(rows: list[dict]) -> list[dict]:
301
- """Add a 'file:line' composite column from 'file' and 'line' fields.
302
-
303
- Formats as 'path/to/file.sql:N' when both are present, or '?' when either
304
- is absent (multi-hop upstream where file/line is not available).
305
- """
316
+ """Add a 'file:line' composite column from 'file' and 'line' fields."""
306
317
  result = []
307
318
  for row in rows:
308
319
  new_row = dict(row)