sql-code-graph 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.1.dist-info}/METADATA +2 -4
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.1.dist-info}/RECORD +30 -30
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +142 -124
- sqlcg/cli/commands/db.py +31 -56
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/index.py +6 -17
- sqlcg/cli/commands/reindex.py +1 -1
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +1 -1
- sqlcg/parsers/base.py +82 -0
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +26 -23
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +43 -106
- sqlcg/server/writer.py +43 -218
- sqlcg/core/kuzu_backend.py +0 -449
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.1.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.1.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.4.1
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
18
18
|
Classifier: Topic :: Database
|
|
19
19
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
20
|
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist:
|
|
21
|
+
Requires-Dist: duckdb<2.0,>=1.0.0
|
|
22
22
|
Requires-Dist: mcp<2.0,>=1.27.0
|
|
23
23
|
Requires-Dist: pathspec>=0.12.1
|
|
24
24
|
Requires-Dist: pydantic>=2.0
|
|
@@ -30,8 +30,6 @@ Requires-Dist: typer>=0.9.0
|
|
|
30
30
|
Requires-Dist: watchdog>=3.0.0
|
|
31
31
|
Provides-Extra: dbt
|
|
32
32
|
Requires-Dist: dbt-core>=1.7; extra == 'dbt'
|
|
33
|
-
Provides-Extra: neo4j
|
|
34
|
-
Requires-Dist: neo4j>=5.15.0; extra == 'neo4j'
|
|
35
33
|
Provides-Extra: snowflake
|
|
36
34
|
Requires-Dist: acryl-datahub<0.15.0,>=0.14.0; extra == 'snowflake'
|
|
37
35
|
Description-Content-Type: text/markdown
|
|
@@ -1,36 +1,36 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=Q9RK3IHj_JNrfASKHyNaH76PD2u74uMssIwE5Mi7F7Q,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
7
|
-
sqlcg/cli/commands/db.py,sha256=
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
9
|
-
sqlcg/cli/commands/gain.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=_aC5ML3w7YdLi7DL3TFS9OiCEIipuNZxWR6S4peTcn4,12154
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=9a8T2FVxcAHq1H6Cslaq34t10w9fBGf4T2reiLk33t8,6135
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=RPEpT9aXmK0rixRuHgMcmAn9FLR4JtIbZ1LM4RLkMvY,17050
|
|
12
12
|
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
13
13
|
sqlcg/cli/commands/mcp.py,sha256=QYaupf69lLpYzIoqsPJoCPiAggLVkYBzwpuOLRzxJDU,9140
|
|
14
|
-
sqlcg/cli/commands/reindex.py,sha256=
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=FqXxvQ0UrxwDS0q2V3gzYgarN3NPh9muaIZ3rgvTmHs,13810
|
|
15
15
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
|
-
sqlcg/cli/commands/uninstall.py,sha256=
|
|
16
|
+
sqlcg/cli/commands/uninstall.py,sha256=WrA1FnINxnd6mmE4-_QBK0aHBnibstJeAT8swnQKG4M,8962
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
|
-
sqlcg/core/__init__.py,sha256=
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
20
|
-
sqlcg/core/
|
|
21
|
-
sqlcg/core/
|
|
18
|
+
sqlcg/core/__init__.py,sha256=dXvLWpbQ72f5CM6sKSvBDnEGqGuIZaN5MmHyD8Vf1aA,154
|
|
19
|
+
sqlcg/core/config.py,sha256=rMTYt1QsZVOyhT8W7oE65XQh5LKqdOeXCQ00sMFny4U,12375
|
|
20
|
+
sqlcg/core/duckdb_backend.py,sha256=nnOGbjmjHGXR_dlrzJQpSw5otLescCW1WtltZthoURQ,29522
|
|
21
|
+
sqlcg/core/freshness.py,sha256=F9jWn2cbFs60jA9ta8KrT6MghD0mzI7SqqKs_Op9AeU,4577
|
|
22
|
+
sqlcg/core/graph_db.py,sha256=eLYdmiqPWrZHtKLFcKgD2aiWZHFU3wrwz6Y0A3d9NcE,8633
|
|
22
23
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
23
|
-
sqlcg/core/kuzu_backend.py,sha256=PHW7VqI7oCLKsHnm4OoBoNnE2XT19ohxUpQMMIJnjlY,17038
|
|
24
|
-
sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
|
|
25
24
|
sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
|
|
26
|
-
sqlcg/core/queries.py,sha256=
|
|
25
|
+
sqlcg/core/queries.py,sha256=jtZR6caLpF0WqqkjncYTlf5L1GN6DZweoM5dNOAB8OY,3043
|
|
26
|
+
sqlcg/core/queries.sql,sha256=75EHoHA5hKz-Xs8g-CCtDXbf3lYFEw_fxghjYU48gUM,7794
|
|
27
27
|
sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
|
|
28
|
-
sqlcg/core/schema.py,sha256=
|
|
28
|
+
sqlcg/core/schema.py,sha256=7fKf314ueIV7-tIkQQUS4O6H-OhlFKFVFFvGFqw_5Xk,1476
|
|
29
29
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
30
30
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
31
31
|
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
32
32
|
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
33
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
+
sqlcg/indexer/indexer.py,sha256=NCOWwXmE7e6m42sraFJpImmtWFss-V95RzEuI-eDzb0,66400
|
|
34
34
|
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
35
35
|
sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
|
|
36
36
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
@@ -38,30 +38,30 @@ sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
|
38
38
|
sqlcg/lineage/aggregator.py,sha256=LVyNcmvLBHWbh8SrDsJJBKd7sLg3-2NhEWwEndG7Jbc,4144
|
|
39
39
|
sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
|
|
40
40
|
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
41
|
-
sqlcg/metrics/store.py,sha256=
|
|
41
|
+
sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
|
|
42
42
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
43
43
|
sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
|
|
44
|
-
sqlcg/parsers/base.py,sha256=
|
|
44
|
+
sqlcg/parsers/base.py,sha256=N6uqQWHO2lpzTIWgPqzewAXmij0ikZdNmNVJkIQ8Mr0,54889
|
|
45
45
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
46
46
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
47
47
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
48
48
|
sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
|
|
49
49
|
sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
|
|
50
50
|
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
51
|
-
sqlcg/server/control.py,sha256=
|
|
51
|
+
sqlcg/server/control.py,sha256=qUcztb_aDhL-_X_Nq4q6uGx17cUlbLnI6vUpoZsEjbo,4506
|
|
52
52
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
53
53
|
sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
|
|
54
|
-
sqlcg/server/noise_filter.py,sha256=
|
|
55
|
-
sqlcg/server/read_client.py,sha256=
|
|
56
|
-
sqlcg/server/server.py,sha256=
|
|
57
|
-
sqlcg/server/skill.py,sha256=
|
|
58
|
-
sqlcg/server/tools.py,sha256=
|
|
59
|
-
sqlcg/server/writer.py,sha256=
|
|
54
|
+
sqlcg/server/noise_filter.py,sha256=Ats2FFzmzFKqvQPWvlUzK8mY9pzlUhL4m1s8P_HNnvI,6335
|
|
55
|
+
sqlcg/server/read_client.py,sha256=4v1OOl12HCHp6J14HrcWQApSixvJ0wwE8UIIZ0pzLyw,7808
|
|
56
|
+
sqlcg/server/server.py,sha256=A1soT-hYnAX78X5j97nIjw9oIZCxBejjV6p_tRDIK38,24335
|
|
57
|
+
sqlcg/server/skill.py,sha256=EDvmEgl-LQwCxZ9Lca4lt8zCtkk2cCgh9GLYlGgDA64,12828
|
|
58
|
+
sqlcg/server/tools.py,sha256=Rdl5SXAuePydk8Z12JVx-lNQeAqm3k9R-z0Q4DstCkw,69743
|
|
59
|
+
sqlcg/server/writer.py,sha256=HH8pW6k1c2pbY4b6g2N87Tx4gNCezYqTLNTlAmsHg2Y,18522
|
|
60
60
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
61
61
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
62
62
|
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
63
63
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
64
|
-
sql_code_graph-1.
|
|
65
|
-
sql_code_graph-1.
|
|
66
|
-
sql_code_graph-1.
|
|
67
|
-
sql_code_graph-1.
|
|
64
|
+
sql_code_graph-1.4.1.dist-info/METADATA,sha256=7psrSu1Vh3GiZ7PK7FED45QzazeUrHrltrRla4twAVQ,14085
|
|
65
|
+
sql_code_graph-1.4.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
66
|
+
sql_code_graph-1.4.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
67
|
+
sql_code_graph-1.4.1.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -9,7 +9,6 @@ from rich.console import Console
|
|
|
9
9
|
from rich.table import Table
|
|
10
10
|
|
|
11
11
|
from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
|
|
12
|
-
from sqlcg.core.schema import NodeLabel, RelType
|
|
13
12
|
from sqlcg.server.read_client import run_read_routed
|
|
14
13
|
|
|
15
14
|
if TYPE_CHECKING:
|
|
@@ -19,37 +18,111 @@ app = typer.Typer(help="Lineage analysis")
|
|
|
19
18
|
console = Console()
|
|
20
19
|
|
|
21
20
|
|
|
22
|
-
def
|
|
23
|
-
"""Build the
|
|
21
|
+
def _upstream_sql(depth: int, include_intermediate: bool) -> str:
|
|
22
|
+
"""Build the upstream recursive-CTE SQL query.
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
24
|
+
Traverses COLUMN_LINEAGE edges from dst→src (upstream direction).
|
|
25
|
+
Applies kind-filter (LEFT JOIN SqlTable) to exclude CTE/derived intermediates
|
|
26
|
+
unless include_intermediate=True. The kind-guard mirrors the Cypher
|
|
27
|
+
OPTIONAL MATCH + WITH … WHERE t.kind IS NULL OR t.kind IN ['table','external']
|
|
28
|
+
semantics (#38/#40/19.2).
|
|
29
|
+
"""
|
|
30
|
+
kind_filter = (
|
|
31
|
+
""
|
|
32
|
+
if include_intermediate
|
|
33
|
+
else (
|
|
34
|
+
' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
|
|
35
|
+
" WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
|
|
36
|
+
)
|
|
37
|
+
)
|
|
38
|
+
return f"""
|
|
39
|
+
WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
|
|
40
|
+
SELECT
|
|
41
|
+
cl.src_key AS id,
|
|
42
|
+
c_src.table_qualified,
|
|
43
|
+
1 AS depth,
|
|
44
|
+
ARRAY[cl.dst_key, cl.src_key] AS path
|
|
45
|
+
FROM "COLUMN_LINEAGE" cl
|
|
46
|
+
JOIN "SqlColumn" c_src ON c_src.id = cl.src_key
|
|
47
|
+
WHERE cl.dst_key = ?
|
|
48
|
+
UNION ALL
|
|
49
|
+
SELECT
|
|
50
|
+
cl2.src_key,
|
|
51
|
+
c2.table_qualified,
|
|
52
|
+
reach.depth + 1,
|
|
53
|
+
array_append(reach.path, cl2.src_key)
|
|
54
|
+
FROM reach
|
|
55
|
+
JOIN "COLUMN_LINEAGE" cl2 ON cl2.dst_key = reach.id
|
|
56
|
+
JOIN "SqlColumn" c2 ON c2.id = cl2.src_key
|
|
57
|
+
WHERE reach.depth < {depth}
|
|
58
|
+
AND NOT cl2.src_key = ANY(reach.path)
|
|
59
|
+
),
|
|
60
|
+
distinct_reach AS (
|
|
61
|
+
SELECT DISTINCT id, table_qualified FROM reach
|
|
62
|
+
)
|
|
63
|
+
SELECT
|
|
64
|
+
dr.id,
|
|
65
|
+
dr.table_qualified,
|
|
66
|
+
min(q.file_path) AS file,
|
|
67
|
+
min(q.start_line) AS line
|
|
68
|
+
FROM distinct_reach dr
|
|
69
|
+
LEFT JOIN "COLUMN_LINEAGE" src_edge ON src_edge.src_key = dr.id
|
|
70
|
+
LEFT JOIN "SqlQuery" q ON q.id = src_edge.query_id
|
|
71
|
+
{kind_filter}GROUP BY dr.id, dr.table_qualified
|
|
72
|
+
LIMIT 100
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _downstream_sql(depth: int, include_intermediate: bool) -> str:
|
|
77
|
+
"""Build the downstream recursive-CTE SQL query.
|
|
78
|
+
|
|
79
|
+
Traverses COLUMN_LINEAGE edges from src→dst (downstream direction).
|
|
80
|
+
Kind-filter mirrors upstream: LEFT JOIN SqlTable + IS NULL OR 'table'/'external'.
|
|
46
81
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
82
|
+
kind_filter = (
|
|
83
|
+
""
|
|
84
|
+
if include_intermediate
|
|
85
|
+
else (
|
|
86
|
+
' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
|
|
87
|
+
" WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
|
|
88
|
+
)
|
|
52
89
|
)
|
|
90
|
+
return f"""
|
|
91
|
+
WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
|
|
92
|
+
SELECT
|
|
93
|
+
cl.dst_key AS id,
|
|
94
|
+
c_dst.table_qualified,
|
|
95
|
+
1 AS depth,
|
|
96
|
+
ARRAY[cl.src_key, cl.dst_key] AS path
|
|
97
|
+
FROM "COLUMN_LINEAGE" cl
|
|
98
|
+
JOIN "SqlColumn" c_dst ON c_dst.id = cl.dst_key
|
|
99
|
+
WHERE cl.src_key = ?
|
|
100
|
+
UNION ALL
|
|
101
|
+
SELECT
|
|
102
|
+
cl2.dst_key,
|
|
103
|
+
c2.table_qualified,
|
|
104
|
+
reach.depth + 1,
|
|
105
|
+
array_append(reach.path, cl2.dst_key)
|
|
106
|
+
FROM reach
|
|
107
|
+
JOIN "COLUMN_LINEAGE" cl2 ON cl2.src_key = reach.id
|
|
108
|
+
JOIN "SqlColumn" c2 ON c2.id = cl2.dst_key
|
|
109
|
+
WHERE reach.depth < {depth}
|
|
110
|
+
AND NOT cl2.dst_key = ANY(reach.path)
|
|
111
|
+
),
|
|
112
|
+
distinct_reach AS (
|
|
113
|
+
SELECT DISTINCT id, table_qualified FROM reach
|
|
114
|
+
)
|
|
115
|
+
SELECT
|
|
116
|
+
dr.id,
|
|
117
|
+
dr.table_qualified,
|
|
118
|
+
min(q.file_path) AS file,
|
|
119
|
+
min(q.start_line) AS line
|
|
120
|
+
FROM distinct_reach dr
|
|
121
|
+
LEFT JOIN "COLUMN_LINEAGE" dst_edge ON dst_edge.dst_key = dr.id
|
|
122
|
+
LEFT JOIN "SqlQuery" q ON q.id = dst_edge.query_id
|
|
123
|
+
{kind_filter}GROUP BY dr.id, dr.table_qualified
|
|
124
|
+
LIMIT 100
|
|
125
|
+
"""
|
|
53
126
|
|
|
54
127
|
|
|
55
128
|
@app.command("upstream")
|
|
@@ -62,36 +135,16 @@ def upstream( # noqa: B008
|
|
|
62
135
|
),
|
|
63
136
|
) -> None:
|
|
64
137
|
"""Trace upstream column lineage."""
|
|
65
|
-
# Bounds check for depth to prevent performance DoS
|
|
66
138
|
if depth < 1 or depth > 100:
|
|
67
139
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
68
140
|
raise typer.Exit(1)
|
|
69
141
|
|
|
70
|
-
#
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
results = run_read_routed(
|
|
74
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
75
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
76
|
-
f"{kf}"
|
|
77
|
-
f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
|
|
78
|
-
"OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
|
|
79
|
-
"WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
80
|
-
"RETURN src.id AS id, file AS file, line AS line LIMIT 100",
|
|
81
|
-
{"ref": ref},
|
|
82
|
-
)
|
|
142
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
143
|
+
sql = _upstream_sql(depth, include_intermediate)
|
|
144
|
+
results = run_read_routed(sql, {"ref": ref})
|
|
83
145
|
if not results and len(ref.split(".")) >= 3:
|
|
84
146
|
bare = _bare_ref(ref)
|
|
85
|
-
fallback_results = run_read_routed(
|
|
86
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
87
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
88
|
-
f"{kf}"
|
|
89
|
-
f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
|
|
90
|
-
"OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
|
|
91
|
-
"WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
92
|
-
"RETURN src.id AS id, file AS file, line AS line LIMIT 100",
|
|
93
|
-
{"bare": bare},
|
|
94
|
-
)
|
|
147
|
+
fallback_results = run_read_routed(sql, {"bare": bare})
|
|
95
148
|
if fallback_results:
|
|
96
149
|
console.print(
|
|
97
150
|
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
@@ -104,7 +157,7 @@ def upstream( # noqa: B008
|
|
|
104
157
|
if not raw:
|
|
105
158
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
106
159
|
|
|
107
|
-
nf = NoiseFilter.from_config()
|
|
160
|
+
nf = NoiseFilter.from_config()
|
|
108
161
|
results = _filter_column_results(results, nf)
|
|
109
162
|
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
110
163
|
|
|
@@ -119,36 +172,16 @@ def downstream( # noqa: B008
|
|
|
119
172
|
),
|
|
120
173
|
) -> None:
|
|
121
174
|
"""Trace downstream column lineage."""
|
|
122
|
-
# Bounds check for depth to prevent performance DoS
|
|
123
175
|
if depth < 1 or depth > 100:
|
|
124
176
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
125
177
|
raise typer.Exit(1)
|
|
126
178
|
|
|
127
|
-
#
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
results = run_read_routed(
|
|
131
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
132
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
133
|
-
f"{kf}"
|
|
134
|
-
f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
135
|
-
"OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
|
|
136
|
-
"WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
137
|
-
"RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
|
|
138
|
-
{"ref": ref},
|
|
139
|
-
)
|
|
179
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
180
|
+
sql = _downstream_sql(depth, include_intermediate)
|
|
181
|
+
results = run_read_routed(sql, {"ref": ref})
|
|
140
182
|
if not results and len(ref.split(".")) >= 3:
|
|
141
183
|
bare = _bare_ref(ref)
|
|
142
|
-
fallback_results = run_read_routed(
|
|
143
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
144
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
145
|
-
f"{kf}"
|
|
146
|
-
f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
147
|
-
"OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
|
|
148
|
-
"WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
149
|
-
"RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
|
|
150
|
-
{"bare": bare},
|
|
151
|
-
)
|
|
184
|
+
fallback_results = run_read_routed(sql, {"bare": bare})
|
|
152
185
|
if fallback_results:
|
|
153
186
|
console.print(
|
|
154
187
|
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
@@ -161,18 +194,16 @@ def downstream( # noqa: B008
|
|
|
161
194
|
if not raw:
|
|
162
195
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
163
196
|
|
|
164
|
-
nf = NoiseFilter.from_config()
|
|
197
|
+
nf = NoiseFilter.from_config()
|
|
165
198
|
results = _filter_column_results(results, nf)
|
|
166
199
|
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
167
200
|
|
|
168
201
|
# Append external consumer rows for terminal tables (scalar query, one per terminal).
|
|
169
|
-
# Resolve terminal tables from the column results; fall back to the root column's table.
|
|
170
202
|
terminal_tables: set[str] = set()
|
|
171
203
|
for r in results:
|
|
172
204
|
tbl = _col_id_to_table(r["id"])
|
|
173
205
|
if tbl:
|
|
174
206
|
terminal_tables.add(tbl)
|
|
175
|
-
# Also check the root column's table (in case no downstream columns were found).
|
|
176
207
|
root_parts = ref.rsplit(".", 1)
|
|
177
208
|
if len(root_parts) == 2:
|
|
178
209
|
terminal_tables.add(root_parts[0])
|
|
@@ -197,9 +228,11 @@ def impact( # noqa: B008
|
|
|
197
228
|
) -> None:
|
|
198
229
|
"""Show all queries impacted by a table."""
|
|
199
230
|
results = run_read_routed(
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
231
|
+
"SELECT DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target"
|
|
232
|
+
' FROM "SqlTable" t'
|
|
233
|
+
' JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified'
|
|
234
|
+
' JOIN "SqlQuery" q ON q.id = sf.src_key'
|
|
235
|
+
" WHERE t.qualified = ? LIMIT 100",
|
|
203
236
|
{"t": table},
|
|
204
237
|
)
|
|
205
238
|
if not raw:
|
|
@@ -222,22 +255,20 @@ def failures(
|
|
|
222
255
|
),
|
|
223
256
|
limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
|
|
224
257
|
) -> None:
|
|
225
|
-
"""List files that failed to parse, with their dominant cause (E-code bucket).
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
240
|
-
rows = run_read_routed(cypher, {"cause": cause})
|
|
258
|
+
"""List files that failed to parse, with their dominant cause (E-code bucket)."""
|
|
259
|
+
if cause is not None:
|
|
260
|
+
rows = run_read_routed(
|
|
261
|
+
'SELECT path, parse_cause AS cause FROM "File"'
|
|
262
|
+
" WHERE parse_failed = true AND parse_cause = ?"
|
|
263
|
+
f" ORDER BY parse_cause LIMIT {limit}",
|
|
264
|
+
{"cause": cause},
|
|
265
|
+
)
|
|
266
|
+
else:
|
|
267
|
+
rows = run_read_routed(
|
|
268
|
+
'SELECT path, parse_cause AS cause FROM "File"'
|
|
269
|
+
f" WHERE parse_failed = true ORDER BY parse_cause LIMIT {limit}",
|
|
270
|
+
{},
|
|
271
|
+
)
|
|
241
272
|
_print_table(rows, ["path", "cause"])
|
|
242
273
|
|
|
243
274
|
|
|
@@ -248,8 +279,9 @@ def unused(
|
|
|
248
279
|
) -> None:
|
|
249
280
|
"""Find tables with no query references."""
|
|
250
281
|
results = run_read_routed(
|
|
251
|
-
|
|
252
|
-
|
|
282
|
+
'SELECT DISTINCT qualified FROM "SqlTable"'
|
|
283
|
+
' WHERE qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")'
|
|
284
|
+
" LIMIT 100",
|
|
253
285
|
{},
|
|
254
286
|
)
|
|
255
287
|
if not raw:
|
|
@@ -263,28 +295,18 @@ def unused(
|
|
|
263
295
|
def _bare_ref(ref: str) -> str:
|
|
264
296
|
"""Strip schema prefix from a ref string, keeping table.column.
|
|
265
297
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
Never uses rsplit — that would yield only the column name for 3-part refs.
|
|
298
|
+
Lowercases defensively so this is safe to call even if the caller did not
|
|
299
|
+
first fold the ref — graph keys are lowercased at index time (C2 normalization).
|
|
269
300
|
"""
|
|
301
|
+
ref = ref.lower()
|
|
270
302
|
parts = ref.split(".")
|
|
271
303
|
if len(parts) >= 3:
|
|
272
|
-
return ".".join(parts[1:])
|
|
273
|
-
return ref
|
|
304
|
+
return ".".join(parts[1:])
|
|
305
|
+
return ref
|
|
274
306
|
|
|
275
307
|
|
|
276
308
|
def _col_id_to_table(col_id: str) -> str:
|
|
277
|
-
"""Extract the table-qualified part from a column ID
|
|
278
|
-
|
|
279
|
-
Column IDs follow the format: schema.table.column or table.column.
|
|
280
|
-
The table part is everything except the last component.
|
|
281
|
-
|
|
282
|
-
Args:
|
|
283
|
-
col_id: A column ID string from the graph.
|
|
284
|
-
|
|
285
|
-
Returns:
|
|
286
|
-
The table-qualified portion (all but the last dotted component).
|
|
287
|
-
"""
|
|
309
|
+
"""Extract the table-qualified part from a column ID."""
|
|
288
310
|
parts = col_id.rsplit(".", 1)
|
|
289
311
|
return parts[0] if len(parts) == 2 else col_id
|
|
290
312
|
|
|
@@ -293,16 +315,12 @@ def _filter_column_results(
|
|
|
293
315
|
results: list[dict],
|
|
294
316
|
nf: NoiseFilter, # type: ignore[name-defined]
|
|
295
317
|
) -> list[dict]:
|
|
296
|
-
"""Filter column-ID result rows by NoiseFilter
|
|
318
|
+
"""Filter column-ID result rows by NoiseFilter."""
|
|
297
319
|
return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
|
|
298
320
|
|
|
299
321
|
|
|
300
322
|
def _add_file_line_col(rows: list[dict]) -> list[dict]:
|
|
301
|
-
"""Add a 'file:line' composite column from 'file' and 'line' fields.
|
|
302
|
-
|
|
303
|
-
Formats as 'path/to/file.sql:N' when both are present, or '?' when either
|
|
304
|
-
is absent (multi-hop upstream where file/line is not available).
|
|
305
|
-
"""
|
|
323
|
+
"""Add a 'file:line' composite column from 'file' and 'line' fields."""
|
|
306
324
|
result = []
|
|
307
325
|
for row in rows:
|
|
308
326
|
new_row = dict(row)
|