sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +29 -29
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +31 -56
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/index.py +6 -17
- sqlcg/cli/commands/reindex.py +1 -1
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +1 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +26 -23
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +43 -106
- sqlcg/server/writer.py +43 -218
- sqlcg/core/kuzu_backend.py +0 -449
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -18,7 +18,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
18
18
|
Classifier: Topic :: Database
|
|
19
19
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
20
|
Requires-Python: >=3.12
|
|
21
|
-
Requires-Dist:
|
|
21
|
+
Requires-Dist: duckdb<2.0,>=1.0.0
|
|
22
22
|
Requires-Dist: mcp<2.0,>=1.27.0
|
|
23
23
|
Requires-Dist: pathspec>=0.12.1
|
|
24
24
|
Requires-Dist: pydantic>=2.0
|
|
@@ -30,8 +30,6 @@ Requires-Dist: typer>=0.9.0
|
|
|
30
30
|
Requires-Dist: watchdog>=3.0.0
|
|
31
31
|
Provides-Extra: dbt
|
|
32
32
|
Requires-Dist: dbt-core>=1.7; extra == 'dbt'
|
|
33
|
-
Provides-Extra: neo4j
|
|
34
|
-
Requires-Dist: neo4j>=5.15.0; extra == 'neo4j'
|
|
35
33
|
Provides-Extra: snowflake
|
|
36
34
|
Requires-Dist: acryl-datahub<0.15.0,>=0.14.0; extra == 'snowflake'
|
|
37
35
|
Description-Content-Type: text/markdown
|
|
@@ -1,36 +1,36 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=spIaM-bJwyRvB5fw8VYDKkrB5Lz7cKAgRuvBBgKYX3g,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
7
|
-
sqlcg/cli/commands/db.py,sha256=
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
9
|
-
sqlcg/cli/commands/gain.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=YG5N1iYG-ierDgAJ36mFpQ4BMf2yJ4YF-UV7Rx-bmYA,11793
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=TMhPCHRWSo8YmlNnSxmGxrR0_1r3K4e0Bn8unZTkvU4,7248
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=p5Vyyx-VBk8YDWYQN16UhECh7PIeMaEyCPEcUFcRFlM,2598
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=SJU1c51a7MgNbZItqQnaBfOWGnV5xpXQctbew5Dr9BE,9062
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=9a8T2FVxcAHq1H6Cslaq34t10w9fBGf4T2reiLk33t8,6135
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=RPEpT9aXmK0rixRuHgMcmAn9FLR4JtIbZ1LM4RLkMvY,17050
|
|
12
12
|
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
13
13
|
sqlcg/cli/commands/mcp.py,sha256=QYaupf69lLpYzIoqsPJoCPiAggLVkYBzwpuOLRzxJDU,9140
|
|
14
|
-
sqlcg/cli/commands/reindex.py,sha256=
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=FqXxvQ0UrxwDS0q2V3gzYgarN3NPh9muaIZ3rgvTmHs,13810
|
|
15
15
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
|
-
sqlcg/cli/commands/uninstall.py,sha256=
|
|
16
|
+
sqlcg/cli/commands/uninstall.py,sha256=WrA1FnINxnd6mmE4-_QBK0aHBnibstJeAT8swnQKG4M,8962
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
|
-
sqlcg/core/__init__.py,sha256=
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
20
|
-
sqlcg/core/
|
|
21
|
-
sqlcg/core/
|
|
18
|
+
sqlcg/core/__init__.py,sha256=dXvLWpbQ72f5CM6sKSvBDnEGqGuIZaN5MmHyD8Vf1aA,154
|
|
19
|
+
sqlcg/core/config.py,sha256=rMTYt1QsZVOyhT8W7oE65XQh5LKqdOeXCQ00sMFny4U,12375
|
|
20
|
+
sqlcg/core/duckdb_backend.py,sha256=nnOGbjmjHGXR_dlrzJQpSw5otLescCW1WtltZthoURQ,29522
|
|
21
|
+
sqlcg/core/freshness.py,sha256=F9jWn2cbFs60jA9ta8KrT6MghD0mzI7SqqKs_Op9AeU,4577
|
|
22
|
+
sqlcg/core/graph_db.py,sha256=eLYdmiqPWrZHtKLFcKgD2aiWZHFU3wrwz6Y0A3d9NcE,8633
|
|
22
23
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
23
|
-
sqlcg/core/kuzu_backend.py,sha256=PHW7VqI7oCLKsHnm4OoBoNnE2XT19ohxUpQMMIJnjlY,17038
|
|
24
|
-
sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
|
|
25
24
|
sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
|
|
26
|
-
sqlcg/core/queries.py,sha256=
|
|
25
|
+
sqlcg/core/queries.py,sha256=jtZR6caLpF0WqqkjncYTlf5L1GN6DZweoM5dNOAB8OY,3043
|
|
26
|
+
sqlcg/core/queries.sql,sha256=75EHoHA5hKz-Xs8g-CCtDXbf3lYFEw_fxghjYU48gUM,7794
|
|
27
27
|
sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
|
|
28
|
-
sqlcg/core/schema.py,sha256=
|
|
28
|
+
sqlcg/core/schema.py,sha256=7fKf314ueIV7-tIkQQUS4O6H-OhlFKFVFFvGFqw_5Xk,1476
|
|
29
29
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
30
30
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
31
31
|
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
32
32
|
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
33
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
+
sqlcg/indexer/indexer.py,sha256=NCOWwXmE7e6m42sraFJpImmtWFss-V95RzEuI-eDzb0,66400
|
|
34
34
|
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
35
35
|
sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
|
|
36
36
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
@@ -38,7 +38,7 @@ sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
|
38
38
|
sqlcg/lineage/aggregator.py,sha256=LVyNcmvLBHWbh8SrDsJJBKd7sLg3-2NhEWwEndG7Jbc,4144
|
|
39
39
|
sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
|
|
40
40
|
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
41
|
-
sqlcg/metrics/store.py,sha256=
|
|
41
|
+
sqlcg/metrics/store.py,sha256=KuDtxvyAgug9_KtiSCpvgKM2VZM7VSaI3D11uMLjJJk,10604
|
|
42
42
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
43
43
|
sqlcg/parsers/ansi_parser.py,sha256=mGZvijMOMQ4i1BybpwU29a8jnIGViefhy9fxzkSpsRM,17193
|
|
44
44
|
sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
|
|
@@ -48,20 +48,20 @@ sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,149
|
|
|
48
48
|
sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
|
|
49
49
|
sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
|
|
50
50
|
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
51
|
-
sqlcg/server/control.py,sha256=
|
|
51
|
+
sqlcg/server/control.py,sha256=qUcztb_aDhL-_X_Nq4q6uGx17cUlbLnI6vUpoZsEjbo,4506
|
|
52
52
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
53
53
|
sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
|
|
54
|
-
sqlcg/server/noise_filter.py,sha256=
|
|
55
|
-
sqlcg/server/read_client.py,sha256=
|
|
56
|
-
sqlcg/server/server.py,sha256=
|
|
57
|
-
sqlcg/server/skill.py,sha256=
|
|
58
|
-
sqlcg/server/tools.py,sha256=
|
|
59
|
-
sqlcg/server/writer.py,sha256=
|
|
54
|
+
sqlcg/server/noise_filter.py,sha256=Ats2FFzmzFKqvQPWvlUzK8mY9pzlUhL4m1s8P_HNnvI,6335
|
|
55
|
+
sqlcg/server/read_client.py,sha256=4v1OOl12HCHp6J14HrcWQApSixvJ0wwE8UIIZ0pzLyw,7808
|
|
56
|
+
sqlcg/server/server.py,sha256=A1soT-hYnAX78X5j97nIjw9oIZCxBejjV6p_tRDIK38,24335
|
|
57
|
+
sqlcg/server/skill.py,sha256=EDvmEgl-LQwCxZ9Lca4lt8zCtkk2cCgh9GLYlGgDA64,12828
|
|
58
|
+
sqlcg/server/tools.py,sha256=Rdl5SXAuePydk8Z12JVx-lNQeAqm3k9R-z0Q4DstCkw,69743
|
|
59
|
+
sqlcg/server/writer.py,sha256=HH8pW6k1c2pbY4b6g2N87Tx4gNCezYqTLNTlAmsHg2Y,18522
|
|
60
60
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
61
61
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
62
62
|
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
63
63
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
64
|
-
sql_code_graph-1.
|
|
65
|
-
sql_code_graph-1.
|
|
66
|
-
sql_code_graph-1.
|
|
67
|
-
sql_code_graph-1.
|
|
64
|
+
sql_code_graph-1.4.0.dist-info/METADATA,sha256=rjzA7ons9zPtU1rsKtlhdx3z3qJUrVHL7tYfdg1HcrE,14085
|
|
65
|
+
sql_code_graph-1.4.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
66
|
+
sql_code_graph-1.4.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
67
|
+
sql_code_graph-1.4.0.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -9,7 +9,6 @@ from rich.console import Console
|
|
|
9
9
|
from rich.table import Table
|
|
10
10
|
|
|
11
11
|
from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
|
|
12
|
-
from sqlcg.core.schema import NodeLabel, RelType
|
|
13
12
|
from sqlcg.server.read_client import run_read_routed
|
|
14
13
|
|
|
15
14
|
if TYPE_CHECKING:
|
|
@@ -19,37 +18,111 @@ app = typer.Typer(help="Lineage analysis")
|
|
|
19
18
|
console = Console()
|
|
20
19
|
|
|
21
20
|
|
|
22
|
-
def
|
|
23
|
-
"""Build the
|
|
21
|
+
def _upstream_sql(depth: int, include_intermediate: bool) -> str:
|
|
22
|
+
"""Build the upstream recursive-CTE SQL query.
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
24
|
+
Traverses COLUMN_LINEAGE edges from dst→src (upstream direction).
|
|
25
|
+
Applies kind-filter (LEFT JOIN SqlTable) to exclude CTE/derived intermediates
|
|
26
|
+
unless include_intermediate=True. The kind-guard mirrors the Cypher
|
|
27
|
+
OPTIONAL MATCH + WITH … WHERE t.kind IS NULL OR t.kind IN ['table','external']
|
|
28
|
+
semantics (#38/#40/19.2).
|
|
29
|
+
"""
|
|
30
|
+
kind_filter = (
|
|
31
|
+
""
|
|
32
|
+
if include_intermediate
|
|
33
|
+
else (
|
|
34
|
+
' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
|
|
35
|
+
" WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
|
|
36
|
+
)
|
|
37
|
+
)
|
|
38
|
+
return f"""
|
|
39
|
+
WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
|
|
40
|
+
SELECT
|
|
41
|
+
cl.src_key AS id,
|
|
42
|
+
c_src.table_qualified,
|
|
43
|
+
1 AS depth,
|
|
44
|
+
ARRAY[cl.dst_key, cl.src_key] AS path
|
|
45
|
+
FROM "COLUMN_LINEAGE" cl
|
|
46
|
+
JOIN "SqlColumn" c_src ON c_src.id = cl.src_key
|
|
47
|
+
WHERE cl.dst_key = ?
|
|
48
|
+
UNION ALL
|
|
49
|
+
SELECT
|
|
50
|
+
cl2.src_key,
|
|
51
|
+
c2.table_qualified,
|
|
52
|
+
reach.depth + 1,
|
|
53
|
+
array_append(reach.path, cl2.src_key)
|
|
54
|
+
FROM reach
|
|
55
|
+
JOIN "COLUMN_LINEAGE" cl2 ON cl2.dst_key = reach.id
|
|
56
|
+
JOIN "SqlColumn" c2 ON c2.id = cl2.src_key
|
|
57
|
+
WHERE reach.depth < {depth}
|
|
58
|
+
AND NOT cl2.src_key = ANY(reach.path)
|
|
59
|
+
),
|
|
60
|
+
distinct_reach AS (
|
|
61
|
+
SELECT DISTINCT id, table_qualified FROM reach
|
|
62
|
+
)
|
|
63
|
+
SELECT
|
|
64
|
+
dr.id,
|
|
65
|
+
dr.table_qualified,
|
|
66
|
+
min(q.file_path) AS file,
|
|
67
|
+
min(q.start_line) AS line
|
|
68
|
+
FROM distinct_reach dr
|
|
69
|
+
LEFT JOIN "COLUMN_LINEAGE" src_edge ON src_edge.src_key = dr.id
|
|
70
|
+
LEFT JOIN "SqlQuery" q ON q.id = src_edge.query_id
|
|
71
|
+
{kind_filter}GROUP BY dr.id, dr.table_qualified
|
|
72
|
+
LIMIT 100
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _downstream_sql(depth: int, include_intermediate: bool) -> str:
|
|
77
|
+
"""Build the downstream recursive-CTE SQL query.
|
|
78
|
+
|
|
79
|
+
Traverses COLUMN_LINEAGE edges from src→dst (downstream direction).
|
|
80
|
+
Kind-filter mirrors upstream: LEFT JOIN SqlTable + IS NULL OR 'table'/'external'.
|
|
46
81
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
82
|
+
kind_filter = (
|
|
83
|
+
""
|
|
84
|
+
if include_intermediate
|
|
85
|
+
else (
|
|
86
|
+
' LEFT JOIN "SqlTable" t ON t.qualified = dr.table_qualified\n'
|
|
87
|
+
" WHERE t.kind IS NULL OR t.kind IN ('table', 'external')\n"
|
|
88
|
+
)
|
|
52
89
|
)
|
|
90
|
+
return f"""
|
|
91
|
+
WITH RECURSIVE reach(id, table_qualified, depth, path) AS (
|
|
92
|
+
SELECT
|
|
93
|
+
cl.dst_key AS id,
|
|
94
|
+
c_dst.table_qualified,
|
|
95
|
+
1 AS depth,
|
|
96
|
+
ARRAY[cl.src_key, cl.dst_key] AS path
|
|
97
|
+
FROM "COLUMN_LINEAGE" cl
|
|
98
|
+
JOIN "SqlColumn" c_dst ON c_dst.id = cl.dst_key
|
|
99
|
+
WHERE cl.src_key = ?
|
|
100
|
+
UNION ALL
|
|
101
|
+
SELECT
|
|
102
|
+
cl2.dst_key,
|
|
103
|
+
c2.table_qualified,
|
|
104
|
+
reach.depth + 1,
|
|
105
|
+
array_append(reach.path, cl2.dst_key)
|
|
106
|
+
FROM reach
|
|
107
|
+
JOIN "COLUMN_LINEAGE" cl2 ON cl2.src_key = reach.id
|
|
108
|
+
JOIN "SqlColumn" c2 ON c2.id = cl2.dst_key
|
|
109
|
+
WHERE reach.depth < {depth}
|
|
110
|
+
AND NOT cl2.dst_key = ANY(reach.path)
|
|
111
|
+
),
|
|
112
|
+
distinct_reach AS (
|
|
113
|
+
SELECT DISTINCT id, table_qualified FROM reach
|
|
114
|
+
)
|
|
115
|
+
SELECT
|
|
116
|
+
dr.id,
|
|
117
|
+
dr.table_qualified,
|
|
118
|
+
min(q.file_path) AS file,
|
|
119
|
+
min(q.start_line) AS line
|
|
120
|
+
FROM distinct_reach dr
|
|
121
|
+
LEFT JOIN "COLUMN_LINEAGE" dst_edge ON dst_edge.dst_key = dr.id
|
|
122
|
+
LEFT JOIN "SqlQuery" q ON q.id = dst_edge.query_id
|
|
123
|
+
{kind_filter}GROUP BY dr.id, dr.table_qualified
|
|
124
|
+
LIMIT 100
|
|
125
|
+
"""
|
|
53
126
|
|
|
54
127
|
|
|
55
128
|
@app.command("upstream")
|
|
@@ -62,36 +135,15 @@ def upstream( # noqa: B008
|
|
|
62
135
|
),
|
|
63
136
|
) -> None:
|
|
64
137
|
"""Trace upstream column lineage."""
|
|
65
|
-
# Bounds check for depth to prevent performance DoS
|
|
66
138
|
if depth < 1 or depth > 100:
|
|
67
139
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
68
140
|
raise typer.Exit(1)
|
|
69
141
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
results = run_read_routed(
|
|
74
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
75
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
76
|
-
f"{kf}"
|
|
77
|
-
f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
|
|
78
|
-
"OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
|
|
79
|
-
"WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
80
|
-
"RETURN src.id AS id, file AS file, line AS line LIMIT 100",
|
|
81
|
-
{"ref": ref},
|
|
82
|
-
)
|
|
142
|
+
sql = _upstream_sql(depth, include_intermediate)
|
|
143
|
+
results = run_read_routed(sql, {"ref": ref})
|
|
83
144
|
if not results and len(ref.split(".")) >= 3:
|
|
84
145
|
bare = _bare_ref(ref)
|
|
85
|
-
fallback_results = run_read_routed(
|
|
86
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
87
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
88
|
-
f"{kf}"
|
|
89
|
-
f"OPTIONAL MATCH (src)-[srcedge:{RelType.COLUMN_LINEAGE}]->() "
|
|
90
|
-
"OPTIONAL MATCH (q:SqlQuery {id: srcedge.query_id}) "
|
|
91
|
-
"WITH src, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
92
|
-
"RETURN src.id AS id, file AS file, line AS line LIMIT 100",
|
|
93
|
-
{"bare": bare},
|
|
94
|
-
)
|
|
146
|
+
fallback_results = run_read_routed(sql, {"bare": bare})
|
|
95
147
|
if fallback_results:
|
|
96
148
|
console.print(
|
|
97
149
|
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
@@ -104,7 +156,7 @@ def upstream( # noqa: B008
|
|
|
104
156
|
if not raw:
|
|
105
157
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
106
158
|
|
|
107
|
-
nf = NoiseFilter.from_config()
|
|
159
|
+
nf = NoiseFilter.from_config()
|
|
108
160
|
results = _filter_column_results(results, nf)
|
|
109
161
|
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
110
162
|
|
|
@@ -119,36 +171,15 @@ def downstream( # noqa: B008
|
|
|
119
171
|
),
|
|
120
172
|
) -> None:
|
|
121
173
|
"""Trace downstream column lineage."""
|
|
122
|
-
# Bounds check for depth to prevent performance DoS
|
|
123
174
|
if depth < 1 or depth > 100:
|
|
124
175
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
125
176
|
raise typer.Exit(1)
|
|
126
177
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
results = run_read_routed(
|
|
131
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
132
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
133
|
-
f"{kf}"
|
|
134
|
-
f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
135
|
-
"OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
|
|
136
|
-
"WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
137
|
-
"RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
|
|
138
|
-
{"ref": ref},
|
|
139
|
-
)
|
|
178
|
+
sql = _downstream_sql(depth, include_intermediate)
|
|
179
|
+
results = run_read_routed(sql, {"ref": ref})
|
|
140
180
|
if not results and len(ref.split(".")) >= 3:
|
|
141
181
|
bare = _bare_ref(ref)
|
|
142
|
-
fallback_results = run_read_routed(
|
|
143
|
-
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
144
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
145
|
-
f"{kf}"
|
|
146
|
-
f"OPTIONAL MATCH ()-[dstedge:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
147
|
-
"OPTIONAL MATCH (q:SqlQuery {id: dstedge.query_id}) "
|
|
148
|
-
"WITH dst, min(q.start_line) AS line, min(q.file_path) AS file "
|
|
149
|
-
"RETURN dst.id AS id, file AS file, line AS line LIMIT 100",
|
|
150
|
-
{"bare": bare},
|
|
151
|
-
)
|
|
182
|
+
fallback_results = run_read_routed(sql, {"bare": bare})
|
|
152
183
|
if fallback_results:
|
|
153
184
|
console.print(
|
|
154
185
|
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
@@ -161,18 +192,16 @@ def downstream( # noqa: B008
|
|
|
161
192
|
if not raw:
|
|
162
193
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
163
194
|
|
|
164
|
-
nf = NoiseFilter.from_config()
|
|
195
|
+
nf = NoiseFilter.from_config()
|
|
165
196
|
results = _filter_column_results(results, nf)
|
|
166
197
|
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
167
198
|
|
|
168
199
|
# Append external consumer rows for terminal tables (scalar query, one per terminal).
|
|
169
|
-
# Resolve terminal tables from the column results; fall back to the root column's table.
|
|
170
200
|
terminal_tables: set[str] = set()
|
|
171
201
|
for r in results:
|
|
172
202
|
tbl = _col_id_to_table(r["id"])
|
|
173
203
|
if tbl:
|
|
174
204
|
terminal_tables.add(tbl)
|
|
175
|
-
# Also check the root column's table (in case no downstream columns were found).
|
|
176
205
|
root_parts = ref.rsplit(".", 1)
|
|
177
206
|
if len(root_parts) == 2:
|
|
178
207
|
terminal_tables.add(root_parts[0])
|
|
@@ -197,9 +226,11 @@ def impact( # noqa: B008
|
|
|
197
226
|
) -> None:
|
|
198
227
|
"""Show all queries impacted by a table."""
|
|
199
228
|
results = run_read_routed(
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
229
|
+
"SELECT DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target"
|
|
230
|
+
' FROM "SqlTable" t'
|
|
231
|
+
' JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified'
|
|
232
|
+
' JOIN "SqlQuery" q ON q.id = sf.src_key'
|
|
233
|
+
" WHERE t.qualified = ? LIMIT 100",
|
|
203
234
|
{"t": table},
|
|
204
235
|
)
|
|
205
236
|
if not raw:
|
|
@@ -222,22 +253,20 @@ def failures(
|
|
|
222
253
|
),
|
|
223
254
|
limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
|
|
224
255
|
) -> None:
|
|
225
|
-
"""List files that failed to parse, with their dominant cause (E-code bucket).
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
240
|
-
rows = run_read_routed(cypher, {"cause": cause})
|
|
256
|
+
"""List files that failed to parse, with their dominant cause (E-code bucket)."""
|
|
257
|
+
if cause is not None:
|
|
258
|
+
rows = run_read_routed(
|
|
259
|
+
'SELECT path, parse_cause AS cause FROM "File"'
|
|
260
|
+
" WHERE parse_failed = true AND parse_cause = ?"
|
|
261
|
+
f" ORDER BY parse_cause LIMIT {limit}",
|
|
262
|
+
{"cause": cause},
|
|
263
|
+
)
|
|
264
|
+
else:
|
|
265
|
+
rows = run_read_routed(
|
|
266
|
+
'SELECT path, parse_cause AS cause FROM "File"'
|
|
267
|
+
f" WHERE parse_failed = true ORDER BY parse_cause LIMIT {limit}",
|
|
268
|
+
{},
|
|
269
|
+
)
|
|
241
270
|
_print_table(rows, ["path", "cause"])
|
|
242
271
|
|
|
243
272
|
|
|
@@ -248,8 +277,9 @@ def unused(
|
|
|
248
277
|
) -> None:
|
|
249
278
|
"""Find tables with no query references."""
|
|
250
279
|
results = run_read_routed(
|
|
251
|
-
|
|
252
|
-
|
|
280
|
+
'SELECT DISTINCT qualified FROM "SqlTable"'
|
|
281
|
+
' WHERE qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")'
|
|
282
|
+
" LIMIT 100",
|
|
253
283
|
{},
|
|
254
284
|
)
|
|
255
285
|
if not raw:
|
|
@@ -261,30 +291,15 @@ def unused(
|
|
|
261
291
|
|
|
262
292
|
|
|
263
293
|
def _bare_ref(ref: str) -> str:
|
|
264
|
-
"""Strip schema prefix from a ref string, keeping table.column.
|
|
265
|
-
|
|
266
|
-
For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
|
|
267
|
-
For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
|
|
268
|
-
Never uses rsplit — that would yield only the column name for 3-part refs.
|
|
269
|
-
"""
|
|
294
|
+
"""Strip schema prefix from a ref string, keeping table.column."""
|
|
270
295
|
parts = ref.split(".")
|
|
271
296
|
if len(parts) >= 3:
|
|
272
|
-
return ".".join(parts[1:])
|
|
273
|
-
return ref
|
|
297
|
+
return ".".join(parts[1:])
|
|
298
|
+
return ref
|
|
274
299
|
|
|
275
300
|
|
|
276
301
|
def _col_id_to_table(col_id: str) -> str:
|
|
277
|
-
"""Extract the table-qualified part from a column ID
|
|
278
|
-
|
|
279
|
-
Column IDs follow the format: schema.table.column or table.column.
|
|
280
|
-
The table part is everything except the last component.
|
|
281
|
-
|
|
282
|
-
Args:
|
|
283
|
-
col_id: A column ID string from the graph.
|
|
284
|
-
|
|
285
|
-
Returns:
|
|
286
|
-
The table-qualified portion (all but the last dotted component).
|
|
287
|
-
"""
|
|
302
|
+
"""Extract the table-qualified part from a column ID."""
|
|
288
303
|
parts = col_id.rsplit(".", 1)
|
|
289
304
|
return parts[0] if len(parts) == 2 else col_id
|
|
290
305
|
|
|
@@ -293,16 +308,12 @@ def _filter_column_results(
|
|
|
293
308
|
results: list[dict],
|
|
294
309
|
nf: NoiseFilter, # type: ignore[name-defined]
|
|
295
310
|
) -> list[dict]:
|
|
296
|
-
"""Filter column-ID result rows by NoiseFilter
|
|
311
|
+
"""Filter column-ID result rows by NoiseFilter."""
|
|
297
312
|
return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
|
|
298
313
|
|
|
299
314
|
|
|
300
315
|
def _add_file_line_col(rows: list[dict]) -> list[dict]:
|
|
301
|
-
"""Add a 'file:line' composite column from 'file' and 'line' fields.
|
|
302
|
-
|
|
303
|
-
Formats as 'path/to/file.sql:N' when both are present, or '?' when either
|
|
304
|
-
is absent (multi-hop upstream where file/line is not available).
|
|
305
|
-
"""
|
|
316
|
+
"""Add a 'file:line' composite column from 'file' and 'line' fields."""
|
|
306
317
|
result = []
|
|
307
318
|
for row in rows:
|
|
308
319
|
new_row = dict(row)
|