sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/METADATA +13 -2
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/RECORD +31 -29
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +140 -22
- sqlcg/cli/commands/db.py +19 -2
- sqlcg/cli/commands/find.py +10 -3
- sqlcg/cli/commands/gain.py +1 -1
- sqlcg/cli/commands/git.py +61 -11
- sqlcg/cli/commands/index.py +30 -2
- sqlcg/cli/commands/mcp.py +103 -0
- sqlcg/cli/commands/reindex.py +122 -12
- sqlcg/core/config.py +113 -5
- sqlcg/core/freshness.py +134 -0
- sqlcg/core/graph_db.py +2 -0
- sqlcg/core/kuzu_backend.py +4 -1
- sqlcg/core/queries.cypher +18 -6
- sqlcg/core/queries.py +3 -1
- sqlcg/core/schema.cypher +13 -1
- sqlcg/core/schema.py +5 -1
- sqlcg/indexer/indexer.py +394 -160
- sqlcg/indexer/walker.py +3 -0
- sqlcg/parsers/ansi_parser.py +56 -0
- sqlcg/parsers/base.py +13 -4
- sqlcg/parsers/snowflake_parser.py +46 -6
- sqlcg/server/control.py +144 -0
- sqlcg/server/models.py +68 -0
- sqlcg/server/server.py +283 -1
- sqlcg/server/skill.py +20 -4
- sqlcg/server/tools.py +203 -13
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.1.3
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -219,7 +219,7 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
|
|
|
219
219
|
| **Search & meta** | |
|
|
220
220
|
| `search_sql_pattern(query)` | Full-text search across indexed SQL |
|
|
221
221
|
| `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
|
|
222
|
-
| `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
|
|
222
|
+
| `db_info()` | Graph health, node counts, parse quality breakdown, warnings, freshness (indexed SHA vs HEAD) |
|
|
223
223
|
| `execute_cypher(query)` | Raw Cypher query against the graph |
|
|
224
224
|
| `submit_feedback(...)` | Report a false positive/negative to improve metrics |
|
|
225
225
|
|
|
@@ -228,6 +228,12 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
|
|
|
228
228
|
> `table.column`. Each returned node carries both `name` (the bare column) and
|
|
229
229
|
> `table` (the owning `schema.table`), so results are navigable without a second lookup.
|
|
230
230
|
|
|
231
|
+
> **Provenance fields**: lineage edges now carry `file`, `line`, and `expression`
|
|
232
|
+
> (where the lineage was derived from), a `confidence` of `1.0` for plainly-parsed
|
|
233
|
+
> facts (lower for inferred edges, with a `reason`), and a `table_kind`
|
|
234
|
+
> (`table` / `cte` / `derived` / `external`) so CTE and derived aliases are
|
|
235
|
+
> distinguishable from real tables.
|
|
236
|
+
|
|
231
237
|
> **LLM agent tip**: call `db_info()` before lineage queries to check that
|
|
232
238
|
> `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
|
|
233
239
|
> is high, column lineage will be limited for those files — use table-level tools
|
|
@@ -243,16 +249,21 @@ sqlcg db init # initialise graph database
|
|
|
243
249
|
sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
|
|
244
250
|
sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
|
|
245
251
|
sqlcg index <path> --profile # index + print per-stage timing and slowest files
|
|
252
|
+
sqlcg index <path> --include-working-tree # also index uncommitted changes (marks graph dirty)
|
|
246
253
|
sqlcg reindex <path> --from <sha> --to <sha> # incremental resync of only changed files
|
|
247
254
|
sqlcg analyze unused # tables with no query references
|
|
248
255
|
sqlcg analyze upstream/downstream # trace lineage from the CLI
|
|
249
256
|
sqlcg find table/column/pattern # search the graph
|
|
250
257
|
sqlcg watch <path> # watch for file changes
|
|
258
|
+
sqlcg db info # graph stats + freshness (indexed SHA vs HEAD)
|
|
251
259
|
sqlcg git install-hooks # install post-checkout + post-merge resync hooks
|
|
252
260
|
sqlcg gain # show usage metrics
|
|
253
261
|
sqlcg report # generate FP/error report
|
|
254
262
|
sqlcg mcp best-practices # print the fact/heuristic boundary for the MCP tools
|
|
255
263
|
sqlcg mcp start # start MCP server manually
|
|
264
|
+
sqlcg mcp status # server status JSON (via control socket)
|
|
265
|
+
sqlcg mcp stop # stop the running MCP server gracefully
|
|
266
|
+
sqlcg mcp restart # stop the server (client must respawn it)
|
|
256
267
|
sqlcg version # show installed version
|
|
257
268
|
```
|
|
258
269
|
|
|
@@ -1,37 +1,38 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=YGDRrWVIrONmQholAKWh6hSKxlPd2dLcM1AdHHdBhEA,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
7
|
-
sqlcg/cli/commands/db.py,sha256=
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
9
|
-
sqlcg/cli/commands/gain.py,sha256=
|
|
10
|
-
sqlcg/cli/commands/git.py,sha256=
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=hiKj0R1m5i4ZmwrXBlVT14xGy6rs9jmv_ZDCLVZj4Tg,14282
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=5VpknLqYaimK6YA516w6iQVX6JmHcD52o6MuW5d088c,7462
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=SsK6q4YRPknrz_lIQ4Gun6HRoAdoVRGClwAYdm_s2OU,3168
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=hz36QmuaXJXutI4vyNMDfcNsBeLTXa6EOw2bWe2AhTQ,8939
|
|
10
|
+
sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
|
|
12
12
|
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
13
|
-
sqlcg/cli/commands/mcp.py,sha256=
|
|
14
|
-
sqlcg/cli/commands/reindex.py,sha256=
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=2gDsNvtj1Ql7PkjX9dHWAzOK0uCPRR5DGdBAzJa8PIU,6005
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=n1mQTYAZshtCKPgpR12S6ZMCqO3cSUtpCXjzb1PuZxU,11857
|
|
15
15
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
16
|
sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
18
|
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
20
|
-
sqlcg/core/
|
|
19
|
+
sqlcg/core/config.py,sha256=LuB8HWPsIt1OsjOshTT1bJdXWXN01w76ABl9M-VB9DM,14777
|
|
20
|
+
sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
|
|
21
|
+
sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
|
|
21
22
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
22
|
-
sqlcg/core/kuzu_backend.py,sha256=
|
|
23
|
+
sqlcg/core/kuzu_backend.py,sha256=3kL8bGEQm70fuxYUdt1p7fsY12lCLQ07x01NYg6FOGA,16821
|
|
23
24
|
sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
|
|
24
|
-
sqlcg/core/queries.cypher,sha256=
|
|
25
|
-
sqlcg/core/queries.py,sha256=
|
|
26
|
-
sqlcg/core/schema.cypher,sha256=
|
|
27
|
-
sqlcg/core/schema.py,sha256=
|
|
25
|
+
sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
|
|
26
|
+
sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
|
|
27
|
+
sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
|
|
28
|
+
sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
|
|
28
29
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
29
30
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
30
31
|
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
31
32
|
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
32
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
+
sqlcg/indexer/indexer.py,sha256=KyyowxiSNU3Gm4JE-mj8gVm6D80XERJPd-he59I2sIk,62018
|
|
33
34
|
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
34
|
-
sqlcg/indexer/walker.py,sha256=
|
|
35
|
+
sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
|
|
35
36
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
36
37
|
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
37
38
|
sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
|
|
@@ -39,25 +40,26 @@ sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8Aa
|
|
|
39
40
|
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
40
41
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
41
42
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
42
|
-
sqlcg/parsers/ansi_parser.py,sha256=
|
|
43
|
-
sqlcg/parsers/base.py,sha256=
|
|
43
|
+
sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
|
|
44
|
+
sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
|
|
44
45
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
45
46
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
46
47
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
47
|
-
sqlcg/parsers/snowflake_parser.py,sha256=
|
|
48
|
+
sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
|
|
48
49
|
sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
|
|
49
50
|
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
51
|
+
sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
|
|
50
52
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
51
|
-
sqlcg/server/models.py,sha256=
|
|
53
|
+
sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
|
|
52
54
|
sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
|
|
53
|
-
sqlcg/server/server.py,sha256=
|
|
54
|
-
sqlcg/server/skill.py,sha256=
|
|
55
|
-
sqlcg/server/tools.py,sha256=
|
|
55
|
+
sqlcg/server/server.py,sha256=gzeO5WbSNfGxgIKte01uy0VjO1_basI2ChSuAwr0dBc,14844
|
|
56
|
+
sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
|
|
57
|
+
sqlcg/server/tools.py,sha256=JvijDC0h5uHjZyZUIZq9sztNG3W5sr-Yy5rHwOVuJec,66642
|
|
56
58
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
57
59
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
58
60
|
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
59
61
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
60
|
-
sql_code_graph-1.
|
|
61
|
-
sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
-
sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
-
sql_code_graph-1.0.2.dist-info/RECORD,,
|
|
62
|
+
sql_code_graph-1.1.3.dist-info/METADATA,sha256=Z_aRnsDOgZ_ngAHkIr3x2XpEjF-x6UMUQwcIkAMlGjo,13615
|
|
63
|
+
sql_code_graph-1.1.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
64
|
+
sql_code_graph-1.1.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
65
|
+
sql_code_graph-1.1.3.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -9,6 +9,7 @@ from rich.console import Console
|
|
|
9
9
|
from rich.table import Table
|
|
10
10
|
|
|
11
11
|
from sqlcg.core.config import get_backend
|
|
12
|
+
from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
|
|
12
13
|
from sqlcg.core.schema import NodeLabel, RelType
|
|
13
14
|
|
|
14
15
|
if TYPE_CHECKING:
|
|
@@ -18,11 +19,37 @@ app = typer.Typer(help="Lineage analysis")
|
|
|
18
19
|
console = Console()
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
def _kind_filter(source_alias: str, include_intermediate: bool) -> str:
|
|
23
|
+
"""Build the Half-B (#38) kind filter for the lineage traversal query.
|
|
24
|
+
|
|
25
|
+
When ``include_intermediate`` is False, the filter uses ``OPTIONAL MATCH`` plus
|
|
26
|
+
``t.kind IS NULL OR t.kind IN ['table', 'external']`` so a source whose SqlTable
|
|
27
|
+
node is ABSENT (a CTE-body source on a graph indexed before the #39 fix, or not yet
|
|
28
|
+
re-indexed) is KEPT rather than silently dropped. Reverting this to an inner
|
|
29
|
+
``MATCH (t:SqlTable {...}) ... WHERE t.kind IN [...]`` is the #38 regression:
|
|
30
|
+
node-less physical sources vanish from results.
|
|
31
|
+
|
|
32
|
+
``source_alias`` is ``src`` for upstream and ``dst`` for downstream — it names both
|
|
33
|
+
the node whose table is looked up and the variable carried through the WITH clause.
|
|
34
|
+
This is the single production source of the filter string; the #40 recall guard
|
|
35
|
+
imports it so reverting Half B here turns the guard red.
|
|
36
|
+
"""
|
|
37
|
+
if include_intermediate:
|
|
38
|
+
return ""
|
|
39
|
+
return (
|
|
40
|
+
f"OPTIONAL MATCH (t:SqlTable {{qualified: {source_alias}.table_qualified}}) "
|
|
41
|
+
f"WITH c, {source_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
21
45
|
@app.command("upstream")
|
|
22
46
|
def upstream( # noqa: B008
|
|
23
47
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
24
48
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
25
49
|
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
50
|
+
include_intermediate: bool = typer.Option( # noqa: B008
|
|
51
|
+
False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
|
|
52
|
+
),
|
|
26
53
|
) -> None:
|
|
27
54
|
"""Trace upstream column lineage."""
|
|
28
55
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -30,19 +57,32 @@ def upstream( # noqa: B008
|
|
|
30
57
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
31
58
|
raise typer.Exit(1)
|
|
32
59
|
|
|
33
|
-
|
|
60
|
+
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
|
|
61
|
+
# Half B (#38): use OPTIONAL MATCH so a missing SqlTable node (e.g. CTE-body source not yet
|
|
62
|
+
# re-indexed after #39 fix) is KEPT rather than silently dropped. WHERE t.kind IS NULL OR
|
|
63
|
+
# t.kind IN [...] means: keep when node absent (NULL) OR when kind is a physical source.
|
|
64
|
+
# CTE aliases (kind='cte') and derived tables (kind='derived') are filtered out.
|
|
65
|
+
kind_filter = _kind_filter("src", include_intermediate)
|
|
66
|
+
|
|
67
|
+
with get_backend(read_only=True) as backend:
|
|
34
68
|
results = backend.run_read(
|
|
35
|
-
f"MATCH
|
|
36
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
|
|
37
|
-
"
|
|
69
|
+
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
70
|
+
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
71
|
+
f"{kind_filter}"
|
|
72
|
+
f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
|
|
73
|
+
"OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
|
|
74
|
+
"RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
|
|
38
75
|
{"ref": ref},
|
|
39
76
|
)
|
|
40
77
|
if not results and len(ref.split(".")) >= 3:
|
|
41
78
|
bare = _bare_ref(ref)
|
|
42
79
|
fallback_results = backend.run_read(
|
|
43
|
-
f"MATCH
|
|
44
|
-
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
|
|
45
|
-
"
|
|
80
|
+
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
81
|
+
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
|
|
82
|
+
f"{kind_filter}"
|
|
83
|
+
f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
|
|
84
|
+
"OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
|
|
85
|
+
"RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
|
|
46
86
|
{"bare": bare},
|
|
47
87
|
)
|
|
48
88
|
if fallback_results:
|
|
@@ -59,7 +99,7 @@ def upstream( # noqa: B008
|
|
|
59
99
|
|
|
60
100
|
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
61
101
|
results = _filter_column_results(results, nf)
|
|
62
|
-
_print_table(results, ["id"])
|
|
102
|
+
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
63
103
|
|
|
64
104
|
|
|
65
105
|
@app.command("downstream")
|
|
@@ -67,6 +107,9 @@ def downstream( # noqa: B008
|
|
|
67
107
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
68
108
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
69
109
|
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
110
|
+
include_intermediate: bool = typer.Option( # noqa: B008
|
|
111
|
+
False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
|
|
112
|
+
),
|
|
70
113
|
) -> None:
|
|
71
114
|
"""Trace downstream column lineage."""
|
|
72
115
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -74,19 +117,31 @@ def downstream( # noqa: B008
|
|
|
74
117
|
console.print("[red]Error: --depth must be between 1 and 100[/red]")
|
|
75
118
|
raise typer.Exit(1)
|
|
76
119
|
|
|
77
|
-
|
|
120
|
+
# By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
|
|
121
|
+
# Half B (#38): OPTIONAL MATCH keeps sources whose SqlTable node is absent (NULL) or is a
|
|
122
|
+
# physical kind. WITH c, dst, t carries the three variables in scope at this interpolation
|
|
123
|
+
# point; direct and q are bound later in the query.
|
|
124
|
+
kind_filter = _kind_filter("dst", include_intermediate)
|
|
125
|
+
|
|
126
|
+
with get_backend(read_only=True) as backend:
|
|
78
127
|
results = backend.run_read(
|
|
79
|
-
f"MATCH
|
|
80
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
|
|
81
|
-
"
|
|
128
|
+
f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
|
|
129
|
+
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
130
|
+
f"{kind_filter}"
|
|
131
|
+
f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
132
|
+
"OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
|
|
133
|
+
"RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
|
|
82
134
|
{"ref": ref},
|
|
83
135
|
)
|
|
84
136
|
if not results and len(ref.split(".")) >= 3:
|
|
85
137
|
bare = _bare_ref(ref)
|
|
86
138
|
fallback_results = backend.run_read(
|
|
87
|
-
f"MATCH
|
|
88
|
-
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
|
|
89
|
-
"
|
|
139
|
+
f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
140
|
+
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
|
|
141
|
+
f"{kind_filter}"
|
|
142
|
+
f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
|
|
143
|
+
"OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
|
|
144
|
+
"RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
|
|
90
145
|
{"bare": bare},
|
|
91
146
|
)
|
|
92
147
|
if fallback_results:
|
|
@@ -103,38 +158,76 @@ def downstream( # noqa: B008
|
|
|
103
158
|
|
|
104
159
|
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
105
160
|
results = _filter_column_results(results, nf)
|
|
106
|
-
_print_table(results, ["id"])
|
|
161
|
+
_print_table(_add_file_line_col(results), ["id", "file:line"])
|
|
162
|
+
|
|
163
|
+
# Append external consumer rows for terminal tables (scalar query, one per terminal).
|
|
164
|
+
# Resolve terminal tables from the column results; fall back to the root column's table.
|
|
165
|
+
terminal_tables: set[str] = set()
|
|
166
|
+
for r in results:
|
|
167
|
+
tbl = _col_id_to_table(r["id"])
|
|
168
|
+
if tbl:
|
|
169
|
+
terminal_tables.add(tbl)
|
|
170
|
+
# Also check the root column's table (in case no downstream columns were found).
|
|
171
|
+
root_parts = ref.rsplit(".", 1)
|
|
172
|
+
if len(root_parts) == 2:
|
|
173
|
+
terminal_tables.add(root_parts[0])
|
|
174
|
+
consumer_rows: list[dict] = []
|
|
175
|
+
for tbl in sorted(terminal_tables):
|
|
176
|
+
rows_ec = backend.run_read(
|
|
177
|
+
GET_TABLE_EXTERNAL_CONSUMERS_QUERY,
|
|
178
|
+
{"table_qualified": tbl},
|
|
179
|
+
)
|
|
180
|
+
for ec in rows_ec:
|
|
181
|
+
consumer_rows.append(
|
|
182
|
+
{"id": f"[external] {ec['name']} ({ec['consumer_type']})", "file:line": ""}
|
|
183
|
+
)
|
|
184
|
+
if consumer_rows:
|
|
185
|
+
_print_table(consumer_rows, ["id", "file:line"])
|
|
107
186
|
|
|
108
187
|
|
|
109
188
|
@app.command("impact")
|
|
110
189
|
def impact( # noqa: B008
|
|
111
190
|
table: str = typer.Argument(..., help="Table name to analyze"), # noqa: B008
|
|
191
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
112
192
|
) -> None:
|
|
113
193
|
"""Show all queries impacted by a table."""
|
|
114
|
-
with get_backend() as backend:
|
|
194
|
+
with get_backend(read_only=True) as backend:
|
|
115
195
|
results = backend.run_read(
|
|
116
196
|
f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
|
|
117
197
|
f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
|
|
118
|
-
"RETURN q.id AS id, q.kind AS kind LIMIT 100",
|
|
198
|
+
"RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
|
|
119
199
|
{"t": table},
|
|
120
200
|
)
|
|
201
|
+
if not raw:
|
|
202
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
203
|
+
|
|
204
|
+
nf = NoiseFilter.from_config()
|
|
205
|
+
results = [r for r in results if not nf.is_noise(r.get("target", ""))]
|
|
121
206
|
_print_table(results, ["id", "kind"])
|
|
122
207
|
|
|
123
208
|
|
|
124
209
|
@app.command("failures")
|
|
125
210
|
def failures(
|
|
126
211
|
cause: str | None = typer.Option( # noqa: B008
|
|
127
|
-
None,
|
|
212
|
+
None,
|
|
213
|
+
"--cause",
|
|
214
|
+
help=(
|
|
215
|
+
"Filter by E-code bucket. Valid values: "
|
|
216
|
+
"timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip"
|
|
217
|
+
),
|
|
128
218
|
),
|
|
129
219
|
limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
|
|
130
220
|
) -> None:
|
|
131
221
|
"""List files that failed to parse, with their dominant cause (E-code bucket).
|
|
132
222
|
|
|
223
|
+
Valid --cause buckets (from highest to lowest severity):
|
|
224
|
+
timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
|
|
225
|
+
|
|
133
226
|
Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
|
|
134
227
|
with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
|
|
135
228
|
an earlier version.
|
|
136
229
|
"""
|
|
137
|
-
with get_backend() as backend:
|
|
230
|
+
with get_backend(read_only=True) as backend:
|
|
138
231
|
cypher = (
|
|
139
232
|
f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
|
|
140
233
|
"AND ($cause IS NULL OR f.parse_cause = $cause) "
|
|
@@ -148,14 +241,20 @@ def failures(
|
|
|
148
241
|
@app.command("unused")
|
|
149
242
|
def unused(
|
|
150
243
|
threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
|
|
244
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
151
245
|
) -> None:
|
|
152
246
|
"""Find tables with no query references."""
|
|
153
|
-
with get_backend() as backend:
|
|
247
|
+
with get_backend(read_only=True) as backend:
|
|
154
248
|
results = backend.run_read(
|
|
155
249
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
|
|
156
|
-
"RETURN t.qualified AS qualified LIMIT 100",
|
|
250
|
+
"RETURN DISTINCT t.qualified AS qualified LIMIT 100",
|
|
157
251
|
{},
|
|
158
252
|
)
|
|
253
|
+
if not raw:
|
|
254
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
255
|
+
|
|
256
|
+
nf = NoiseFilter.from_config()
|
|
257
|
+
results = [r for r in results if not nf.is_noise(r["qualified"])]
|
|
159
258
|
_print_table(results, ["qualified"])
|
|
160
259
|
|
|
161
260
|
|
|
@@ -196,6 +295,25 @@ def _filter_column_results(
|
|
|
196
295
|
return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
|
|
197
296
|
|
|
198
297
|
|
|
298
|
+
def _add_file_line_col(rows: list[dict]) -> list[dict]:
|
|
299
|
+
"""Add a 'file:line' composite column from 'file' and 'line' fields.
|
|
300
|
+
|
|
301
|
+
Formats as 'path/to/file.sql:N' when both are present, or '?' when either
|
|
302
|
+
is absent (multi-hop upstream where file/line is not available).
|
|
303
|
+
"""
|
|
304
|
+
result = []
|
|
305
|
+
for row in rows:
|
|
306
|
+
new_row = dict(row)
|
|
307
|
+
file = row.get("file")
|
|
308
|
+
line = row.get("line")
|
|
309
|
+
if file and line:
|
|
310
|
+
new_row["file:line"] = f"{file}:{line}"
|
|
311
|
+
else:
|
|
312
|
+
new_row["file:line"] = "?"
|
|
313
|
+
result.append(new_row)
|
|
314
|
+
return result
|
|
315
|
+
|
|
316
|
+
|
|
199
317
|
def _print_table(rows: list[dict], columns: list[str]) -> None:
|
|
200
318
|
"""Print results as a Rich table."""
|
|
201
319
|
if not rows:
|
sqlcg/cli/commands/db.py
CHANGED
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
4
|
import shutil
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
import typer
|
|
7
8
|
from rich.console import Console
|
|
8
9
|
|
|
9
10
|
from sqlcg.core.config import get_backend, get_db_path
|
|
11
|
+
from sqlcg.core.freshness import compute_freshness, render_freshness_line
|
|
10
12
|
from sqlcg.core.schema import NodeLabel
|
|
11
13
|
from sqlcg.utils.logging import getLogger
|
|
12
14
|
|
|
@@ -73,10 +75,25 @@ def db_reset( # noqa: B008
|
|
|
73
75
|
@app.command("info")
|
|
74
76
|
def db_info() -> None:
|
|
75
77
|
"""Show database stats."""
|
|
76
|
-
with get_backend() as backend:
|
|
78
|
+
with get_backend(read_only=True) as backend:
|
|
77
79
|
version = backend.get_schema_version() or "unknown"
|
|
78
80
|
console.print(f"Schema version: {version}")
|
|
79
81
|
|
|
82
|
+
# Freshness block — only shown when the DB has been indexed from a git repo
|
|
83
|
+
try:
|
|
84
|
+
indexed_sha = backend.get_indexed_sha()
|
|
85
|
+
repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
|
|
86
|
+
if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
|
|
87
|
+
repo_root = Path(repo_rows[0]["path"])
|
|
88
|
+
f = compute_freshness(repo_root, indexed_sha)
|
|
89
|
+
console.print(render_freshness_line(f))
|
|
90
|
+
except NotImplementedError:
|
|
91
|
+
# Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
|
|
92
|
+
pass
|
|
93
|
+
except Exception as e:
|
|
94
|
+
# Any unexpected error in the freshness block must not crash db info
|
|
95
|
+
logger.debug(f"Freshness check skipped: {e}")
|
|
96
|
+
|
|
80
97
|
# Show node counts for all labels
|
|
81
98
|
for label in NodeLabel:
|
|
82
99
|
try:
|
|
@@ -150,7 +167,7 @@ def db_info() -> None:
|
|
|
150
167
|
@app.command("list-repos")
|
|
151
168
|
def list_repos() -> None:
|
|
152
169
|
"""List all indexed repositories."""
|
|
153
|
-
with get_backend() as backend:
|
|
170
|
+
with get_backend(read_only=True) as backend:
|
|
154
171
|
result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
|
|
155
172
|
|
|
156
173
|
if not result:
|
sqlcg/cli/commands/find.py
CHANGED
|
@@ -18,7 +18,7 @@ def find_table( # noqa: B008
|
|
|
18
18
|
) -> None:
|
|
19
19
|
"""Find a table by name."""
|
|
20
20
|
name = name.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
21
|
-
with get_backend() as backend:
|
|
21
|
+
with get_backend(read_only=True) as backend:
|
|
22
22
|
results = backend.run_read(
|
|
23
23
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
|
|
24
24
|
"RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
|
|
@@ -38,14 +38,21 @@ def find_table( # noqa: B008
|
|
|
38
38
|
@app.command("column")
|
|
39
39
|
def find_column( # noqa: B008
|
|
40
40
|
ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
|
|
41
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
41
42
|
) -> None:
|
|
42
43
|
"""Find a column by table.column reference."""
|
|
43
44
|
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
44
|
-
with get_backend() as backend:
|
|
45
|
+
with get_backend(read_only=True) as backend:
|
|
45
46
|
results = backend.run_read(
|
|
46
47
|
f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
|
|
47
48
|
{"ref": ref},
|
|
48
49
|
)
|
|
50
|
+
if not raw:
|
|
51
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
52
|
+
|
|
53
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
54
|
+
# Filter on the schema.table portion of each column id (schema.table.column)
|
|
55
|
+
results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
|
|
49
56
|
_print_table(results, ["id"])
|
|
50
57
|
|
|
51
58
|
|
|
@@ -54,7 +61,7 @@ def find_pattern( # noqa: B008
|
|
|
54
61
|
pattern: str = typer.Argument(..., help="SQL pattern to search for"), # noqa: B008
|
|
55
62
|
) -> None:
|
|
56
63
|
"""Find queries containing a SQL pattern."""
|
|
57
|
-
with get_backend() as backend:
|
|
64
|
+
with get_backend(read_only=True) as backend:
|
|
58
65
|
results = backend.run_read(
|
|
59
66
|
f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
|
|
60
67
|
"RETURN q.id AS id, q.kind AS kind LIMIT 50",
|
sqlcg/cli/commands/gain.py
CHANGED
|
@@ -123,7 +123,7 @@ def gain_cmd(
|
|
|
123
123
|
# Section F: parse quality from graph
|
|
124
124
|
parse_quality: dict[str, int] | None = None
|
|
125
125
|
try:
|
|
126
|
-
with get_backend() as backend:
|
|
126
|
+
with get_backend(read_only=True) as backend:
|
|
127
127
|
mode_rows = backend.run_read(
|
|
128
128
|
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
|
|
129
129
|
" COUNT(q) AS cnt ORDER BY cnt DESC",
|