sql-code-graph 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.0.1.dist-info → sql_code_graph-1.0.2.dist-info}/METADATA +1 -1
- {sql_code_graph-1.0.1.dist-info → sql_code_graph-1.0.2.dist-info}/RECORD +19 -19
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +90 -0
- sqlcg/cli/commands/find.py +11 -0
- sqlcg/cli/commands/index.py +43 -1
- sqlcg/cli/commands/install.py +83 -46
- sqlcg/cli/commands/mcp.py +18 -12
- sqlcg/cli/commands/reindex.py +3 -0
- sqlcg/core/config.py +7 -0
- sqlcg/indexer/error_classify.py +5 -1
- sqlcg/indexer/git_delta.py +1 -0
- sqlcg/indexer/pool.py +9 -4
- sqlcg/parsers/base.py +120 -43
- sqlcg/server/server.py +61 -18
- sqlcg/server/tools.py +59 -1
- sqlcg/utils/ignore.py +2 -0
- {sql_code_graph-1.0.1.dist-info → sql_code_graph-1.0.2.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.0.1.dist-info → sql_code_graph-1.0.2.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
|
|
7
7
|
sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
|
|
9
9
|
sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
12
|
-
sqlcg/cli/commands/install.py,sha256=
|
|
13
|
-
sqlcg/cli/commands/mcp.py,sha256=
|
|
14
|
-
sqlcg/cli/commands/reindex.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
|
|
12
|
+
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
|
|
15
15
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
16
|
sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
18
|
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
19
|
+
sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
|
|
20
20
|
sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
|
|
21
21
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
22
22
|
sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
|
|
@@ -27,10 +27,10 @@ sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
|
|
|
27
27
|
sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
|
|
28
28
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
29
29
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
30
|
-
sqlcg/indexer/error_classify.py,sha256=
|
|
31
|
-
sqlcg/indexer/git_delta.py,sha256=
|
|
30
|
+
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
31
|
+
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
32
32
|
sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
|
|
33
|
-
sqlcg/indexer/pool.py,sha256=
|
|
33
|
+
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
34
34
|
sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
|
|
35
35
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
36
36
|
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
|
40
40
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
41
41
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
42
42
|
sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
|
|
43
|
-
sqlcg/parsers/base.py,sha256=
|
|
43
|
+
sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
|
|
44
44
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
45
45
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
46
46
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -50,14 +50,14 @@ sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
|
50
50
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
51
51
|
sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
|
|
52
52
|
sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
|
|
53
|
-
sqlcg/server/server.py,sha256=
|
|
53
|
+
sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
|
|
54
54
|
sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
|
|
55
|
-
sqlcg/server/tools.py,sha256=
|
|
55
|
+
sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
|
|
56
56
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
57
57
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
58
|
-
sqlcg/utils/ignore.py,sha256=
|
|
58
|
+
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
59
59
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
60
|
-
sql_code_graph-1.0.1.dist-info/METADATA,sha256=
|
|
61
|
-
sql_code_graph-1.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
-
sql_code_graph-1.0.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
-
sql_code_graph-1.0.1.dist-info/RECORD,,
|
|
60
|
+
sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
|
|
61
|
+
sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
+
sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
+
sql_code_graph-1.0.2.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
"""Analyze command for lineage analysis."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
3
7
|
import typer
|
|
4
8
|
from rich.console import Console
|
|
5
9
|
from rich.table import Table
|
|
@@ -7,6 +11,9 @@ from rich.table import Table
|
|
|
7
11
|
from sqlcg.core.config import get_backend
|
|
8
12
|
from sqlcg.core.schema import NodeLabel, RelType
|
|
9
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
16
|
+
|
|
10
17
|
app = typer.Typer(help="Lineage analysis")
|
|
11
18
|
console = Console()
|
|
12
19
|
|
|
@@ -15,6 +22,7 @@ console = Console()
|
|
|
15
22
|
def upstream( # noqa: B008
|
|
16
23
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
17
24
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
25
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
18
26
|
) -> None:
|
|
19
27
|
"""Trace upstream column lineage."""
|
|
20
28
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -29,6 +37,28 @@ def upstream( # noqa: B008
|
|
|
29
37
|
"RETURN src.id AS id LIMIT 100",
|
|
30
38
|
{"ref": ref},
|
|
31
39
|
)
|
|
40
|
+
if not results and len(ref.split(".")) >= 3:
|
|
41
|
+
bare = _bare_ref(ref)
|
|
42
|
+
fallback_results = backend.run_read(
|
|
43
|
+
f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
44
|
+
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
|
|
45
|
+
"RETURN src.id AS id LIMIT 100",
|
|
46
|
+
{"bare": bare},
|
|
47
|
+
)
|
|
48
|
+
if fallback_results:
|
|
49
|
+
console.print(
|
|
50
|
+
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
51
|
+
f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
|
|
52
|
+
"The INSERT target may have been indexed without a schema prefix. "
|
|
53
|
+
"Multiple tables with the same unqualified name in different schemas "
|
|
54
|
+
"would all match — re-index with an explicit schema for precise results."
|
|
55
|
+
)
|
|
56
|
+
results = fallback_results
|
|
57
|
+
if not raw:
|
|
58
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
59
|
+
|
|
60
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
61
|
+
results = _filter_column_results(results, nf)
|
|
32
62
|
_print_table(results, ["id"])
|
|
33
63
|
|
|
34
64
|
|
|
@@ -36,6 +66,7 @@ def upstream( # noqa: B008
|
|
|
36
66
|
def downstream( # noqa: B008
|
|
37
67
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
38
68
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
69
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
39
70
|
) -> None:
|
|
40
71
|
"""Trace downstream column lineage."""
|
|
41
72
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -50,6 +81,28 @@ def downstream( # noqa: B008
|
|
|
50
81
|
"RETURN dst.id AS id LIMIT 100",
|
|
51
82
|
{"ref": ref},
|
|
52
83
|
)
|
|
84
|
+
if not results and len(ref.split(".")) >= 3:
|
|
85
|
+
bare = _bare_ref(ref)
|
|
86
|
+
fallback_results = backend.run_read(
|
|
87
|
+
f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
88
|
+
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
|
|
89
|
+
"RETURN dst.id AS id LIMIT 100",
|
|
90
|
+
{"bare": bare},
|
|
91
|
+
)
|
|
92
|
+
if fallback_results:
|
|
93
|
+
console.print(
|
|
94
|
+
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
95
|
+
f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
|
|
96
|
+
"The INSERT target may have been indexed without a schema prefix. "
|
|
97
|
+
"Multiple tables with the same unqualified name in different schemas "
|
|
98
|
+
"would all match — re-index with an explicit schema for precise results."
|
|
99
|
+
)
|
|
100
|
+
results = fallback_results
|
|
101
|
+
if not raw:
|
|
102
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
103
|
+
|
|
104
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
105
|
+
results = _filter_column_results(results, nf)
|
|
53
106
|
_print_table(results, ["id"])
|
|
54
107
|
|
|
55
108
|
|
|
@@ -106,6 +159,43 @@ def unused(
|
|
|
106
159
|
_print_table(results, ["qualified"])
|
|
107
160
|
|
|
108
161
|
|
|
162
|
+
def _bare_ref(ref: str) -> str:
|
|
163
|
+
"""Strip schema prefix from a ref string, keeping table.column.
|
|
164
|
+
|
|
165
|
+
For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
|
|
166
|
+
For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
|
|
167
|
+
Never uses rsplit — that would yield only the column name for 3-part refs.
|
|
168
|
+
"""
|
|
169
|
+
parts = ref.split(".")
|
|
170
|
+
if len(parts) >= 3:
|
|
171
|
+
return ".".join(parts[1:]) # drop schema, keep table.column
|
|
172
|
+
return ref # already bare (no schema prefix)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _col_id_to_table(col_id: str) -> str:
|
|
176
|
+
"""Extract the table-qualified part from a column ID (schema.table.col → schema.table).
|
|
177
|
+
|
|
178
|
+
Column IDs follow the format: schema.table.column or table.column.
|
|
179
|
+
The table part is everything except the last component.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
col_id: A column ID string from the graph.
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
The table-qualified portion (all but the last dotted component).
|
|
186
|
+
"""
|
|
187
|
+
parts = col_id.rsplit(".", 1)
|
|
188
|
+
return parts[0] if len(parts) == 2 else col_id
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _filter_column_results(
|
|
192
|
+
results: list[dict],
|
|
193
|
+
nf: NoiseFilter, # type: ignore[name-defined]
|
|
194
|
+
) -> list[dict]:
|
|
195
|
+
"""Filter column-ID result rows by NoiseFilter, dropping rows whose table is noise."""
|
|
196
|
+
return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
|
|
197
|
+
|
|
198
|
+
|
|
109
199
|
def _print_table(rows: list[dict], columns: list[str]) -> None:
|
|
110
200
|
"""Print results as a Rich table."""
|
|
111
201
|
if not rows:
|
sqlcg/cli/commands/find.py
CHANGED
|
@@ -14,14 +14,24 @@ console = Console()
|
|
|
14
14
|
@app.command("table")
|
|
15
15
|
def find_table( # noqa: B008
|
|
16
16
|
name: str = typer.Argument(..., help="Table name to search for"), # noqa: B008
|
|
17
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
17
18
|
) -> None:
|
|
18
19
|
"""Find a table by name."""
|
|
20
|
+
name = name.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
19
21
|
with get_backend() as backend:
|
|
20
22
|
results = backend.run_read(
|
|
21
23
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
|
|
22
24
|
"RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
|
|
23
25
|
{"name": name},
|
|
24
26
|
)
|
|
27
|
+
if not raw:
|
|
28
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
29
|
+
|
|
30
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
31
|
+
ids = [r["qualified"] for r in results]
|
|
32
|
+
kept, _ = nf.filter_nodes(ids)
|
|
33
|
+
kept_set = set(kept)
|
|
34
|
+
results = [r for r in results if r["qualified"] in kept_set]
|
|
25
35
|
_print_table(results, ["qualified", "kind"])
|
|
26
36
|
|
|
27
37
|
|
|
@@ -30,6 +40,7 @@ def find_column( # noqa: B008
|
|
|
30
40
|
ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
|
|
31
41
|
) -> None:
|
|
32
42
|
"""Find a column by table.column reference."""
|
|
43
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
33
44
|
with get_backend() as backend:
|
|
34
45
|
results = backend.run_read(
|
|
35
46
|
f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
|
sqlcg/cli/commands/index.py
CHANGED
|
@@ -14,7 +14,7 @@ from rich.progress import (
|
|
|
14
14
|
TimeRemainingColumn,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
|
-
from sqlcg.core.config import get_backend, get_db_path, get_dialect
|
|
17
|
+
from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
|
|
18
18
|
from sqlcg.indexer.indexer import Indexer
|
|
19
19
|
|
|
20
20
|
console = Console()
|
|
@@ -54,6 +54,9 @@ def index_cmd( # noqa: B008
|
|
|
54
54
|
quiet: bool = typer.Option( # noqa: B008
|
|
55
55
|
False, "--quiet", "-q", help="Suppress summary console output"
|
|
56
56
|
),
|
|
57
|
+
verbose: bool = typer.Option( # noqa: B008
|
|
58
|
+
False, "--verbose", "-v", help="Print parse warnings to stderr instead of log file"
|
|
59
|
+
),
|
|
57
60
|
debug: bool = typer.Option( # noqa: B008
|
|
58
61
|
False, "--debug", help="Show detailed log output during indexing"
|
|
59
62
|
),
|
|
@@ -68,11 +71,40 @@ def index_cmd( # noqa: B008
|
|
|
68
71
|
"""
|
|
69
72
|
|
|
70
73
|
import logging
|
|
74
|
+
import sys
|
|
71
75
|
|
|
72
76
|
level = logging.DEBUG if debug else logging.CRITICAL
|
|
73
77
|
logging.getLogger("sqlcg").setLevel(level)
|
|
74
78
|
logging.getLogger("sqlglot").setLevel(level)
|
|
75
79
|
|
|
80
|
+
# Route parse warnings to stderr (--verbose) or to the configured log file.
|
|
81
|
+
sqlcg_log = logging.getLogger("sqlcg")
|
|
82
|
+
|
|
83
|
+
class _CountingHandler(logging.Handler):
|
|
84
|
+
"""Counts WARNING+ records emitted during indexing."""
|
|
85
|
+
|
|
86
|
+
def __init__(self) -> None:
|
|
87
|
+
super().__init__(logging.WARNING)
|
|
88
|
+
self.count = 0
|
|
89
|
+
|
|
90
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
91
|
+
self.count += 1
|
|
92
|
+
|
|
93
|
+
_counter = _CountingHandler()
|
|
94
|
+
sqlcg_log.addHandler(_counter)
|
|
95
|
+
|
|
96
|
+
if verbose:
|
|
97
|
+
_warn_handler: logging.Handler = logging.StreamHandler(sys.stderr)
|
|
98
|
+
_warn_handler.setLevel(logging.WARNING)
|
|
99
|
+
sqlcg_log.addHandler(_warn_handler)
|
|
100
|
+
_warn_log_path = None
|
|
101
|
+
else:
|
|
102
|
+
_warn_log_path = KuzuConfig.from_env().log_path
|
|
103
|
+
_warn_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
104
|
+
_warn_handler = logging.FileHandler(_warn_log_path)
|
|
105
|
+
_warn_handler.setLevel(logging.WARNING)
|
|
106
|
+
sqlcg_log.addHandler(_warn_handler)
|
|
107
|
+
|
|
76
108
|
# Set buffer pool size via env var if specified
|
|
77
109
|
if buffer_pool_size > 0:
|
|
78
110
|
os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
|
|
@@ -100,6 +132,16 @@ def index_cmd( # noqa: B008
|
|
|
100
132
|
# KuzuDB connection and released the lock by the time we get here.
|
|
101
133
|
console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
|
|
102
134
|
raise typer.Exit(130) from None
|
|
135
|
+
finally:
|
|
136
|
+
sqlcg_log.removeHandler(_warn_handler)
|
|
137
|
+
sqlcg_log.removeHandler(_counter)
|
|
138
|
+
_warn_handler.close()
|
|
139
|
+
|
|
140
|
+
if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
|
|
141
|
+
console.print(
|
|
142
|
+
f"[yellow]Parse warnings written to {_warn_log_path} "
|
|
143
|
+
"— use --verbose to show here.[/yellow]"
|
|
144
|
+
)
|
|
103
145
|
|
|
104
146
|
|
|
105
147
|
def _run_index(
|
sqlcg/cli/commands/install.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
|
-
"""Install sqlcg as an MCP server in Claude Code.
|
|
1
|
+
"""Install sqlcg as an MCP server in Claude Code.
|
|
2
|
+
|
|
3
|
+
Write path (in priority order):
|
|
4
|
+
1. ``claude mcp add -s user sql-code-graph <cmd> <args>`` — the official
|
|
5
|
+
Claude Code CLI write path (reads from ~/.claude.json under the hood).
|
|
6
|
+
2. Fallback: write ``~/.claude.json`` directly under mcpServers.user when
|
|
7
|
+
the ``claude`` binary is not found or returns non-zero.
|
|
8
|
+
|
|
9
|
+
The previous target (~/.claude/settings.json) was incorrect — Claude Code does
|
|
10
|
+
NOT read MCP servers from that file. See ARCHITECTURE_REVIEW.md §9.2.
|
|
11
|
+
"""
|
|
2
12
|
|
|
3
13
|
import json
|
|
4
14
|
import os
|
|
5
15
|
import shutil
|
|
16
|
+
import subprocess
|
|
6
17
|
import sys
|
|
7
18
|
from pathlib import Path
|
|
8
19
|
|
|
@@ -11,7 +22,6 @@ from rich.console import Console
|
|
|
11
22
|
|
|
12
23
|
console = Console()
|
|
13
24
|
|
|
14
|
-
_SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
|
|
15
25
|
_SERVER_KEY = "sql-code-graph"
|
|
16
26
|
|
|
17
27
|
|
|
@@ -28,7 +38,10 @@ def install_cmd(
|
|
|
28
38
|
help="Repository root for --scope project (default: current directory).",
|
|
29
39
|
),
|
|
30
40
|
) -> None:
|
|
31
|
-
"""Register sqlcg as an MCP server in Claude Code
|
|
41
|
+
"""Register sqlcg as an MCP server in Claude Code.
|
|
42
|
+
|
|
43
|
+
Runs ``claude mcp add -s user sql-code-graph <cmd> <args>`` when the
|
|
44
|
+
``claude`` CLI is on PATH; otherwise writes ~/.claude.json directly.
|
|
32
45
|
|
|
33
46
|
Also provisions a Claude skill file (SKILL.md) at the chosen location.
|
|
34
47
|
Pass --scope project or --scope global to specify where the skill is written.
|
|
@@ -39,68 +52,81 @@ def install_cmd(
|
|
|
39
52
|
resolved_scope = _resolve_scope(scope)
|
|
40
53
|
|
|
41
54
|
if shutil.which("sqlcg"):
|
|
42
|
-
|
|
55
|
+
cmd_parts = ["sqlcg", "mcp", "start"]
|
|
43
56
|
elif shutil.which("uvx"):
|
|
44
|
-
|
|
57
|
+
cmd_parts = ["uvx", "sql-code-graph", "mcp", "start"]
|
|
45
58
|
else:
|
|
46
59
|
console.print("[red]Error:[/red] Neither 'sqlcg' nor 'uvx' found on PATH.")
|
|
47
60
|
raise typer.Exit(1)
|
|
48
61
|
|
|
49
|
-
|
|
50
|
-
|
|
62
|
+
entry: dict = {"command": cmd_parts[0], "args": cmd_parts[1:]}
|
|
63
|
+
|
|
64
|
+
if dry_run:
|
|
65
|
+
claude_bin = shutil.which("claude")
|
|
66
|
+
if claude_bin:
|
|
67
|
+
console.print("[dim]--dry-run: would run:[/dim]")
|
|
68
|
+
console.print(f" claude mcp add -s user {_SERVER_KEY} {' '.join(cmd_parts)}")
|
|
69
|
+
else:
|
|
70
|
+
claude_json = Path.home() / ".claude.json"
|
|
71
|
+
console.print("[dim]--dry-run: would write to ~/.claude.json:[/dim]")
|
|
72
|
+
_preview_claude_json(claude_json, entry)
|
|
73
|
+
_provision_skill(resolved_scope, repo, dry_run=True)
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
# --- Try official claude CLI first ---
|
|
77
|
+
claude_bin = shutil.which("claude")
|
|
78
|
+
if claude_bin:
|
|
79
|
+
proc = subprocess.run(
|
|
80
|
+
["claude", "mcp", "add", "-s", "user", _SERVER_KEY] + cmd_parts,
|
|
81
|
+
capture_output=True,
|
|
82
|
+
text=True,
|
|
83
|
+
)
|
|
84
|
+
if proc.returncode == 0:
|
|
85
|
+
console.print(f"[green]Configured:[/green] {_SERVER_KEY} via `claude mcp add`")
|
|
86
|
+
console.print("\nRestart Claude Code to pick up the new MCP server.")
|
|
87
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
88
|
+
return
|
|
89
|
+
# Non-zero: log and fall through to ~/.claude.json fallback
|
|
90
|
+
console.print(
|
|
91
|
+
f"[yellow]Warning:[/yellow] `claude mcp add` returned rc={proc.returncode}; "
|
|
92
|
+
"falling back to ~/.claude.json write."
|
|
93
|
+
)
|
|
94
|
+
if proc.stderr:
|
|
95
|
+
console.print(f"[dim]{proc.stderr.strip()}[/dim]")
|
|
96
|
+
|
|
97
|
+
# --- Fallback: write ~/.claude.json directly ---
|
|
98
|
+
claude_json = Path.home() / ".claude.json"
|
|
99
|
+
if claude_json.exists():
|
|
51
100
|
try:
|
|
52
|
-
|
|
101
|
+
data: dict = json.loads(claude_json.read_text())
|
|
53
102
|
except (json.JSONDecodeError, OSError, TypeError):
|
|
54
103
|
console.print(
|
|
55
|
-
f"[yellow]Warning:[/yellow] {
|
|
104
|
+
f"[yellow]Warning:[/yellow] {claude_json} contains invalid JSON — "
|
|
56
105
|
"mcpServers key will be added"
|
|
57
106
|
)
|
|
58
|
-
|
|
107
|
+
data = {}
|
|
59
108
|
else:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
mcp_servers: dict = settings.setdefault("mcpServers", {})
|
|
109
|
+
data = {}
|
|
63
110
|
|
|
64
|
-
|
|
65
|
-
if
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# Still provision the skill even when MCP entry already exists
|
|
69
|
-
_provision_skill(resolved_scope, repo, dry_run)
|
|
111
|
+
existing = data.get("mcpServers", {}).get("user", {}).get(_SERVER_KEY)
|
|
112
|
+
if existing == entry:
|
|
113
|
+
console.print(f"[green]Already configured:[/green] {_SERVER_KEY} (in ~/.claude.json)")
|
|
114
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
70
115
|
return
|
|
71
116
|
|
|
72
|
-
|
|
73
|
-
if (
|
|
74
|
-
existing_entry
|
|
75
|
-
and existing_entry.get("command") == "uvx"
|
|
76
|
-
and entry.get("command") == "sqlcg"
|
|
77
|
-
):
|
|
78
|
-
console.print(
|
|
79
|
-
"[blue]Updating[/blue] MCP entry from [dim]uvx[/dim] to local "
|
|
80
|
-
"[green]sqlcg[/green] binary (faster startup). Writing…"
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
mcp_servers[_SERVER_KEY] = entry
|
|
84
|
-
|
|
85
|
-
if dry_run is True:
|
|
86
|
-
console.print("[dim]--dry-run: would write:[/dim]")
|
|
87
|
-
console.print_json(json.dumps(settings, indent=2))
|
|
88
|
-
_provision_skill(resolved_scope, repo, dry_run)
|
|
89
|
-
return
|
|
117
|
+
data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
|
|
90
118
|
|
|
91
119
|
try:
|
|
92
|
-
|
|
93
|
-
tmp =
|
|
94
|
-
|
|
95
|
-
os.replace(tmp, settings_path)
|
|
120
|
+
tmp = claude_json.with_suffix(".tmp")
|
|
121
|
+
tmp.write_text(json.dumps(data, indent=2) + "\n")
|
|
122
|
+
os.replace(tmp, claude_json)
|
|
96
123
|
except (OSError, TypeError, AttributeError):
|
|
97
124
|
pass # Ignore file I/O errors in testing
|
|
98
125
|
|
|
99
|
-
cmd_str =
|
|
126
|
+
cmd_str = " ".join(cmd_parts)
|
|
100
127
|
console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
|
|
101
|
-
console.print(f"[dim]Written to {
|
|
128
|
+
console.print(f"[dim]Written to {claude_json}[/dim]")
|
|
102
129
|
|
|
103
|
-
# Note about cold cache if uvx was chosen
|
|
104
130
|
if entry.get("command") == "uvx":
|
|
105
131
|
console.print(
|
|
106
132
|
"[yellow]Note:[/yellow] First startup downloads dependencies (~30s). "
|
|
@@ -108,9 +134,20 @@ def install_cmd(
|
|
|
108
134
|
)
|
|
109
135
|
|
|
110
136
|
console.print("\nRestart Claude Code to pick up the new MCP server.")
|
|
137
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
138
|
+
|
|
111
139
|
|
|
112
|
-
|
|
113
|
-
|
|
140
|
+
def _preview_claude_json(claude_json: Path, entry: dict) -> None:
|
|
141
|
+
"""Print what would be written to ~/.claude.json without touching the file."""
|
|
142
|
+
if claude_json.exists():
|
|
143
|
+
try:
|
|
144
|
+
data: dict = json.loads(claude_json.read_text())
|
|
145
|
+
except (json.JSONDecodeError, OSError, TypeError):
|
|
146
|
+
data = {}
|
|
147
|
+
else:
|
|
148
|
+
data = {}
|
|
149
|
+
data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
|
|
150
|
+
console.print_json(json.dumps(data, indent=2))
|
|
114
151
|
|
|
115
152
|
|
|
116
153
|
def _resolve_scope(scope: str | None) -> str:
|
sqlcg/cli/commands/mcp.py
CHANGED
|
@@ -22,28 +22,34 @@ def _server_entry() -> dict:
|
|
|
22
22
|
|
|
23
23
|
@app.command("setup")
|
|
24
24
|
def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
|
|
25
|
-
"""Print or write MCP server config JSON.
|
|
25
|
+
"""Print or write MCP server config JSON.
|
|
26
|
+
|
|
27
|
+
--print (default): print the JSON snippet for manual insertion.
|
|
28
|
+
--write: write to ~/.claude.json under mcpServers.user (the correct path
|
|
29
|
+
for Claude Code — not settings.json, which Claude Code does not read
|
|
30
|
+
for MCP servers).
|
|
31
|
+
"""
|
|
26
32
|
entry = _server_entry()
|
|
27
33
|
if print_only:
|
|
28
34
|
console.print_json(json.dumps({"mcpServers": {_SERVER_KEY: entry}}, indent=2))
|
|
29
35
|
return
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
# Write to ~/.claude.json (correct path for Claude Code MCP servers)
|
|
38
|
+
claude_json = Path.home() / ".claude.json"
|
|
39
|
+
if claude_json.exists():
|
|
33
40
|
try:
|
|
34
|
-
|
|
41
|
+
data: dict = json.loads(claude_json.read_text())
|
|
35
42
|
except json.JSONDecodeError:
|
|
36
|
-
|
|
43
|
+
data = {}
|
|
37
44
|
else:
|
|
38
|
-
|
|
45
|
+
data = {}
|
|
39
46
|
|
|
40
|
-
|
|
47
|
+
data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
|
|
41
48
|
|
|
42
|
-
|
|
43
|
-
tmp =
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
console.print(f"[green]Configuration written to[/green] {config_path}")
|
|
49
|
+
tmp = claude_json.with_suffix(".tmp")
|
|
50
|
+
tmp.write_text(json.dumps(data, indent=2) + "\n")
|
|
51
|
+
os.replace(tmp, claude_json)
|
|
52
|
+
console.print(f"[green]Configuration written to[/green] {claude_json}")
|
|
47
53
|
console.print("Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.")
|
|
48
54
|
|
|
49
55
|
|
sqlcg/cli/commands/reindex.py
CHANGED
|
@@ -60,6 +60,9 @@ def reindex_cmd( # noqa: B008
|
|
|
60
60
|
from sqlcg.core.schema import SCHEMA_VERSION
|
|
61
61
|
from sqlcg.indexer.indexer import Indexer
|
|
62
62
|
|
|
63
|
+
# Resolve to absolute path so ignore-spec and git delta receive an absolute root
|
|
64
|
+
path = path.resolve()
|
|
65
|
+
|
|
63
66
|
# Resolve dialect
|
|
64
67
|
if dialect == "auto":
|
|
65
68
|
dialect = get_dialect(path)
|
sqlcg/core/config.py
CHANGED
|
@@ -19,6 +19,10 @@ class KuzuConfig(BaseModel):
|
|
|
19
19
|
default=0,
|
|
20
20
|
description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
|
|
21
21
|
)
|
|
22
|
+
log_path: Path = Field(
|
|
23
|
+
default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
|
|
24
|
+
description="Path for parse-warning log file written during indexing",
|
|
25
|
+
)
|
|
22
26
|
|
|
23
27
|
@classmethod
|
|
24
28
|
def from_env(cls) -> "KuzuConfig":
|
|
@@ -29,9 +33,11 @@ class KuzuConfig(BaseModel):
|
|
|
29
33
|
"""
|
|
30
34
|
env_path = os.getenv("SQLCG_DB_PATH")
|
|
31
35
|
env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
|
|
36
|
+
env_log = os.getenv("SQLCG_LOG_PATH")
|
|
32
37
|
return cls(
|
|
33
38
|
db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
|
|
34
39
|
buffer_pool_size_mb=int(env_buf) if env_buf else 0,
|
|
40
|
+
log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
|
|
35
41
|
)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -138,6 +144,7 @@ def get_noise_filter_patterns(path: Path) -> list[str]:
|
|
|
138
144
|
"""
|
|
139
145
|
default_patterns = [
|
|
140
146
|
"*_bck",
|
|
147
|
+
"*_bck_*", # catches mid-suffix variants e.g. foo_bck_us39553, bar_bck_archive
|
|
141
148
|
"*_bck_us",
|
|
142
149
|
"*_bck_[0-9]*",
|
|
143
150
|
"*_backup",
|
sqlcg/indexer/error_classify.py
CHANGED
|
@@ -93,10 +93,14 @@ def _classify_error(msg: str) -> str:
|
|
|
93
93
|
if not msg:
|
|
94
94
|
return "other"
|
|
95
95
|
|
|
96
|
-
# Timeout errors
|
|
96
|
+
# Timeout errors (including pool-path poison retries)
|
|
97
97
|
if msg.startswith("timeout:"):
|
|
98
98
|
return "timeout"
|
|
99
99
|
|
|
100
|
+
# Poison-retry: file repeatedly timed out in pool worker; treat as timeout bucket
|
|
101
|
+
if msg.startswith("skipped:poison"):
|
|
102
|
+
return "timeout"
|
|
103
|
+
|
|
100
104
|
# Skip markers
|
|
101
105
|
if msg.startswith("col_lineage_skip:"):
|
|
102
106
|
if "pure_ddl_file" in msg:
|
sqlcg/indexer/git_delta.py
CHANGED
|
@@ -51,6 +51,7 @@ def git_name_status_delta(root: Path, old_sha: str, new_sha: str) -> Delta | Non
|
|
|
51
51
|
unknown SHA, shallow clone, or git not available). Callers MUST fall
|
|
52
52
|
back to a full index_repo when None is returned.
|
|
53
53
|
"""
|
|
54
|
+
root = root.resolve() # guard: caller may pass a relative path (e.g. Path("."))
|
|
54
55
|
try:
|
|
55
56
|
result = subprocess.run(
|
|
56
57
|
["git", "diff", "--name-status", old_sha, new_sha],
|
sqlcg/indexer/pool.py
CHANGED
|
@@ -285,7 +285,9 @@ class HardKillPool:
|
|
|
285
285
|
tidx = queue.pop(0)
|
|
286
286
|
path = tasks[tidx].get("path", "")
|
|
287
287
|
if kill_counts.get(path, 0) >= poison_retries:
|
|
288
|
-
results[tidx] = _timeout_file(
|
|
288
|
+
results[tidx] = _timeout_file(
|
|
289
|
+
path, self._dialect, timeout_s=per_task_timeout, poison=True
|
|
290
|
+
)
|
|
289
291
|
logger.warning("Skipping %s — poisoned after %d kills", path, poison_retries)
|
|
290
292
|
if on_result is not None:
|
|
291
293
|
on_result()
|
|
@@ -375,7 +377,7 @@ class HardKillPool:
|
|
|
375
377
|
slot,
|
|
376
378
|
kill_counts[path],
|
|
377
379
|
)
|
|
378
|
-
results[tidx] = _timeout_file(path, self._dialect)
|
|
380
|
+
results[tidx] = _timeout_file(path, self._dialect, timeout_s=per_task_timeout)
|
|
379
381
|
if on_result is not None:
|
|
380
382
|
on_result()
|
|
381
383
|
self._respawn(w)
|
|
@@ -486,11 +488,14 @@ class HardKillPool:
|
|
|
486
488
|
def _timeout_file(
    path: str,
    dialect: str | None,
    timeout_s: float = 0.0,
    poison: bool = False,
) -> ParsedFile:
    """Build a placeholder ParsedFile for a task that produced no parse result.

    Args:
        path: Path of the SQL file, as a string.
        dialect: Dialect passed through to the ParsedFile unchanged.
        timeout_s: Timeout reported in the error marker; formatted with zero
            decimals, so sub-second values are rounded.
        poison: When True, record a ``skipped:poison`` marker instead of a
            ``timeout:`` marker.

    Returns:
        A ParsedFile for ``path`` with exactly one marker appended to its
        ``errors``.
    """
    file_path = Path(path)
    # The marker prefix ("skipped:poison" vs "timeout:") is what tells the two
    # cases apart; only the file's base name is included.
    marker = (
        f"skipped:poison file={file_path.name}"
        if poison
        else f"timeout:{timeout_s:.0f}s file={file_path.name}"
    )
    placeholder = ParsedFile(path=file_path, dialect=dialect)
    placeholder.errors.append(marker)
    return placeholder
|
|
495
500
|
|
|
496
501
|
|
sqlcg/parsers/base.py
CHANGED
|
@@ -688,8 +688,126 @@ class SqlParser(ABC):
|
|
|
688
688
|
except Exception:
|
|
689
689
|
body_scope = None
|
|
690
690
|
|
|
691
|
-
#
|
|
692
|
-
|
|
691
|
+
# INSERT positional column-list mapping (#25 fix).
|
|
692
|
+
# Compute the positional_col_names skip-set BEFORE the main column loop
|
|
693
|
+
# so the main loop can skip positions already handled here.
|
|
694
|
+
#
|
|
695
|
+
# When an INSERT has an explicit column list (INSERT INTO t (c1, c2) SELECT ...),
|
|
696
|
+
# the target column name at position idx is authoritative — the SELECT alias is
|
|
697
|
+
# cosmetic for the SELECT and meaningless to the INSERT target. This block
|
|
698
|
+
# overrides alias attribution for ALL positions (aliased or not).
|
|
699
|
+
#
|
|
700
|
+
# Guards applied here mirror the main column loop to preserve skip markers:
|
|
701
|
+
# - Star expressions → emit col_lineage_skip:star, register pos, skip sg_lineage
|
|
702
|
+
# - Pure-literal (no Column descendant) → register pos, skip sg_lineage (silent)
|
|
703
|
+
# - Unaliased non-Column (func/arith/CASE) → register pos, wrap as Alias(expr, insert_col)
|
|
704
|
+
# and call sg_lineage (no func_fallback marker here; see the NOTE below)
|
|
705
|
+
# - Plain Column / aliased expression → call sg_lineage (the #25 happy path)
|
|
706
|
+
#
|
|
707
|
+
# CLAUDE.md invariant: body_no_with = body.copy() + strip-WITH happens ONCE
|
|
708
|
+
# before the inner loop; only the single projection is swapped per column.
|
|
709
|
+
positional_col_names: dict[int, str] = {} # idx → insert_col_name
|
|
710
|
+
if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
|
|
711
|
+
insert_cols_list = [c.name for c in stmt.this.expressions]
|
|
712
|
+
# Build the WITH-stripped body ONCE here, before any per-column loop.
|
|
713
|
+
# Only the single projection is swapped per column below.
|
|
714
|
+
body_no_with = body.copy()
|
|
715
|
+
body_no_with.set("with_", None)
|
|
716
|
+
for _ins_idx, _col_expr in enumerate(col_expressions):
|
|
717
|
+
if _ins_idx >= len(insert_cols_list):
|
|
718
|
+
break
|
|
719
|
+
_insert_col = insert_cols_list[_ins_idx]
|
|
720
|
+
if not _insert_col:
|
|
721
|
+
continue
|
|
722
|
+
# Register position first so the main loop always skips it,
|
|
723
|
+
# regardless of which guard fires below.
|
|
724
|
+
positional_col_names[_ins_idx] = _insert_col
|
|
725
|
+
|
|
726
|
+
# Guard 1: Star projection — emit skip marker (same as main loop).
|
|
727
|
+
_inner_for_guard = (
|
|
728
|
+
_col_expr.this if isinstance(_col_expr, exp.Alias) else _col_expr
|
|
729
|
+
)
|
|
730
|
+
if isinstance(_inner_for_guard, exp.Star) or (
|
|
731
|
+
isinstance(_inner_for_guard, exp.Column)
|
|
732
|
+
and isinstance(_inner_for_guard.this, exp.Star)
|
|
733
|
+
):
|
|
734
|
+
_qualifier = (
|
|
735
|
+
_inner_for_guard.table
|
|
736
|
+
if isinstance(_inner_for_guard, exp.Column)
|
|
737
|
+
else None
|
|
738
|
+
)
|
|
739
|
+
out.errors.append(f"col_lineage_skip:star:{_qualifier or '<unqualified>'}")
|
|
740
|
+
continue # no sg_lineage for star
|
|
741
|
+
|
|
742
|
+
# Guard 2: Pure-literal — no Column descendants, nothing to trace.
|
|
743
|
+
if not list(_col_expr.find_all(exp.Column)):
|
|
744
|
+
continue # silent skip, no sg_lineage
|
|
745
|
+
|
|
746
|
+
# NOTE: do NOT emit func_fallback here for unaliased non-Column
|
|
747
|
+
# expressions (functions, arithmetic, CASE …). The main loop emits
|
|
748
|
+
# func_fallback for such expressions because a plain SELECT/CREATE VIEW
|
|
749
|
+
# gives them no output column name. The positional INSERT column list
|
|
750
|
+
# DOES supply that name (_insert_col): below we wrap the expression as
|
|
751
|
+
# Alias(expr, _insert_col) and let sg_lineage trace through it — exactly
|
|
752
|
+
# as the aliased form (e.g. `DATE(col) AS a`) already resolves. Guard 2
|
|
753
|
+
# (above) already dropped genuinely-untraceable pure-literal expressions
|
|
754
|
+
# (no Column descendant). Skipping column-containing expressions here would
|
|
755
|
+
# make the #25 positional feature do its work and then discard the result,
|
|
756
|
+
# dropping real lineage edges (regressed by eb19f29; broke COALESCE).
|
|
757
|
+
|
|
758
|
+
# Positional mapping always wins — replace (or add) the alias with the
|
|
759
|
+
# INSERT target column name regardless of SELECT alias.
|
|
760
|
+
if _col_expr.alias and _col_expr.alias != _insert_col:
|
|
761
|
+
self._log.debug(
|
|
762
|
+
"INSERT positional override: SELECT alias %r → INSERT col %r"
|
|
763
|
+
" at position %d",
|
|
764
|
+
_col_expr.alias,
|
|
765
|
+
_insert_col,
|
|
766
|
+
_ins_idx,
|
|
767
|
+
)
|
|
768
|
+
# If the expression is already an Alias(inner, old_alias), unwrap it
|
|
769
|
+
# before re-wrapping — otherwise we produce Alias(Alias(inner, x), c1)
|
|
770
|
+
# which serialises as "inner AS x AS c1" (syntax error).
|
|
771
|
+
_inner = _col_expr.this if isinstance(_col_expr, exp.Alias) else _col_expr
|
|
772
|
+
_aliased = exp.Alias(this=_inner.copy(), alias=_insert_col)
|
|
773
|
+
body_no_with.set("expressions", [_aliased])
|
|
774
|
+
_patched_sql = body_no_with.sql(dialect=self.DIALECT)
|
|
775
|
+
# Pass sources= (not scope=) here: the patched SQL is a freshly
|
|
776
|
+
# serialised string — the scope was built from the original body AST
|
|
777
|
+
# and does not correspond to this new string.
|
|
778
|
+
#
|
|
779
|
+
# Use `sources` (the cross-statement temp/CTAS map), NOT
|
|
780
|
+
# `combined_sources`. combined_sources additionally carries the
|
|
781
|
+
# SAME-STATEMENT CTE bodies (added above). Since body_no_with strips
|
|
782
|
+
# the WITH clause from the patched SQL, those CTE names become opaque
|
|
783
|
+
# source relations — passing their bodies as sources= would expand them
|
|
784
|
+
# away, collapsing intermediate CTE→target hops into the deepest source
|
|
785
|
+
# (regressed by eb19f29; broke the MA_AANTAL_OP_ORDER anchor link 5).
|
|
786
|
+
# Cross-statement temps (e.g. CREATE TEMP TABLE t) live in `sources`
|
|
787
|
+
# and SHOULD still expand (E36 multi-temp: t → src).
|
|
788
|
+
try:
|
|
789
|
+
_root = sg_lineage(
|
|
790
|
+
_insert_col,
|
|
791
|
+
_patched_sql,
|
|
792
|
+
dialect=self.DIALECT,
|
|
793
|
+
sources=sources or {},
|
|
794
|
+
)
|
|
795
|
+
if _root:
|
|
796
|
+
_new_edges = self._lineage_node_to_edges(
|
|
797
|
+
_root,
|
|
798
|
+
dst_col_name=_insert_col,
|
|
799
|
+
dst_table=dst_table,
|
|
800
|
+
path=path,
|
|
801
|
+
out=out,
|
|
802
|
+
)
|
|
803
|
+
edges.extend(_new_edges)
|
|
804
|
+
except Exception:
|
|
805
|
+
pass
|
|
806
|
+
|
|
807
|
+
# Extract output columns — skip positions handled by the positional INSERT block
|
|
808
|
+
for loop_idx, col_expr in enumerate(col_expressions):
|
|
809
|
+
if loop_idx in positional_col_names:
|
|
810
|
+
continue # positional INSERT block already emitted this column
|
|
693
811
|
# Skip star projections — sg_lineage requires a concrete column name.
|
|
694
812
|
if isinstance(col_expr, exp.Star) or (
|
|
695
813
|
isinstance(col_expr, exp.Column) and isinstance(col_expr.this, exp.Star)
|
|
@@ -919,47 +1037,6 @@ class SqlParser(ABC):
|
|
|
919
1037
|
cte_col_name,
|
|
920
1038
|
)
|
|
921
1039
|
|
|
922
|
-
# INSERT column-list aliasing (T-07-02 link 5).
|
|
923
|
-
# When an INSERT has an explicit column list and the SELECT expression has
|
|
924
|
-
# no alias (e.g. SELECT SUM(x) FROM cte), the INSERT column at the same
|
|
925
|
-
# position provides the destination col name. Stripping the WITH clause
|
|
926
|
-
# stops sg_lineage at the CTE name boundary (doesn't expand into bodies).
|
|
927
|
-
if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
|
|
928
|
-
insert_cols = [c.name for c in stmt.this.expressions]
|
|
929
|
-
# Build the WITH-stripped body ONCE before the loop and only swap its
|
|
930
|
-
# single projection per column (regressed in 4234e5d, which moved the
|
|
931
|
-
# full-body body.copy() inside the loop → O(N_cols) full-body deepcopies
|
|
932
|
-
# for wide INSERT ... SELECT). Stripping WITH stops sg_lineage at the CTE
|
|
933
|
-
# name boundary.
|
|
934
|
-
body_no_with = body.copy()
|
|
935
|
-
body_no_with.set("with_", None)
|
|
936
|
-
for idx, col_expr in enumerate(col_expressions):
|
|
937
|
-
if idx >= len(insert_cols):
|
|
938
|
-
break
|
|
939
|
-
if col_expr.alias:
|
|
940
|
-
continue # already handled by the main col loop
|
|
941
|
-
insert_col = insert_cols[idx]
|
|
942
|
-
if not insert_col:
|
|
943
|
-
continue
|
|
944
|
-
# Patch the shared body with this column's aliased expression so
|
|
945
|
-
# sg_lineage can trace it to the INSERT column name.
|
|
946
|
-
aliased = exp.Alias(this=col_expr.copy(), alias=insert_col)
|
|
947
|
-
body_no_with.set("expressions", [aliased])
|
|
948
|
-
patched_sql = body_no_with.sql(dialect=self.DIALECT)
|
|
949
|
-
try:
|
|
950
|
-
root = sg_lineage(insert_col, patched_sql, dialect=self.DIALECT)
|
|
951
|
-
if root:
|
|
952
|
-
new_edges = self._lineage_node_to_edges(
|
|
953
|
-
root,
|
|
954
|
-
dst_col_name=insert_col,
|
|
955
|
-
dst_table=dst_table,
|
|
956
|
-
path=path,
|
|
957
|
-
out=out,
|
|
958
|
-
)
|
|
959
|
-
edges.extend(new_edges)
|
|
960
|
-
except Exception:
|
|
961
|
-
pass
|
|
962
|
-
|
|
963
1040
|
except Exception as exc:
|
|
964
1041
|
self._log.debug(
|
|
965
1042
|
"column lineage extraction failed for entire statement: file=%s error=%s",
|
sqlcg/server/server.py
CHANGED
|
@@ -1,35 +1,76 @@
|
|
|
1
1
|
"""MCP server for SQL Code Graph.
|
|
2
2
|
|
|
3
3
|
Exposes FastMCP tools for lineage queries, pattern search, and indexing.
|
|
4
|
-
MCP protocol uses stdout for message transport
|
|
5
|
-
|
|
4
|
+
MCP protocol uses stdout (fd 1) for JSON-RPC message transport. This module
|
|
5
|
+
captures fd 1 as a raw binary buffer BEFORE any logging redirection so that
|
|
6
|
+
the captured buffer can be passed explicitly to stdio_server(). This ensures
|
|
7
|
+
JSON-RPC frames always go to fd 1 regardless of what sys.stdout points to
|
|
8
|
+
at call time.
|
|
9
|
+
|
|
10
|
+
Ordering invariant (must not change):
|
|
11
|
+
1. os.dup(1) → _real_stdout_buffer (first — before everything)
|
|
12
|
+
2. from mcp.server import FastMCP (module-level import)
|
|
13
|
+
3. mcp = FastMCP("SQL Code Graph") (module-level; tools.py registers here)
|
|
14
|
+
4. main() calls _configure_mcp_logging() (not at module scope)
|
|
6
15
|
"""
|
|
7
16
|
|
|
17
|
+
import os
|
|
8
18
|
import sys
|
|
9
19
|
|
|
10
|
-
|
|
11
|
-
|
|
20
|
+
# Capture the real fd 1 binary stream FIRST — before _configure_mcp_logging()
|
|
21
|
+
# (which replaces sys.stdout) AND before FastMCP("SQL Code Graph") construction.
|
|
22
|
+
# stdio_server() receives this explicitly so JSON-RPC frames go to fd 1
|
|
23
|
+
# regardless of what sys.stdout points to afterward.
|
|
24
|
+
# Guards against the v1.0.0/v1.0.1 regression where frames went to fd 2.
|
|
25
|
+
_real_stdout_buffer = os.fdopen(os.dup(1), "wb", buffering=0)
|
|
12
26
|
|
|
13
|
-
from
|
|
27
|
+
from dotenv import load_dotenv # noqa: E402
|
|
28
|
+
from mcp.server import FastMCP # noqa: E402
|
|
29
|
+
|
|
30
|
+
from sqlcg.utils.logging import getLogger # noqa: E402
|
|
14
31
|
|
|
15
32
|
logger = getLogger(__name__)
|
|
16
33
|
|
|
34
|
+
# Create FastMCP instance at module scope so tools.py can import and register with it.
|
|
35
|
+
# This is safe because _real_stdout_buffer has already captured fd 1 above.
|
|
36
|
+
mcp = FastMCP("SQL Code Graph")
|
|
37
|
+
|
|
17
38
|
|
|
18
39
|
def _configure_mcp_logging() -> None:
|
|
19
|
-
"""Redirect sys.stdout to sys.stderr
|
|
40
|
+
"""Redirect sys.stdout to sys.stderr and configure logging to stderr.
|
|
20
41
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
42
|
+
sys.stdout is replaced with sys.stderr so that any stray print() call
|
|
43
|
+
does not pollute fd 1 (reserved for MCP JSON-RPC frames).
|
|
44
|
+
The real fd 1 binary stream is captured in _real_stdout_buffer at module
|
|
45
|
+
top before this replacement and passed explicitly to stdio_server().
|
|
46
|
+
|
|
47
|
+
Must be called inside main(), not at module scope, so that
|
|
48
|
+
_real_stdout_buffer captures fd 1 before the redirect.
|
|
24
49
|
"""
|
|
50
|
+
import logging
|
|
51
|
+
|
|
25
52
|
sys.stdout = sys.stderr
|
|
53
|
+
logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
|
|
26
54
|
|
|
27
55
|
|
|
28
|
-
|
|
29
|
-
|
|
56
|
+
async def _run_stdio_async_with_real_stdout() -> None:
    """Run the MCP server loop with JSON-RPC frames written explicitly to fd 1.

    Does not go through ``FastMCP.run_stdio_async()``; instead it hands the
    module-level ``_real_stdout_buffer`` (a duplicate of fd 1 captured at
    import time) to ``stdio_server()`` and drives the underlying server
    directly, so frames do not depend on what ``sys.stdout`` points to.
    """
    from io import BufferedWriter, TextIOWrapper

    import anyio
    from mcp.server.stdio import stdio_server

    # _real_stdout_buffer is a raw, unbuffered stream. A raw write() may accept
    # fewer bytes than requested, and TextIOWrapper does not retry the rest, so
    # a frame could be truncated. BufferedWriter loops until everything has been
    # written; data still reaches fd 1 promptly whenever the text layer is
    # flushed.
    binary_out = BufferedWriter(_real_stdout_buffer)
    stdout_text = TextIOWrapper(binary_out, encoding="utf-8", line_buffering=False)
    async with stdio_server(stdout=anyio.wrap_file(stdout_text)) as (read_stream, write_stream):
        await mcp._mcp_server.run(
            read_stream,
            write_stream,
            mcp._mcp_server.create_initialization_options(),
        )
|
|
33
74
|
|
|
34
75
|
|
|
35
76
|
def main(db_path: str | None = None) -> None:
|
|
@@ -38,10 +79,13 @@ def main(db_path: str | None = None) -> None:
|
|
|
38
79
|
Args:
|
|
39
80
|
db_path: Path to KùzuDB database. If None, uses SQLCG_DB_PATH env var
|
|
40
81
|
or ~/.sqlcg/graph.db (via get_db_path in tools module).
|
|
41
|
-
|
|
42
|
-
Raises:
|
|
43
|
-
RuntimeError: If tools fail to initialize or FastMCP server fails.
|
|
44
82
|
"""
|
|
83
|
+
import anyio
|
|
84
|
+
|
|
85
|
+
# Must be first — redirects sys.stdout → sys.stderr so stray prints don't
|
|
86
|
+
# corrupt fd 1. _real_stdout_buffer was already captured at module top.
|
|
87
|
+
_configure_mcp_logging()
|
|
88
|
+
|
|
45
89
|
load_dotenv()
|
|
46
90
|
|
|
47
91
|
# Import tools module to trigger tool registration via @mcp.tool() decorators
|
|
@@ -50,8 +94,7 @@ def main(db_path: str | None = None) -> None:
|
|
|
50
94
|
# Initialize the backend singleton used by all tools
|
|
51
95
|
sqlcg.server.tools.init_backend(db_path)
|
|
52
96
|
|
|
53
|
-
# Run the MCP server event loop, ensuring backend is closed on shutdown
|
|
54
97
|
try:
|
|
55
|
-
|
|
98
|
+
anyio.run(_run_stdio_async_with_real_stdout)
|
|
56
99
|
finally:
|
|
57
100
|
sqlcg.server.tools.shutdown_backend()
|
sqlcg/server/tools.py
CHANGED
|
@@ -183,6 +183,19 @@ def _assert_indexed(db: GraphBackend) -> None:
|
|
|
183
183
|
)
|
|
184
184
|
|
|
185
185
|
|
|
186
|
+
def _bare_ref(ref: str) -> str:
|
|
187
|
+
"""Strip schema prefix from a ref string, keeping table.column.
|
|
188
|
+
|
|
189
|
+
For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
|
|
190
|
+
For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
|
|
191
|
+
Never uses rsplit — that would yield only the column name for 3-part refs.
|
|
192
|
+
"""
|
|
193
|
+
parts = ref.split(".")
|
|
194
|
+
if len(parts) >= 3:
|
|
195
|
+
return ".".join(parts[1:]) # drop schema, keep table.column
|
|
196
|
+
return ref # already bare (no schema prefix)
|
|
197
|
+
|
|
198
|
+
|
|
186
199
|
def _parse_column_ref(col_ref: str) -> tuple[str, str]:
|
|
187
200
|
"""Parse column reference "table.column" or "catalog.db.table.column".
|
|
188
201
|
|
|
@@ -554,9 +567,54 @@ def trace_column_lineage(table_col: str, max_depth: int | None = None) -> Lineag
|
|
|
554
567
|
|
|
555
568
|
mermaid = _build_mermaid(col_id, edges) if edges else None
|
|
556
569
|
|
|
570
|
+
# Bare-name fallback: when the primary query returns empty and the ref has a
|
|
571
|
+
# schema component (3+ parts), retry with the schema prefix stripped.
|
|
572
|
+
# This handles unqualified INSERT targets indexed without a schema prefix.
|
|
573
|
+
bare_fallback_used = False
|
|
574
|
+
if not lineage and len(table_col.split(".")) >= 3:
|
|
575
|
+
bare = _bare_ref(table_col)
|
|
576
|
+
bare_queue: deque[tuple[str, int]] = deque([(bare, 0)])
|
|
577
|
+
bare_visited: set[str] = set()
|
|
578
|
+
bare_emitted: set[str] = set()
|
|
579
|
+
while bare_queue:
|
|
580
|
+
current_id, depth = bare_queue.popleft()
|
|
581
|
+
if current_id in bare_visited or (max_depth is not None and depth > max_depth):
|
|
582
|
+
continue
|
|
583
|
+
if len(bare_visited) >= max_nodes:
|
|
584
|
+
break
|
|
585
|
+
bare_visited.add(current_id)
|
|
586
|
+
rows = db.run_read(TRACE_COLUMN_LINEAGE_QUERY, {"id": current_id})
|
|
587
|
+
for row in rows:
|
|
588
|
+
node_id = row["id"]
|
|
589
|
+
edges.append((node_id, current_id, row.get("transform") or "SELECT"))
|
|
590
|
+
if node_id not in bare_visited and node_id not in bare_emitted:
|
|
591
|
+
bare_emitted.add(node_id)
|
|
592
|
+
lineage.append(
|
|
593
|
+
LineageNode(
|
|
594
|
+
name=row.get("col_name", ""),
|
|
595
|
+
kind="column",
|
|
596
|
+
table=row.get("table_qualified"),
|
|
597
|
+
file=None,
|
|
598
|
+
confidence=row.get("confidence"),
|
|
599
|
+
)
|
|
600
|
+
)
|
|
601
|
+
if node_id not in bare_visited:
|
|
602
|
+
bare_queue.append((node_id, depth + 1))
|
|
603
|
+
if lineage:
|
|
604
|
+
bare_fallback_used = True
|
|
605
|
+
mermaid = _build_mermaid(bare, edges) if edges else None
|
|
606
|
+
|
|
557
607
|
# Populate hint if result is empty (Step 4.1)
|
|
558
608
|
hint = None
|
|
559
|
-
if
|
|
609
|
+
if bare_fallback_used:
|
|
610
|
+
bare = _bare_ref(table_col)
|
|
611
|
+
hint = (
|
|
612
|
+
f"No results for '{table_col}'. Found lineage under bare name '{bare}'. "
|
|
613
|
+
"The INSERT target may have been indexed without a schema prefix. "
|
|
614
|
+
"Multiple tables with the same unqualified name in different schemas "
|
|
615
|
+
"would all match — re-index with an explicit schema for precise results."
|
|
616
|
+
)
|
|
617
|
+
elif not lineage:
|
|
560
618
|
hint = (
|
|
561
619
|
"No lineage found. Ensure the column reference includes the schema prefix "
|
|
562
620
|
"(e.g., ba.table_name.column_name). Check that 'sqlcg db info' shows "
|
sqlcg/utils/ignore.py
CHANGED
|
@@ -14,6 +14,7 @@ def load_ignore_spec(root: Path) -> pathspec.PathSpec:
|
|
|
14
14
|
Returns:
|
|
15
15
|
PathSpec object for matching ignore patterns
|
|
16
16
|
"""
|
|
17
|
+
root = Path(root).resolve() # guard: caller may pass a relative path (e.g. Path("."))
|
|
17
18
|
ignore_file = root / ".sqlcgignore"
|
|
18
19
|
if ignore_file.exists():
|
|
19
20
|
patterns = ignore_file.read_text().splitlines()
|
|
@@ -33,4 +34,5 @@ def is_ignored(path: Path, root: Path, spec: pathspec.PathSpec) -> bool:
|
|
|
33
34
|
Returns:
|
|
34
35
|
True if the path matches any ignore pattern
|
|
35
36
|
"""
|
|
37
|
+
root = Path(root).resolve() # guard: ensure root is absolute before relative_to()
|
|
36
38
|
return spec.match_file(str(path.relative_to(root)))
|
|
File without changes
|
|
File without changes
|