sql-code-graph 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.2.dist-info}/METADATA +1 -1
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.2.dist-info}/RECORD +20 -20
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +90 -0
- sqlcg/cli/commands/find.py +11 -0
- sqlcg/cli/commands/index.py +72 -1
- sqlcg/cli/commands/install.py +83 -46
- sqlcg/cli/commands/mcp.py +18 -12
- sqlcg/cli/commands/reindex.py +3 -0
- sqlcg/core/config.py +7 -0
- sqlcg/indexer/error_classify.py +5 -1
- sqlcg/indexer/git_delta.py +1 -0
- sqlcg/indexer/indexer.py +5 -13
- sqlcg/indexer/pool.py +64 -5
- sqlcg/parsers/base.py +178 -82
- sqlcg/server/server.py +61 -18
- sqlcg/server/tools.py +59 -1
- sqlcg/utils/ignore.py +2 -0
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.2.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.0.0.dist-info → sql_code_graph-1.0.2.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -1,22 +1,22 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
4
|
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
-
sqlcg/cli/commands/analyze.py,sha256=
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
|
|
7
7
|
sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
|
|
8
|
-
sqlcg/cli/commands/find.py,sha256=
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
|
|
9
9
|
sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
12
|
-
sqlcg/cli/commands/install.py,sha256=
|
|
13
|
-
sqlcg/cli/commands/mcp.py,sha256=
|
|
14
|
-
sqlcg/cli/commands/reindex.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
|
|
12
|
+
sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
|
|
15
15
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
16
|
sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
|
|
17
17
|
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
18
|
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
19
|
-
sqlcg/core/config.py,sha256=
|
|
19
|
+
sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
|
|
20
20
|
sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
|
|
21
21
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
22
22
|
sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
|
|
@@ -27,10 +27,10 @@ sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
|
|
|
27
27
|
sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
|
|
28
28
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
29
29
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
30
|
-
sqlcg/indexer/error_classify.py,sha256=
|
|
31
|
-
sqlcg/indexer/git_delta.py,sha256=
|
|
32
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
33
|
-
sqlcg/indexer/pool.py,sha256=
|
|
30
|
+
sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
|
|
31
|
+
sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
|
|
32
|
+
sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
|
|
33
|
+
sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
|
|
34
34
|
sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
|
|
35
35
|
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
36
36
|
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
|
40
40
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
41
41
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
42
42
|
sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
|
|
43
|
-
sqlcg/parsers/base.py,sha256=
|
|
43
|
+
sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
|
|
44
44
|
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
45
45
|
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
46
46
|
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
@@ -50,14 +50,14 @@ sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
|
50
50
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
51
51
|
sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
|
|
52
52
|
sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
|
|
53
|
-
sqlcg/server/server.py,sha256=
|
|
53
|
+
sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
|
|
54
54
|
sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
|
|
55
|
-
sqlcg/server/tools.py,sha256=
|
|
55
|
+
sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
|
|
56
56
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
57
57
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
58
|
-
sqlcg/utils/ignore.py,sha256=
|
|
58
|
+
sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
|
|
59
59
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
60
|
-
sql_code_graph-1.0.
|
|
61
|
-
sql_code_graph-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
-
sql_code_graph-1.0.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
-
sql_code_graph-1.0.0.dist-info/RECORD,,
|
|
60
|
+
sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
|
|
61
|
+
sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
+
sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
+
sql_code_graph-1.0.2.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
"""Analyze command for lineage analysis."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
3
7
|
import typer
|
|
4
8
|
from rich.console import Console
|
|
5
9
|
from rich.table import Table
|
|
@@ -7,6 +11,9 @@ from rich.table import Table
|
|
|
7
11
|
from sqlcg.core.config import get_backend
|
|
8
12
|
from sqlcg.core.schema import NodeLabel, RelType
|
|
9
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
16
|
+
|
|
10
17
|
app = typer.Typer(help="Lineage analysis")
|
|
11
18
|
console = Console()
|
|
12
19
|
|
|
@@ -15,6 +22,7 @@ console = Console()
|
|
|
15
22
|
def upstream( # noqa: B008
|
|
16
23
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
17
24
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
25
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
18
26
|
) -> None:
|
|
19
27
|
"""Trace upstream column lineage."""
|
|
20
28
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -29,6 +37,28 @@ def upstream( # noqa: B008
|
|
|
29
37
|
"RETURN src.id AS id LIMIT 100",
|
|
30
38
|
{"ref": ref},
|
|
31
39
|
)
|
|
40
|
+
if not results and len(ref.split(".")) >= 3:
|
|
41
|
+
bare = _bare_ref(ref)
|
|
42
|
+
fallback_results = backend.run_read(
|
|
43
|
+
f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
44
|
+
f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
|
|
45
|
+
"RETURN src.id AS id LIMIT 100",
|
|
46
|
+
{"bare": bare},
|
|
47
|
+
)
|
|
48
|
+
if fallback_results:
|
|
49
|
+
console.print(
|
|
50
|
+
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
51
|
+
f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
|
|
52
|
+
"The INSERT target may have been indexed without a schema prefix. "
|
|
53
|
+
"Multiple tables with the same unqualified name in different schemas "
|
|
54
|
+
"would all match — re-index with an explicit schema for precise results."
|
|
55
|
+
)
|
|
56
|
+
results = fallback_results
|
|
57
|
+
if not raw:
|
|
58
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
59
|
+
|
|
60
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
61
|
+
results = _filter_column_results(results, nf)
|
|
32
62
|
_print_table(results, ["id"])
|
|
33
63
|
|
|
34
64
|
|
|
@@ -36,6 +66,7 @@ def upstream( # noqa: B008
|
|
|
36
66
|
def downstream( # noqa: B008
|
|
37
67
|
ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
|
|
38
68
|
depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
|
|
69
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
39
70
|
) -> None:
|
|
40
71
|
"""Trace downstream column lineage."""
|
|
41
72
|
# Bounds check for depth to prevent performance DoS
|
|
@@ -50,6 +81,28 @@ def downstream( # noqa: B008
|
|
|
50
81
|
"RETURN dst.id AS id LIMIT 100",
|
|
51
82
|
{"ref": ref},
|
|
52
83
|
)
|
|
84
|
+
if not results and len(ref.split(".")) >= 3:
|
|
85
|
+
bare = _bare_ref(ref)
|
|
86
|
+
fallback_results = backend.run_read(
|
|
87
|
+
f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
|
|
88
|
+
f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
|
|
89
|
+
"RETURN dst.id AS id LIMIT 100",
|
|
90
|
+
{"bare": bare},
|
|
91
|
+
)
|
|
92
|
+
if fallback_results:
|
|
93
|
+
console.print(
|
|
94
|
+
f"[yellow]Hint:[/yellow] No results for '{ref}'. "
|
|
95
|
+
f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
|
|
96
|
+
"The INSERT target may have been indexed without a schema prefix. "
|
|
97
|
+
"Multiple tables with the same unqualified name in different schemas "
|
|
98
|
+
"would all match — re-index with an explicit schema for precise results."
|
|
99
|
+
)
|
|
100
|
+
results = fallback_results
|
|
101
|
+
if not raw:
|
|
102
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
103
|
+
|
|
104
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
105
|
+
results = _filter_column_results(results, nf)
|
|
53
106
|
_print_table(results, ["id"])
|
|
54
107
|
|
|
55
108
|
|
|
@@ -106,6 +159,43 @@ def unused(
|
|
|
106
159
|
_print_table(results, ["qualified"])
|
|
107
160
|
|
|
108
161
|
|
|
162
|
+
def _bare_ref(ref: str) -> str:
|
|
163
|
+
"""Strip schema prefix from a ref string, keeping table.column.
|
|
164
|
+
|
|
165
|
+
For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
|
|
166
|
+
For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
|
|
167
|
+
Never uses rsplit — that would yield only the column name for 3-part refs.
|
|
168
|
+
"""
|
|
169
|
+
parts = ref.split(".")
|
|
170
|
+
if len(parts) >= 3:
|
|
171
|
+
return ".".join(parts[1:]) # drop schema, keep table.column
|
|
172
|
+
return ref # already bare (no schema prefix)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _col_id_to_table(col_id: str) -> str:
|
|
176
|
+
"""Extract the table-qualified part from a column ID (schema.table.col → schema.table).
|
|
177
|
+
|
|
178
|
+
Column IDs follow the format: schema.table.column or table.column.
|
|
179
|
+
The table part is everything except the last component.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
col_id: A column ID string from the graph.
|
|
183
|
+
|
|
184
|
+
Returns:
|
|
185
|
+
The table-qualified portion (all but the last dotted component).
|
|
186
|
+
"""
|
|
187
|
+
parts = col_id.rsplit(".", 1)
|
|
188
|
+
return parts[0] if len(parts) == 2 else col_id
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _filter_column_results(
    results: list[dict],
    nf: NoiseFilter,  # type: ignore[name-defined]
) -> list[dict]:
    """Return the rows of ``results`` whose owning table is not noise.

    Each row must carry an ``"id"`` key holding a column ID.  The table part
    of that ID is passed to ``nf.is_noise``; rows for which it answers true
    are dropped.  The order of the surviving rows is preserved.
    """
    kept: list[dict] = []
    for row in results:
        owning_table = _col_id_to_table(row["id"])
        if nf.is_noise(owning_table):
            continue
        kept.append(row)
    return kept
|
|
197
|
+
|
|
198
|
+
|
|
109
199
|
def _print_table(rows: list[dict], columns: list[str]) -> None:
|
|
110
200
|
"""Print results as a Rich table."""
|
|
111
201
|
if not rows:
|
sqlcg/cli/commands/find.py
CHANGED
|
@@ -14,14 +14,24 @@ console = Console()
|
|
|
14
14
|
@app.command("table")
|
|
15
15
|
def find_table( # noqa: B008
|
|
16
16
|
name: str = typer.Argument(..., help="Table name to search for"), # noqa: B008
|
|
17
|
+
raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
|
|
17
18
|
) -> None:
|
|
18
19
|
"""Find a table by name."""
|
|
20
|
+
name = name.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
19
21
|
with get_backend() as backend:
|
|
20
22
|
results = backend.run_read(
|
|
21
23
|
f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
|
|
22
24
|
"RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
|
|
23
25
|
{"name": name},
|
|
24
26
|
)
|
|
27
|
+
if not raw:
|
|
28
|
+
from sqlcg.server.noise_filter import NoiseFilter
|
|
29
|
+
|
|
30
|
+
nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
|
|
31
|
+
ids = [r["qualified"] for r in results]
|
|
32
|
+
kept, _ = nf.filter_nodes(ids)
|
|
33
|
+
kept_set = set(kept)
|
|
34
|
+
results = [r for r in results if r["qualified"] in kept_set]
|
|
25
35
|
_print_table(results, ["qualified", "kind"])
|
|
26
36
|
|
|
27
37
|
|
|
@@ -30,6 +40,7 @@ def find_column( # noqa: B008
|
|
|
30
40
|
ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
|
|
31
41
|
) -> None:
|
|
32
42
|
"""Find a column by table.column reference."""
|
|
43
|
+
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
33
44
|
with get_backend() as backend:
|
|
34
45
|
results = backend.run_read(
|
|
35
46
|
f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
|
sqlcg/cli/commands/index.py
CHANGED
|
@@ -14,7 +14,7 @@ from rich.progress import (
|
|
|
14
14
|
TimeRemainingColumn,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
|
-
from sqlcg.core.config import get_backend, get_db_path, get_dialect
|
|
17
|
+
from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
|
|
18
18
|
from sqlcg.indexer.indexer import Indexer
|
|
19
19
|
|
|
20
20
|
console = Console()
|
|
@@ -54,6 +54,9 @@ def index_cmd( # noqa: B008
|
|
|
54
54
|
quiet: bool = typer.Option( # noqa: B008
|
|
55
55
|
False, "--quiet", "-q", help="Suppress summary console output"
|
|
56
56
|
),
|
|
57
|
+
verbose: bool = typer.Option( # noqa: B008
|
|
58
|
+
False, "--verbose", "-v", help="Print parse warnings to stderr instead of log file"
|
|
59
|
+
),
|
|
57
60
|
debug: bool = typer.Option( # noqa: B008
|
|
58
61
|
False, "--debug", help="Show detailed log output during indexing"
|
|
59
62
|
),
|
|
@@ -68,11 +71,40 @@ def index_cmd( # noqa: B008
|
|
|
68
71
|
"""
|
|
69
72
|
|
|
70
73
|
import logging
|
|
74
|
+
import sys
|
|
71
75
|
|
|
72
76
|
level = logging.DEBUG if debug else logging.CRITICAL
|
|
73
77
|
logging.getLogger("sqlcg").setLevel(level)
|
|
74
78
|
logging.getLogger("sqlglot").setLevel(level)
|
|
75
79
|
|
|
80
|
+
# Route parse warnings to stderr (--verbose) or to the configured log file.
|
|
81
|
+
sqlcg_log = logging.getLogger("sqlcg")
|
|
82
|
+
|
|
83
|
+
class _CountingHandler(logging.Handler):
|
|
84
|
+
"""Counts WARNING+ records emitted during indexing."""
|
|
85
|
+
|
|
86
|
+
def __init__(self) -> None:
|
|
87
|
+
super().__init__(logging.WARNING)
|
|
88
|
+
self.count = 0
|
|
89
|
+
|
|
90
|
+
def emit(self, record: logging.LogRecord) -> None:
|
|
91
|
+
self.count += 1
|
|
92
|
+
|
|
93
|
+
_counter = _CountingHandler()
|
|
94
|
+
sqlcg_log.addHandler(_counter)
|
|
95
|
+
|
|
96
|
+
if verbose:
|
|
97
|
+
_warn_handler: logging.Handler = logging.StreamHandler(sys.stderr)
|
|
98
|
+
_warn_handler.setLevel(logging.WARNING)
|
|
99
|
+
sqlcg_log.addHandler(_warn_handler)
|
|
100
|
+
_warn_log_path = None
|
|
101
|
+
else:
|
|
102
|
+
_warn_log_path = KuzuConfig.from_env().log_path
|
|
103
|
+
_warn_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
104
|
+
_warn_handler = logging.FileHandler(_warn_log_path)
|
|
105
|
+
_warn_handler.setLevel(logging.WARNING)
|
|
106
|
+
sqlcg_log.addHandler(_warn_handler)
|
|
107
|
+
|
|
76
108
|
# Set buffer pool size via env var if specified
|
|
77
109
|
if buffer_pool_size > 0:
|
|
78
110
|
os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
|
|
@@ -84,6 +116,45 @@ def index_cmd( # noqa: B008
|
|
|
84
116
|
db_path = get_db_path()
|
|
85
117
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
86
118
|
|
|
119
|
+
try:
|
|
120
|
+
_run_index(
|
|
121
|
+
path=path,
|
|
122
|
+
dialect=dialect,
|
|
123
|
+
dbt_manifest=dbt_manifest,
|
|
124
|
+
timeout_per_file=timeout_per_file,
|
|
125
|
+
no_ddl=no_ddl,
|
|
126
|
+
quiet=quiet,
|
|
127
|
+
batch_size=batch_size,
|
|
128
|
+
profile=profile,
|
|
129
|
+
)
|
|
130
|
+
except KeyboardInterrupt:
|
|
131
|
+
# The backend context manager (inside _run_index) has already closed the
|
|
132
|
+
# KuzuDB connection and released the lock by the time we get here.
|
|
133
|
+
console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
|
|
134
|
+
raise typer.Exit(130) from None
|
|
135
|
+
finally:
|
|
136
|
+
sqlcg_log.removeHandler(_warn_handler)
|
|
137
|
+
sqlcg_log.removeHandler(_counter)
|
|
138
|
+
_warn_handler.close()
|
|
139
|
+
|
|
140
|
+
if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
|
|
141
|
+
console.print(
|
|
142
|
+
f"[yellow]Parse warnings written to {_warn_log_path} "
|
|
143
|
+
"— use --verbose to show here.[/yellow]"
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _run_index(
|
|
148
|
+
*,
|
|
149
|
+
path: Path,
|
|
150
|
+
dialect: str | None,
|
|
151
|
+
dbt_manifest: Path | None,
|
|
152
|
+
timeout_per_file: int,
|
|
153
|
+
no_ddl: bool,
|
|
154
|
+
quiet: bool,
|
|
155
|
+
batch_size: int,
|
|
156
|
+
profile: bool,
|
|
157
|
+
) -> None:
|
|
87
158
|
with get_backend() as backend:
|
|
88
159
|
backend.init_schema()
|
|
89
160
|
|
sqlcg/cli/commands/install.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
|
-
"""Install sqlcg as an MCP server in Claude Code.
|
|
1
|
+
"""Install sqlcg as an MCP server in Claude Code.
|
|
2
|
+
|
|
3
|
+
Write path (in priority order):
|
|
4
|
+
1. ``claude mcp add -s user sql-code-graph <cmd> <args>`` — the official
|
|
5
|
+
Claude Code CLI write path (reads from ~/.claude.json under the hood).
|
|
6
|
+
2. Fallback: write ``~/.claude.json`` directly under mcpServers.user when
|
|
7
|
+
the ``claude`` binary is not found or returns non-zero.
|
|
8
|
+
|
|
9
|
+
The previous target (~/.claude/settings.json) was incorrect — Claude Code does
|
|
10
|
+
NOT read MCP servers from that file. See ARCHITECTURE_REVIEW.md §9.2.
|
|
11
|
+
"""
|
|
2
12
|
|
|
3
13
|
import json
|
|
4
14
|
import os
|
|
5
15
|
import shutil
|
|
16
|
+
import subprocess
|
|
6
17
|
import sys
|
|
7
18
|
from pathlib import Path
|
|
8
19
|
|
|
@@ -11,7 +22,6 @@ from rich.console import Console
|
|
|
11
22
|
|
|
12
23
|
console = Console()
|
|
13
24
|
|
|
14
|
-
_SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
|
|
15
25
|
_SERVER_KEY = "sql-code-graph"
|
|
16
26
|
|
|
17
27
|
|
|
@@ -28,7 +38,10 @@ def install_cmd(
|
|
|
28
38
|
help="Repository root for --scope project (default: current directory).",
|
|
29
39
|
),
|
|
30
40
|
) -> None:
|
|
31
|
-
"""Register sqlcg as an MCP server in Claude Code
|
|
41
|
+
"""Register sqlcg as an MCP server in Claude Code.
|
|
42
|
+
|
|
43
|
+
Runs ``claude mcp add -s user sql-code-graph <cmd> <args>`` when the
|
|
44
|
+
``claude`` CLI is on PATH; otherwise writes ~/.claude.json directly.
|
|
32
45
|
|
|
33
46
|
Also provisions a Claude skill file (SKILL.md) at the chosen location.
|
|
34
47
|
Pass --scope project or --scope global to specify where the skill is written.
|
|
@@ -39,68 +52,81 @@ def install_cmd(
|
|
|
39
52
|
resolved_scope = _resolve_scope(scope)
|
|
40
53
|
|
|
41
54
|
if shutil.which("sqlcg"):
|
|
42
|
-
|
|
55
|
+
cmd_parts = ["sqlcg", "mcp", "start"]
|
|
43
56
|
elif shutil.which("uvx"):
|
|
44
|
-
|
|
57
|
+
cmd_parts = ["uvx", "sql-code-graph", "mcp", "start"]
|
|
45
58
|
else:
|
|
46
59
|
console.print("[red]Error:[/red] Neither 'sqlcg' nor 'uvx' found on PATH.")
|
|
47
60
|
raise typer.Exit(1)
|
|
48
61
|
|
|
49
|
-
|
|
50
|
-
|
|
62
|
+
entry: dict = {"command": cmd_parts[0], "args": cmd_parts[1:]}
|
|
63
|
+
|
|
64
|
+
if dry_run:
|
|
65
|
+
claude_bin = shutil.which("claude")
|
|
66
|
+
if claude_bin:
|
|
67
|
+
console.print("[dim]--dry-run: would run:[/dim]")
|
|
68
|
+
console.print(f" claude mcp add -s user {_SERVER_KEY} {' '.join(cmd_parts)}")
|
|
69
|
+
else:
|
|
70
|
+
claude_json = Path.home() / ".claude.json"
|
|
71
|
+
console.print("[dim]--dry-run: would write to ~/.claude.json:[/dim]")
|
|
72
|
+
_preview_claude_json(claude_json, entry)
|
|
73
|
+
_provision_skill(resolved_scope, repo, dry_run=True)
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
# --- Try official claude CLI first ---
|
|
77
|
+
claude_bin = shutil.which("claude")
|
|
78
|
+
if claude_bin:
|
|
79
|
+
proc = subprocess.run(
|
|
80
|
+
["claude", "mcp", "add", "-s", "user", _SERVER_KEY] + cmd_parts,
|
|
81
|
+
capture_output=True,
|
|
82
|
+
text=True,
|
|
83
|
+
)
|
|
84
|
+
if proc.returncode == 0:
|
|
85
|
+
console.print(f"[green]Configured:[/green] {_SERVER_KEY} via `claude mcp add`")
|
|
86
|
+
console.print("\nRestart Claude Code to pick up the new MCP server.")
|
|
87
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
88
|
+
return
|
|
89
|
+
# Non-zero: log and fall through to ~/.claude.json fallback
|
|
90
|
+
console.print(
|
|
91
|
+
f"[yellow]Warning:[/yellow] `claude mcp add` returned rc={proc.returncode}; "
|
|
92
|
+
"falling back to ~/.claude.json write."
|
|
93
|
+
)
|
|
94
|
+
if proc.stderr:
|
|
95
|
+
console.print(f"[dim]{proc.stderr.strip()}[/dim]")
|
|
96
|
+
|
|
97
|
+
# --- Fallback: write ~/.claude.json directly ---
|
|
98
|
+
claude_json = Path.home() / ".claude.json"
|
|
99
|
+
if claude_json.exists():
|
|
51
100
|
try:
|
|
52
|
-
|
|
101
|
+
data: dict = json.loads(claude_json.read_text())
|
|
53
102
|
except (json.JSONDecodeError, OSError, TypeError):
|
|
54
103
|
console.print(
|
|
55
|
-
f"[yellow]Warning:[/yellow] {
|
|
104
|
+
f"[yellow]Warning:[/yellow] {claude_json} contains invalid JSON — "
|
|
56
105
|
"mcpServers key will be added"
|
|
57
106
|
)
|
|
58
|
-
|
|
107
|
+
data = {}
|
|
59
108
|
else:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
mcp_servers: dict = settings.setdefault("mcpServers", {})
|
|
109
|
+
data = {}
|
|
63
110
|
|
|
64
|
-
|
|
65
|
-
if
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# Still provision the skill even when MCP entry already exists
|
|
69
|
-
_provision_skill(resolved_scope, repo, dry_run)
|
|
111
|
+
existing = data.get("mcpServers", {}).get("user", {}).get(_SERVER_KEY)
|
|
112
|
+
if existing == entry:
|
|
113
|
+
console.print(f"[green]Already configured:[/green] {_SERVER_KEY} (in ~/.claude.json)")
|
|
114
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
70
115
|
return
|
|
71
116
|
|
|
72
|
-
|
|
73
|
-
if (
|
|
74
|
-
existing_entry
|
|
75
|
-
and existing_entry.get("command") == "uvx"
|
|
76
|
-
and entry.get("command") == "sqlcg"
|
|
77
|
-
):
|
|
78
|
-
console.print(
|
|
79
|
-
"[blue]Updating[/blue] MCP entry from [dim]uvx[/dim] to local "
|
|
80
|
-
"[green]sqlcg[/green] binary (faster startup). Writing…"
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
mcp_servers[_SERVER_KEY] = entry
|
|
84
|
-
|
|
85
|
-
if dry_run is True:
|
|
86
|
-
console.print("[dim]--dry-run: would write:[/dim]")
|
|
87
|
-
console.print_json(json.dumps(settings, indent=2))
|
|
88
|
-
_provision_skill(resolved_scope, repo, dry_run)
|
|
89
|
-
return
|
|
117
|
+
data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
|
|
90
118
|
|
|
91
119
|
try:
|
|
92
|
-
|
|
93
|
-
tmp =
|
|
94
|
-
|
|
95
|
-
os.replace(tmp, settings_path)
|
|
120
|
+
tmp = claude_json.with_suffix(".tmp")
|
|
121
|
+
tmp.write_text(json.dumps(data, indent=2) + "\n")
|
|
122
|
+
os.replace(tmp, claude_json)
|
|
96
123
|
except (OSError, TypeError, AttributeError):
|
|
97
124
|
pass # Ignore file I/O errors in testing
|
|
98
125
|
|
|
99
|
-
cmd_str =
|
|
126
|
+
cmd_str = " ".join(cmd_parts)
|
|
100
127
|
console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
|
|
101
|
-
console.print(f"[dim]Written to {
|
|
128
|
+
console.print(f"[dim]Written to {claude_json}[/dim]")
|
|
102
129
|
|
|
103
|
-
# Note about cold cache if uvx was chosen
|
|
104
130
|
if entry.get("command") == "uvx":
|
|
105
131
|
console.print(
|
|
106
132
|
"[yellow]Note:[/yellow] First startup downloads dependencies (~30s). "
|
|
@@ -108,9 +134,20 @@ def install_cmd(
|
|
|
108
134
|
)
|
|
109
135
|
|
|
110
136
|
console.print("\nRestart Claude Code to pick up the new MCP server.")
|
|
137
|
+
_provision_skill(resolved_scope, repo, dry_run=False)
|
|
138
|
+
|
|
111
139
|
|
|
112
|
-
|
|
113
|
-
|
|
140
|
+
def _preview_claude_json(claude_json: Path, entry: dict) -> None:
    """Print what would be written to ~/.claude.json without touching the file.

    The existing file, if any, is loaded and the server entry is merged in at
    ``mcpServers.user.<_SERVER_KEY>``; the merged document is then printed as
    JSON.  Nothing is written to disk.

    A dry run must never fail because of the file's current content, so any
    unusable content is replaced by an empty object in the preview: an
    unreadable or undecodable file, invalid JSON, a top-level value that is
    not a JSON object, or a non-object ``mcpServers`` / ``mcpServers.user``.

    Args:
        claude_json: Path of the Claude Code config file to preview against.
        entry: The MCP server entry (``command`` / ``args``) to insert.
    """
    data: dict = {}
    if claude_json.exists():
        try:
            loaded = json.loads(claude_json.read_text())
        except (ValueError, OSError, TypeError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for a file that cannot be decoded.
            loaded = None
        # Valid JSON is not necessarily an object (e.g. "[]" or "null").
        if isinstance(loaded, dict):
            data = loaded

    # Walk down to mcpServers.user, creating (or replacing a malformed,
    # non-object) level on the way.  For well-formed input this is the same
    # as chaining setdefault calls.
    node = data
    for key in ("mcpServers", "user"):
        child = node.get(key)
        if not isinstance(child, dict):
            child = node[key] = {}
        node = child
    node[_SERVER_KEY] = entry

    console.print_json(json.dumps(data, indent=2))
|
|
114
151
|
|
|
115
152
|
|
|
116
153
|
def _resolve_scope(scope: str | None) -> str:
|
sqlcg/cli/commands/mcp.py
CHANGED
|
@@ -22,28 +22,34 @@ def _server_entry() -> dict:
|
|
|
22
22
|
|
|
23
23
|
@app.command("setup")
|
|
24
24
|
def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
|
|
25
|
-
"""Print or write MCP server config JSON.
|
|
25
|
+
"""Print or write MCP server config JSON.
|
|
26
|
+
|
|
27
|
+
--print (default): print the JSON snippet for manual insertion.
|
|
28
|
+
--write: write to ~/.claude.json under mcpServers.user (the correct path
|
|
29
|
+
for Claude Code — not settings.json, which Claude Code does not read
|
|
30
|
+
for MCP servers).
|
|
31
|
+
"""
|
|
26
32
|
entry = _server_entry()
|
|
27
33
|
if print_only:
|
|
28
34
|
console.print_json(json.dumps({"mcpServers": {_SERVER_KEY: entry}}, indent=2))
|
|
29
35
|
return
|
|
30
36
|
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
# Write to ~/.claude.json (correct path for Claude Code MCP servers)
|
|
38
|
+
claude_json = Path.home() / ".claude.json"
|
|
39
|
+
if claude_json.exists():
|
|
33
40
|
try:
|
|
34
|
-
|
|
41
|
+
data: dict = json.loads(claude_json.read_text())
|
|
35
42
|
except json.JSONDecodeError:
|
|
36
|
-
|
|
43
|
+
data = {}
|
|
37
44
|
else:
|
|
38
|
-
|
|
45
|
+
data = {}
|
|
39
46
|
|
|
40
|
-
|
|
47
|
+
data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
|
|
41
48
|
|
|
42
|
-
|
|
43
|
-
tmp =
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
console.print(f"[green]Configuration written to[/green] {config_path}")
|
|
49
|
+
tmp = claude_json.with_suffix(".tmp")
|
|
50
|
+
tmp.write_text(json.dumps(data, indent=2) + "\n")
|
|
51
|
+
os.replace(tmp, claude_json)
|
|
52
|
+
console.print(f"[green]Configuration written to[/green] {claude_json}")
|
|
47
53
|
console.print("Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.")
|
|
48
54
|
|
|
49
55
|
|
sqlcg/cli/commands/reindex.py
CHANGED
|
@@ -60,6 +60,9 @@ def reindex_cmd( # noqa: B008
|
|
|
60
60
|
from sqlcg.core.schema import SCHEMA_VERSION
|
|
61
61
|
from sqlcg.indexer.indexer import Indexer
|
|
62
62
|
|
|
63
|
+
# Resolve to absolute path so ignore-spec and git delta receive an absolute root
|
|
64
|
+
path = path.resolve()
|
|
65
|
+
|
|
63
66
|
# Resolve dialect
|
|
64
67
|
if dialect == "auto":
|
|
65
68
|
dialect = get_dialect(path)
|
sqlcg/core/config.py
CHANGED
|
@@ -19,6 +19,10 @@ class KuzuConfig(BaseModel):
|
|
|
19
19
|
default=0,
|
|
20
20
|
description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
|
|
21
21
|
)
|
|
22
|
+
log_path: Path = Field(
|
|
23
|
+
default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
|
|
24
|
+
description="Path for parse-warning log file written during indexing",
|
|
25
|
+
)
|
|
22
26
|
|
|
23
27
|
@classmethod
|
|
24
28
|
def from_env(cls) -> "KuzuConfig":
|
|
@@ -29,9 +33,11 @@ class KuzuConfig(BaseModel):
|
|
|
29
33
|
"""
|
|
30
34
|
env_path = os.getenv("SQLCG_DB_PATH")
|
|
31
35
|
env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
|
|
36
|
+
env_log = os.getenv("SQLCG_LOG_PATH")
|
|
32
37
|
return cls(
|
|
33
38
|
db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
|
|
34
39
|
buffer_pool_size_mb=int(env_buf) if env_buf else 0,
|
|
40
|
+
log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
|
|
35
41
|
)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -138,6 +144,7 @@ def get_noise_filter_patterns(path: Path) -> list[str]:
|
|
|
138
144
|
"""
|
|
139
145
|
default_patterns = [
|
|
140
146
|
"*_bck",
|
|
147
|
+
"*_bck_*", # catches mid-suffix variants e.g. foo_bck_us39553, bar_bck_archive
|
|
141
148
|
"*_bck_us",
|
|
142
149
|
"*_bck_[0-9]*",
|
|
143
150
|
"*_backup",
|
sqlcg/indexer/error_classify.py
CHANGED
|
@@ -93,10 +93,14 @@ def _classify_error(msg: str) -> str:
|
|
|
93
93
|
if not msg:
|
|
94
94
|
return "other"
|
|
95
95
|
|
|
96
|
-
# Timeout errors
|
|
96
|
+
# Timeout errors (including pool-path poison retries)
|
|
97
97
|
if msg.startswith("timeout:"):
|
|
98
98
|
return "timeout"
|
|
99
99
|
|
|
100
|
+
# Poison-retry: file repeatedly timed out in pool worker; treat as timeout bucket
|
|
101
|
+
if msg.startswith("skipped:poison"):
|
|
102
|
+
return "timeout"
|
|
103
|
+
|
|
100
104
|
# Skip markers
|
|
101
105
|
if msg.startswith("col_lineage_skip:"):
|
|
102
106
|
if "pure_ddl_file" in msg:
|