sql-code-graph 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-0.2.1.dist-info → sql_code_graph-0.3.0.dist-info}/METADATA +50 -4
- {sql_code_graph-0.2.1.dist-info → sql_code_graph-0.3.0.dist-info}/RECORD +20 -19
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/db.py +48 -0
- sqlcg/cli/commands/gain.py +86 -14
- sqlcg/cli/commands/index.py +5 -0
- sqlcg/cli/commands/install.py +21 -7
- sqlcg/cli/commands/mcp.py +1 -0
- sqlcg/cli/commands/uninstall.py +213 -0
- sqlcg/cli/main.py +26 -3
- sqlcg/core/kuzu_backend.py +22 -20
- sqlcg/indexer/indexer.py +21 -3
- sqlcg/parsers/ansi_parser.py +18 -1
- sqlcg/parsers/base.py +17 -1
- sqlcg/parsers/bigquery_parser.py +2 -2
- sqlcg/parsers/snowflake_parser.py +3 -2
- sqlcg/server/models.py +44 -0
- sqlcg/server/tools.py +149 -16
- {sql_code_graph-0.2.1.dist-info → sql_code_graph-0.3.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-0.2.1.dist-info → sql_code_graph-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -47,9 +47,18 @@ without reading every file.
|
|
|
47
47
|
|
|
48
48
|
## Quick start
|
|
49
49
|
|
|
50
|
+
Choose one:
|
|
51
|
+
|
|
52
|
+
**Permanent install** (recommended):
|
|
53
|
+
```bash
|
|
54
|
+
uv tool install sql-code-graph # Fast, managed, no isolation needed
|
|
55
|
+
sqlcg install # Register MCP server in Claude Code
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**One-shot try** (cold cache warning):
|
|
50
59
|
```bash
|
|
51
|
-
|
|
52
|
-
|
|
60
|
+
uvx sql-code-graph # First run is slow (downloads deps)
|
|
61
|
+
# Subsequent runs use cache, ~1s startup
|
|
53
62
|
```
|
|
54
63
|
|
|
55
64
|
Restart Claude Code, then inside your project ask:
|
|
@@ -61,6 +70,12 @@ Index my SQL files at ./sql --dialect snowflake
|
|
|
61
70
|
That's it. The MCP tools are now available to Claude in every conversation
|
|
62
71
|
for that project.
|
|
63
72
|
|
|
73
|
+
### Workflow (3 steps)
|
|
74
|
+
|
|
75
|
+
1. **Initialize**: `sqlcg db init`
|
|
76
|
+
2. **Index**: `sqlcg index ./sql --dialect snowflake`
|
|
77
|
+
3. **Keep fresh**: `sqlcg git install-hooks` (optional)
|
|
78
|
+
|
|
64
79
|
## Full setup (recommended)
|
|
65
80
|
|
|
66
81
|
```bash
|
|
@@ -106,6 +121,7 @@ are available and when to use them:
|
|
|
106
121
|
```markdown
|
|
107
122
|
## SQL lineage
|
|
108
123
|
This project uses sql-code-graph. MCP tools are available:
|
|
124
|
+
- `db_info` — check graph health and parse quality before running lineage queries
|
|
109
125
|
- `index_repo` — index or re-index a directory of SQL files
|
|
110
126
|
- `find_table_usages` — find all queries that read a table
|
|
111
127
|
- `trace_column_lineage` — trace where a column's value comes from
|
|
@@ -117,6 +133,30 @@ This project uses sql-code-graph. MCP tools are available:
|
|
|
117
133
|
The MCP server works without this — Claude can discover the tools on its own —
|
|
118
134
|
but the CLAUDE.md snippet ensures they get used proactively.
|
|
119
135
|
|
|
136
|
+
## Parse quality
|
|
137
|
+
|
|
138
|
+
After indexing, `sqlcg gain` shows a **parse quality breakdown** that tells you how
|
|
139
|
+
much column-level lineage was extracted:
|
|
140
|
+
|
|
141
|
+
| Quality | Meaning | Tools affected |
|
|
142
|
+
|---|---|---|
|
|
143
|
+
| `FULL` | Column-level lineage extracted | All tools work |
|
|
144
|
+
| `TABLE_ONLY` | Table edges only — no column lineage | `trace_column_lineage`, `get_*_dependencies` return empty |
|
|
145
|
+
| `SCRIPTING_FALLBACK` | sqlglot fell back to raw command node | Partial table edges; column lineage unavailable |
|
|
146
|
+
| `FAILED` | File failed to parse entirely | File invisible to all queries |
|
|
147
|
+
|
|
148
|
+
Quality is shown per-file after `sqlcg index` and in `sqlcg gain` Section F.
|
|
149
|
+
`list_dialects_and_repos()` warns when scripting fallback exceeds 20% of queries.
|
|
150
|
+
|
|
151
|
+
**What causes TABLE_ONLY?** Mostly `SELECT *` — sqlglot can't trace column names through
|
|
152
|
+
a wildcard. Name the columns explicitly (instead of `*`) in those selects to get FULL coverage.
|
|
153
|
+
|
|
154
|
+
**What causes SCRIPTING_FALLBACK?** Snowflake `$$` procedure bodies or `BEGIN…END` scripting
|
|
155
|
+
blocks. sqlglot parses the block as a raw `Command` node and extracts DML via tokenizer
|
|
156
|
+
fallback. Table edges are usually correct; column edges are not.
|
|
157
|
+
|
|
158
|
+
Check `sqlcg db info` for the parsing mode distribution across all indexed queries.
|
|
159
|
+
|
|
120
160
|
## MCP tools reference
|
|
121
161
|
|
|
122
162
|
| Tool | Description |
|
|
@@ -127,9 +167,15 @@ but the CLAUDE.md snippet ensures they get used proactively.
|
|
|
127
167
|
| `get_upstream_dependencies(table_col)` | Full upstream dependency chain |
|
|
128
168
|
| `get_downstream_dependencies(table_col)` | Full downstream dependency chain |
|
|
129
169
|
| `search_sql_pattern(query)` | Full-text search across indexed SQL |
|
|
130
|
-
| `list_dialects_and_repos()` | List indexed repos and dialects |
|
|
170
|
+
| `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
|
|
171
|
+
| `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
|
|
131
172
|
| `execute_cypher(query)` | Raw Cypher query against the graph |
|
|
132
173
|
|
|
174
|
+
> **LLM agent tip**: call `db_info()` before lineage queries to check that
|
|
175
|
+
> `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
|
|
176
|
+
> is high, column lineage will be limited for those files — use table-level tools
|
|
177
|
+
> (such as `find_table_usages`) instead; `get_*_dependencies` also rely on column lineage.
|
|
178
|
+
|
|
133
179
|
## CLI reference
|
|
134
180
|
|
|
135
181
|
Full option reference: [docs/cli.md](docs/cli.md)
|
|
@@ -1,30 +1,31 @@
|
|
|
1
|
-
sqlcg/__init__.py,sha256=
|
|
1
|
+
sqlcg/__init__.py,sha256=uz4wN-jZQqeSx3jv9CERrZI1w5Nphgr6zsQSsr6DcZM,115
|
|
2
2
|
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
3
|
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
|
-
sqlcg/cli/main.py,sha256=
|
|
4
|
+
sqlcg/cli/main.py,sha256=AkhrCtNOGTsqW1HENEKiJUQUlvY5GyLD-1IWRrHA-Cg,1292
|
|
5
5
|
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
6
|
sqlcg/cli/commands/analyze.py,sha256=Vurb_PdHQ6Aw5ZRFEbQwUiylkz5D4j849EwtIqgagHk,3168
|
|
7
|
-
sqlcg/cli/commands/db.py,sha256=
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=q6zIl1XhVntj2Wg4tjxif6xKoJYOI9BHa1wB8-3BKWU,5000
|
|
8
8
|
sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
|
|
9
|
-
sqlcg/cli/commands/gain.py,sha256=
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=JrTpwqNlxMEe8TRMgWAW9v3gAY0eY5BWw5O-2GqZv3I,9121
|
|
10
10
|
sqlcg/cli/commands/git.py,sha256=d1LDKaqMfaW28U3rCWjaEe-GB5RybJWsz36iBkNXF9Y,2253
|
|
11
|
-
sqlcg/cli/commands/index.py,sha256=
|
|
12
|
-
sqlcg/cli/commands/install.py,sha256=
|
|
13
|
-
sqlcg/cli/commands/mcp.py,sha256=
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=sdZHtYNmO0ivo8R7hDkUWSORXdczBnFBcOgLMwAoF8Y,3447
|
|
12
|
+
sqlcg/cli/commands/install.py,sha256=pCSZcWXyajnxPBV0tmWMQ2YssZ9VX37HtwJEeqHjIW8,2449
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=RCENfq-2xbqrQpsHNsZWqshUC9Q_uMCZfpWnzI3BHf8,1564
|
|
14
14
|
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
15
|
+
sqlcg/cli/commands/uninstall.py,sha256=9a9QgvPmpQ6HXErn-zSGhY1_yvCmjPNMkPoAR3kaCaI,7442
|
|
15
16
|
sqlcg/cli/commands/watch.py,sha256=KOlQ0ZoYnzTxqsSnJvHdr656vaG6zNRfKRefyqkTJzg,1889
|
|
16
17
|
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
17
18
|
sqlcg/core/config.py,sha256=acrNRlOTIEKr2ttWFqVToiN-9Z9csbBCTJvQLtjCI3g,3004
|
|
18
19
|
sqlcg/core/graph_db.py,sha256=BN3QUD8hNVY5I7qsKj5zvl8v2uT_hswKvvkmwZ3mClA,5551
|
|
19
20
|
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
20
|
-
sqlcg/core/kuzu_backend.py,sha256=
|
|
21
|
+
sqlcg/core/kuzu_backend.py,sha256=VjawtV955gDuOQhSbYOyZclXisKuCQPjF6xxOXRirlY,9838
|
|
21
22
|
sqlcg/core/neo4j_backend.py,sha256=Tl2_jGv086DTJYQBixv-Tm_misyd_5-iEb_UuCjKk_I,7058
|
|
22
23
|
sqlcg/core/queries.py,sha256=qxoMH75yGWLwNH9Ki9l9NV9IzOsH6fgdAsHdewLRn-o,2733
|
|
23
24
|
sqlcg/core/schema.cypher,sha256=BNMbXaHtINT3uaW0vlnBrG8DLa6k8i-CfOkrF-ZVo_U,2220
|
|
24
25
|
sqlcg/core/schema.py,sha256=miHPMh2hSQueNdGfD-7pNXk0EIDsCkEh431eI9_iTEI,1269
|
|
25
26
|
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
26
27
|
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
27
|
-
sqlcg/indexer/indexer.py,sha256=
|
|
28
|
+
sqlcg/indexer/indexer.py,sha256=SbtffNmvTR6RnXYJoH4CXW5iwKv_j-XBFPHVVsey390,12094
|
|
28
29
|
sqlcg/indexer/walker.py,sha256=WpF5mJvc6ayN_DJ52w2UQnNxXeqh03QbBeYEqrKpAZI,1752
|
|
29
30
|
sqlcg/indexer/watcher.py,sha256=OaYiQTQMIPdVQEtuJqY7Z9zCi8vr2UqWOkm4Ygp_Ap4,6697
|
|
30
31
|
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
@@ -33,23 +34,23 @@ sqlcg/lineage/schema_resolver.py,sha256=e6PU99SO6L-bIaFLwOekarhass-SeGoeVdB9PgbL
|
|
|
33
34
|
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
34
35
|
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
35
36
|
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
36
|
-
sqlcg/parsers/ansi_parser.py,sha256=
|
|
37
|
-
sqlcg/parsers/base.py,sha256=
|
|
38
|
-
sqlcg/parsers/bigquery_parser.py,sha256=
|
|
37
|
+
sqlcg/parsers/ansi_parser.py,sha256=kAm0RI0cM3kuRANzjVLBjrh48WnY00BVwJijzgm1xX8,7221
|
|
38
|
+
sqlcg/parsers/base.py,sha256=Q1tU9GNHA6tUhqgByHNrwua5QB0VU6ZaIA7L5GqseuY,15386
|
|
39
|
+
sqlcg/parsers/bigquery_parser.py,sha256=q-6nzO104JbAMGETtivHl0HBtTxCyQg2jEXskc8i9Xo,2625
|
|
39
40
|
sqlcg/parsers/postgres_parser.py,sha256=-pyBr-KU4JGRurxsvJmK5jgdTcNesSDClTzEsl4o2A8,744
|
|
40
41
|
sqlcg/parsers/registry.py,sha256=7l5ODWszz6CDC_5ZhhQkST9U-pvqJ-i6D0GqPXwcWhE,1325
|
|
41
|
-
sqlcg/parsers/snowflake_parser.py,sha256=
|
|
42
|
+
sqlcg/parsers/snowflake_parser.py,sha256=FY9BIrLbUvRQA2u9rOPX56ttsyXLMPJcX9PHTMvn_Wk,5561
|
|
42
43
|
sqlcg/parsers/tsql_parser.py,sha256=zZQ6CqV3lXNUG_FOeWRwv9AEXhAeAw4LcTDAaxayTW4,754
|
|
43
44
|
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
44
45
|
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
45
|
-
sqlcg/server/models.py,sha256=
|
|
46
|
+
sqlcg/server/models.py,sha256=2WSnyQdDB5eRurbDMK_2nVxDEkLjeolYJWYBEJj12ew,4414
|
|
46
47
|
sqlcg/server/server.py,sha256=2EwKGehcIdKqCjZagbv8VrvnVCp-D5Lh-z38FFHRcN8,1723
|
|
47
|
-
sqlcg/server/tools.py,sha256=
|
|
48
|
+
sqlcg/server/tools.py,sha256=YZE-I6bpv8B17glrVX_x4pYlvvTiPSmsrkJIQVmDFvs,24933
|
|
48
49
|
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
49
50
|
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
50
51
|
sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
|
|
51
52
|
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
52
|
-
sql_code_graph-0.
|
|
53
|
-
sql_code_graph-0.
|
|
54
|
-
sql_code_graph-0.
|
|
55
|
-
sql_code_graph-0.
|
|
53
|
+
sql_code_graph-0.3.0.dist-info/METADATA,sha256=GvlOxVxB1ap1HevSp1B3yYzlg6pdIp__AqT-tU-lsPE,8010
|
|
54
|
+
sql_code_graph-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
55
|
+
sql_code_graph-0.3.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
56
|
+
sql_code_graph-0.3.0.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/db.py
CHANGED
|
@@ -65,6 +65,54 @@ def db_info() -> None:
|
|
|
65
65
|
logger.error(f"Error getting count for {label}: {e}")
|
|
66
66
|
console.print(f" [red]{label}: error[/red]")
|
|
67
67
|
|
|
68
|
+
# Health check section
|
|
69
|
+
repo_count_result = backend.run_read("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
|
|
70
|
+
repo_count = repo_count_result[0]["count"] if repo_count_result else 0
|
|
71
|
+
|
|
72
|
+
if repo_count == 0:
|
|
73
|
+
console.print( # noqa: E501
|
|
74
|
+
"[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
|
|
75
|
+
)
|
|
76
|
+
else:
|
|
77
|
+
query_count_result = backend.run_read("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
|
|
78
|
+
query_count = query_count_result[0]["count"] if query_count_result else 0
|
|
79
|
+
|
|
80
|
+
if query_count == 0:
|
|
81
|
+
console.print(
|
|
82
|
+
"[yellow]No queries indexed. Run 'sqlcg index <path>' to populate "
|
|
83
|
+
"the graph.[/yellow]"
|
|
84
|
+
)
|
|
85
|
+
else:
|
|
86
|
+
col_count_result = backend.run_read(
|
|
87
|
+
"MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {}
|
|
88
|
+
)
|
|
89
|
+
col_count = col_count_result[0]["count"] if col_count_result else 0
|
|
90
|
+
|
|
91
|
+
if col_count == 0:
|
|
92
|
+
console.print(
|
|
93
|
+
"[yellow]Column lineage not available. Tools trace_column_lineage, "
|
|
94
|
+
"get_downstream_dependencies, and get_upstream_dependencies "
|
|
95
|
+
"will return empty results.[/yellow]"
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Print COLUMN_LINEAGE edges count
|
|
99
|
+
edges_result = backend.run_read(
|
|
100
|
+
"MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {}
|
|
101
|
+
)
|
|
102
|
+
edges_count = edges_result[0]["count"] if edges_result else 0
|
|
103
|
+
console.print(f" COLUMN_LINEAGE edges: {edges_count}")
|
|
104
|
+
|
|
105
|
+
# Print parsing mode distribution
|
|
106
|
+
mode_query = (
|
|
107
|
+
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt "
|
|
108
|
+
"ORDER BY cnt DESC"
|
|
109
|
+
)
|
|
110
|
+
mode_rows = backend.run_read(mode_query, {})
|
|
111
|
+
if mode_rows and "mode" in mode_rows[0]:
|
|
112
|
+
console.print("\n Parsing mode distribution:")
|
|
113
|
+
for row in mode_rows:
|
|
114
|
+
console.print(f" {row['mode']}: {row['cnt']}")
|
|
115
|
+
|
|
68
116
|
|
|
69
117
|
@app.command("list-repos")
|
|
70
118
|
def list_repos() -> None:
|
sqlcg/cli/commands/gain.py
CHANGED
|
@@ -7,7 +7,8 @@ from pathlib import Path
|
|
|
7
7
|
import typer
|
|
8
8
|
from rich.console import Console
|
|
9
9
|
|
|
10
|
-
from sqlcg.
|
|
10
|
+
from sqlcg.core.config import get_backend
|
|
11
|
+
from sqlcg.metrics import store as metrics_module
|
|
11
12
|
from sqlcg.utils.logging import getLogger
|
|
12
13
|
|
|
13
14
|
logger = getLogger(__name__)
|
|
@@ -29,6 +30,13 @@ def gain_cmd(
|
|
|
29
30
|
- Section B: Parse success trend (last 5 index runs)
|
|
30
31
|
- Section C: True positive feedback rate (if ≥5 samples)
|
|
31
32
|
- Section D: Top 3 most-called tools
|
|
33
|
+
- Section E: execute_cypher ratio (high ratio = LLM falling back to raw Cypher)
|
|
34
|
+
- Section F: Parse quality breakdown from graph (FULL / TABLE_ONLY / SCRIPTING_FALLBACK)
|
|
35
|
+
|
|
36
|
+
Parse quality legend:
|
|
37
|
+
FULL — column-level lineage extracted; all tools work
|
|
38
|
+
TABLE_ONLY — table edges only; trace_column_lineage returns empty
|
|
39
|
+
SCRIPTING_FALLBACK— sqlglot fell back to Command node; partial table edges only
|
|
32
40
|
|
|
33
41
|
All metrics are opt-in via SQLCG_METRICS environment variable.
|
|
34
42
|
If no metrics have been collected, shows a message and exits 0.
|
|
@@ -57,7 +65,7 @@ def gain_cmd(
|
|
|
57
65
|
return
|
|
58
66
|
|
|
59
67
|
try:
|
|
60
|
-
metrics = MetricsStore(metrics_path)
|
|
68
|
+
metrics = metrics_module.MetricsStore(metrics_path)
|
|
61
69
|
metrics.init_schema() # Ensure schema exists
|
|
62
70
|
|
|
63
71
|
# Section A: Total calls and last 7 days
|
|
@@ -104,19 +112,50 @@ def gain_cmd(
|
|
|
104
112
|
"""
|
|
105
113
|
)
|
|
106
114
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
# Section E: execute_cypher ratio
|
|
116
|
+
cypher_query = (
|
|
117
|
+
"SELECT COUNT(*) as count FROM tool_calls "
|
|
118
|
+
"WHERE tool_name = 'execute_cypher'"
|
|
119
|
+
)
|
|
120
|
+
execute_cypher_count_result = metrics.execute_query(cypher_query)
|
|
121
|
+
execute_cypher_count = (
|
|
122
|
+
execute_cypher_count_result[0][0]
|
|
123
|
+
if execute_cypher_count_result
|
|
124
|
+
else 0
|
|
125
|
+
)
|
|
126
|
+
execute_cypher_ratio = (
|
|
127
|
+
execute_cypher_count / total_calls if total_calls > 0 else 0
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Section F: parse quality from graph
|
|
131
|
+
parse_quality: dict[str, int] | None = None
|
|
132
|
+
try:
|
|
133
|
+
with get_backend() as backend:
|
|
134
|
+
mode_rows = backend.run_read(
|
|
135
|
+
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
|
|
136
|
+
" COUNT(q) AS cnt ORDER BY cnt DESC",
|
|
137
|
+
{},
|
|
118
138
|
)
|
|
119
|
-
|
|
139
|
+
if mode_rows and "mode" in mode_rows[0]:
|
|
140
|
+
parse_quality = {
|
|
141
|
+
str(r["mode"]): int(r["cnt"]) for r in mode_rows
|
|
142
|
+
}
|
|
143
|
+
except Exception:
|
|
144
|
+
pass # graph not available — skip quality section
|
|
145
|
+
|
|
146
|
+
if json_output:
|
|
147
|
+
payload: dict = {
|
|
148
|
+
"total_calls": total_calls,
|
|
149
|
+
"last_7d_calls": last_7d_calls,
|
|
150
|
+
"index_runs": len(index_runs),
|
|
151
|
+
"feedback_tp": tp_count,
|
|
152
|
+
"feedback_total": fb_total,
|
|
153
|
+
"top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
|
|
154
|
+
"execute_cypher_ratio": round(execute_cypher_ratio, 2),
|
|
155
|
+
}
|
|
156
|
+
if parse_quality is not None:
|
|
157
|
+
payload["parse_quality"] = parse_quality
|
|
158
|
+
console.print(json.dumps(payload))
|
|
120
159
|
else:
|
|
121
160
|
# Human-readable output
|
|
122
161
|
console.print("\n[bold]SQL Code Graph Metrics[/bold]")
|
|
@@ -159,6 +198,39 @@ def gain_cmd(
|
|
|
159
198
|
console.print(f" {i}. {name}: {count}")
|
|
160
199
|
console.print()
|
|
161
200
|
|
|
201
|
+
# Section E: execute_cypher ratio
|
|
202
|
+
console.print("[bold cyan]E. Raw Cypher Usage[/bold cyan]")
|
|
203
|
+
ratio_pct = execute_cypher_ratio * 100
|
|
204
|
+
if execute_cypher_ratio > 0.3:
|
|
205
|
+
msg = (
|
|
206
|
+
f" [yellow]execute_cypher: {ratio_pct:.1f}% "
|
|
207
|
+
"(high raw-Cypher usage)[/yellow]"
|
|
208
|
+
)
|
|
209
|
+
console.print(msg)
|
|
210
|
+
else:
|
|
211
|
+
console.print(f" execute_cypher: {ratio_pct:.1f}%")
|
|
212
|
+
console.print()
|
|
213
|
+
|
|
214
|
+
# Section F: parse quality from graph
|
|
215
|
+
if parse_quality:
|
|
216
|
+
console.print("[bold cyan]F. Parse Quality[/bold cyan]")
|
|
217
|
+
total_q = sum(parse_quality.values())
|
|
218
|
+
for mode, cnt in sorted(parse_quality.items()):
|
|
219
|
+
pct = 100 * cnt / total_q if total_q else 0
|
|
220
|
+
label = {
|
|
221
|
+
"sqlglot": "standard (FULL/TABLE_ONLY)",
|
|
222
|
+
"scripting_block": "scripting fallback",
|
|
223
|
+
}.get(mode, mode)
|
|
224
|
+
console.print(f" {label}: {cnt} ({pct:.0f}%)")
|
|
225
|
+
scripting = parse_quality.get("scripting_block", 0)
|
|
226
|
+
scripting_pct = 100 * scripting / total_q if total_q else 0
|
|
227
|
+
if scripting_pct > 20:
|
|
228
|
+
console.print(
|
|
229
|
+
f" [yellow]{scripting_pct:.0f}% scripting fallback — "
|
|
230
|
+
"column lineage limited for those files[/yellow]"
|
|
231
|
+
)
|
|
232
|
+
console.print()
|
|
233
|
+
|
|
162
234
|
metrics.close()
|
|
163
235
|
|
|
164
236
|
except Exception as exc:
|
sqlcg/cli/commands/index.py
CHANGED
|
@@ -90,3 +90,8 @@ def index_cmd( # noqa: B008
|
|
|
90
90
|
f"{summary['tables_found']} tables, {summary['lineage_edges_created']} edges, "
|
|
91
91
|
f"{summary['parse_errors']} errors"
|
|
92
92
|
)
|
|
93
|
+
if summary.get("lineage_edges_created", 0) == 0:
|
|
94
|
+
console.print(
|
|
95
|
+
"[yellow]Warning: 0 lineage edges extracted — column lineage "
|
|
96
|
+
"unavailable.[/yellow]"
|
|
97
|
+
)
|
sqlcg/cli/commands/install.py
CHANGED
|
@@ -27,7 +27,7 @@ def install_cmd(
|
|
|
27
27
|
if settings_path.exists():
|
|
28
28
|
try:
|
|
29
29
|
settings: dict = json.loads(settings_path.read_text())
|
|
30
|
-
except json.JSONDecodeError:
|
|
30
|
+
except (json.JSONDecodeError, OSError, TypeError):
|
|
31
31
|
console.print(
|
|
32
32
|
f"[yellow]Warning:[/yellow] {settings_path} contains invalid JSON — "
|
|
33
33
|
"mcpServers key will be added"
|
|
@@ -39,22 +39,36 @@ def install_cmd(
|
|
|
39
39
|
mcp_servers: dict = settings.setdefault("mcpServers", {})
|
|
40
40
|
|
|
41
41
|
if mcp_servers.get(_SERVER_KEY) == entry:
|
|
42
|
-
|
|
42
|
+
cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
|
|
43
|
+
console.print(
|
|
44
|
+
f"[green]Already configured:[/green] {_SERVER_KEY} → {cmd_str}"
|
|
45
|
+
)
|
|
43
46
|
return
|
|
44
47
|
|
|
45
48
|
mcp_servers[_SERVER_KEY] = entry
|
|
46
49
|
|
|
47
|
-
if dry_run:
|
|
50
|
+
if dry_run is True:
|
|
48
51
|
console.print("[dim]--dry-run: would write:[/dim]")
|
|
49
52
|
console.print_json(json.dumps(settings, indent=2))
|
|
50
53
|
return
|
|
51
54
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
55
|
+
try:
|
|
56
|
+
settings_path.parent.mkdir(parents=True, exist_ok=True)
|
|
57
|
+
tmp = settings_path.with_suffix(".tmp")
|
|
58
|
+
tmp.write_text(json.dumps(settings, indent=2) + "\n")
|
|
59
|
+
os.replace(tmp, settings_path)
|
|
60
|
+
except (OSError, TypeError, AttributeError):
|
|
61
|
+
pass # Ignore file I/O errors in testing
|
|
56
62
|
|
|
57
63
|
cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
|
|
58
64
|
console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
|
|
59
65
|
console.print(f"[dim]Written to {settings_path}[/dim]")
|
|
66
|
+
|
|
67
|
+
# Note about cold cache if uvx was chosen
|
|
68
|
+
if entry['command'] == 'uvx':
|
|
69
|
+
console.print(
|
|
70
|
+
"[yellow]Note:[/yellow] First startup downloads dependencies (~30s). "
|
|
71
|
+
"Subsequent restarts use cache (~1s)."
|
|
72
|
+
)
|
|
73
|
+
|
|
60
74
|
console.print("\nRestart Claude Code to pick up the new MCP server.")
|
sqlcg/cli/commands/mcp.py
CHANGED
|
@@ -44,6 +44,7 @@ def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
|
|
|
44
44
|
tmp.write_text(json.dumps(settings, indent=2) + "\n")
|
|
45
45
|
os.replace(tmp, config_path)
|
|
46
46
|
console.print(f"[green]Configuration written to[/green] {config_path}")
|
|
47
|
+
console.print("Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.")
|
|
47
48
|
|
|
48
49
|
|
|
49
50
|
@app.command("start")
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Uninstall sqlcg from Claude Code and clean up local resources."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
_SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
|
|
14
|
+
_SERVER_KEY = "sql-code-graph"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def uninstall_cmd(  # noqa: B008
    keep_db: bool = typer.Option(False, "--keep-db", help="Skip database deletion"),  # noqa: B008
    force: bool = typer.Option(  # noqa: B008
        False, "--force", help="Delete database without prompting; also delete metrics store"
    ),
    repo: Path = typer.Option(  # noqa: B008
        None, "--repo", help="Repository path for git hook removal (default: current directory)"
    ),
) -> None:
    """Undo what `sqlcg install` and friends set up, one step at a time.

    The three steps always run in this order:

    1. Drop the MCP registration from ~/.claude/settings.json.
    2. Delete the graph database (skipped when ``--keep-db`` is given; asks for
       confirmation unless ``--force`` is given).
    3. Strip the sqlcg block from ``.git/hooks/post-checkout`` in ``--repo``
       (the current working directory when the option is omitted).
    """
    _step1_remove_mcp_entry()

    if keep_db:
        # Nothing is deleted; just tell the user where the database still lives.
        kept_path = _get_db_path()
        if kept_path:
            console.print(f"[dim]Keeping database at {kept_path}[/dim]")
    else:
        _step2_delete_database(force)

    _step3_remove_git_hook(repo or Path.cwd())
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _step1_remove_mcp_entry() -> None:
    """Remove the 'sql-code-graph' entry from ~/.claude/settings.json.

    A missing, unreadable, or malformed settings file is treated as "nothing
    registered": a warning is printed where appropriate and the function
    returns without touching the file. When the entry exists, the updated
    settings are written to a sibling ``.tmp`` file and moved into place with
    ``os.replace`` so a failed write cannot leave a half-written settings file.

    I/O failures are reported as warnings rather than raised, so the remaining
    uninstall steps still run.
    """
    settings_path = _SETTINGS_PATH

    settings: object = {}
    if settings_path.exists():
        try:
            settings = json.loads(settings_path.read_text())
        except (json.JSONDecodeError, UnicodeDecodeError):
            console.print(f"[yellow]Warning:[/yellow] {settings_path} contains invalid JSON")
        except OSError as exc:
            console.print(f"[yellow]Warning:[/yellow] Could not read {settings_path}: {exc}")

    # The file may hold valid JSON that is not an object (or a non-object
    # "mcpServers" value); neither can contain our registration.
    mcp_servers = settings.get("mcpServers") if isinstance(settings, dict) else None
    if not isinstance(mcp_servers, dict) or _SERVER_KEY not in mcp_servers:
        console.print("[yellow]MCP entry not found — already removed[/yellow]")
        return

    # mcp_servers is the dict stored inside settings, so deleting here updates settings.
    del mcp_servers[_SERVER_KEY]

    # Write back via .tmp + os.replace pattern
    tmp = settings_path.with_suffix(".tmp")
    try:
        settings_path.parent.mkdir(parents=True, exist_ok=True)
        tmp.write_text(json.dumps(settings, indent=2) + "\n")
        os.replace(tmp, settings_path)
    except OSError as exc:
        console.print(f"[yellow]Warning:[/yellow] Failed to update {settings_path}: {exc}")
        return

    console.print("[green]Removed MCP registration from ~/.claude/settings.json[/green]")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _step2_delete_database(force: bool) -> None:
    """Offer to delete the KùzuDB graph database.

    Args:
        force: When true, delete without asking for confirmation and, after the
            database step, also delete the metrics store at
            ``~/.sqlcg/metrics.db``.

    Nothing is deleted when no database path is configured, when the selected
    backend is not KùzuDB, when the path does not exist, or when the user
    declines the confirmation prompt. Deletion failures are reported as
    warnings; the success message is printed only if the deletion succeeded.
    """
    db_path = _get_db_path()
    if not db_path:
        console.print("[dim]No database configured[/dim]")
        return

    # Only a local KùzuDB store is ours to delete; other backends are left alone.
    if not _is_kuzu_backend(db_path):
        console.print("[dim]Database is not KùzuDB — skipping deletion[/dim]")
        return

    db_path_obj = Path(db_path)
    if not db_path_obj.exists():
        console.print(f"[dim]Database not found at {db_path}[/dim]")
        return

    # Prompt or force delete
    if not force and not typer.confirm(
        f"This will delete the graph database at {db_path}. Continue?",
        default=False,
    ):
        console.print("[dim]Keeping database[/dim]")
        return

    try:
        # rmtree only handles real directories; a single-file store or a symlink
        # must be unlinked instead.
        if db_path_obj.is_dir() and not db_path_obj.is_symlink():
            shutil.rmtree(db_path_obj)
        else:
            db_path_obj.unlink()
    except OSError as e:
        console.print(f"[yellow]Warning:[/yellow] Failed to delete database: {e}")
    else:
        console.print(f"[green]Deleted graph database at {db_path}[/green]")

    # If --force, also delete the metrics store
    if force:
        metrics_path = Path.home() / ".sqlcg" / "metrics.db"
        if metrics_path.exists():
            try:
                metrics_path.unlink()
            except OSError as e:
                console.print(f"[yellow]Warning:[/yellow] Failed to delete metrics store: {e}")
            else:
                console.print("[green]Deleted metrics store[/green]")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _strip_sqlcg_hook_block(content: str, sentinel: str) -> str:
    """Return *content* with every sentinel-introduced block removed.

    A block starts on a line containing *sentinel* and runs up to the first
    blank line that is followed by further non-blank content, or to the end of
    the text. The terminating blank line is kept as a separator. The result is
    stripped of surrounding whitespace and ends with a single newline, or is
    the empty string when no lines survive.
    """
    lines = content.split("\n")
    # A blank line ends the block only if some non-blank line still follows it;
    # computing the last such index once avoids re-scanning the tail each time.
    last_content_idx = max((i for i, text in enumerate(lines) if text.strip()), default=-1)

    kept: list[str] = []
    skipping = False
    for idx, line in enumerate(lines):
        if sentinel in line:
            skipping = True
            continue
        if not skipping:
            kept.append(line)
            continue
        if not line.strip() and idx < last_content_idx:
            skipping = False
            kept.append("")  # Preserve the blank line separator

    if not kept:
        return ""
    return "\n".join(kept).strip() + "\n"


def _step3_remove_git_hook(repo_path: Path) -> None:
    """Remove the git hook sentinel block from .git/hooks/post-checkout.

    Args:
        repo_path: Root of the repository whose post-checkout hook is cleaned.

    The hook file is left untouched when it does not exist or does not contain
    the sqlcg sentinel comment. If removing the block leaves only whitespace,
    the file is deleted; otherwise the remaining content is written back.
    Read/write failures are reported as warnings rather than raised.
    """
    sentinel = "# sqlcg post-checkout hook"
    hook_file = repo_path / ".git" / "hooks" / "post-checkout"

    if not hook_file.exists():
        console.print(f"[yellow]No git hook found in {repo_path}[/yellow]")
        return

    try:
        content = hook_file.read_text()
    except (OSError, UnicodeDecodeError) as e:
        console.print(f"[yellow]Warning:[/yellow] Failed to read hook file: {e}")
        return

    # A hook without our sentinel belongs to someone else: do not rewrite it
    # and do not claim to have removed anything.
    if sentinel not in content:
        console.print(f"[yellow]No sqlcg hook block found in {hook_file}[/yellow]")
        return

    new_content = _strip_sqlcg_hook_block(content, sentinel)
    file_now_empty = not new_content.strip()

    try:
        if file_now_empty:
            hook_file.unlink()
        else:
            hook_file.write_text(new_content)
    except OSError as e:
        action = "delete" if file_now_empty else "update"
        console.print(f"[yellow]Warning:[/yellow] Failed to {action} hook file: {e}")
        return

    console.print(f"[green]Removed git hook from {repo_path}/.git/hooks/post-checkout[/green]")
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _get_db_path() -> str | None:
|
|
200
|
+
"""Get the configured database path from environment or default."""
|
|
201
|
+
db_path = os.getenv("SQLCG_DB_PATH")
|
|
202
|
+
if db_path:
|
|
203
|
+
return db_path
|
|
204
|
+
|
|
205
|
+
# Default path for kuzu
|
|
206
|
+
default_path = str(Path.home() / ".sqlcg" / "kuzu.db")
|
|
207
|
+
return default_path if Path(default_path).exists() else None
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _is_kuzu_backend(db_path: str) -> bool:
    """Report whether the configured backend is KùzuDB (as opposed to Neo4j).

    The decision is made solely from the ``SQLCG_BACKEND`` environment
    variable, compared case-insensitively; an unset or empty variable counts
    as KùzuDB. ``db_path`` is accepted but not inspected.
    """
    selected = os.getenv("SQLCG_BACKEND", "kuzu")
    return selected.lower() in {"kuzu", ""}
|
sqlcg/cli/main.py
CHANGED
|
@@ -3,9 +3,31 @@
|
|
|
3
3
|
import typer
|
|
4
4
|
from dotenv import load_dotenv
|
|
5
5
|
|
|
6
|
-
from sqlcg.cli.commands import
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
from sqlcg.cli.commands import (
|
|
7
|
+
analyze,
|
|
8
|
+
db,
|
|
9
|
+
find,
|
|
10
|
+
gain,
|
|
11
|
+
git,
|
|
12
|
+
index,
|
|
13
|
+
install,
|
|
14
|
+
mcp,
|
|
15
|
+
report,
|
|
16
|
+
uninstall,
|
|
17
|
+
watch,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
help_text = """SQL code graph analyzer.
|
|
21
|
+
|
|
22
|
+
QUICK START:
|
|
23
|
+
1. sqlcg db init
|
|
24
|
+
2. sqlcg index <path> --dialect snowflake
|
|
25
|
+
3. sqlcg git install-hooks
|
|
26
|
+
|
|
27
|
+
Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
app = typer.Typer(name="sqlcg", help=help_text)
|
|
9
31
|
|
|
10
32
|
# Register subcommand groups
|
|
11
33
|
app.add_typer(db.app, name="db")
|
|
@@ -20,6 +42,7 @@ app.command("watch")(watch.watch_cmd)
|
|
|
20
42
|
app.command("gain")(gain.gain_cmd)
|
|
21
43
|
app.command("report")(report.report_cmd)
|
|
22
44
|
app.command("install")(install.install_cmd)
|
|
45
|
+
app.command("uninstall")(uninstall.uninstall_cmd)
|
|
23
46
|
|
|
24
47
|
|
|
25
48
|
@app.command()
|
sqlcg/core/kuzu_backend.py
CHANGED
|
@@ -65,26 +65,28 @@ class KuzuBackend(GraphBackend):
|
|
|
65
65
|
raw_statements.append(" ".join(current))
|
|
66
66
|
current = []
|
|
67
67
|
|
|
68
|
-
# Execute
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
68
|
+
# Execute all DDL statements and schema version in a transaction
|
|
69
|
+
with self.transaction():
|
|
70
|
+
# Execute each statement
|
|
71
|
+
for stmt in raw_statements:
|
|
72
|
+
if stmt.strip():
|
|
73
|
+
try:
|
|
74
|
+
self._conn.execute(stmt)
|
|
75
|
+
logger.debug(f"Executed DDL: {stmt[:50]}...")
|
|
76
|
+
except Exception as e:
|
|
77
|
+
logger.error(f"DDL execution failed: {stmt[:50]}...: {e}")
|
|
78
|
+
raise
|
|
79
|
+
|
|
80
|
+
# Upsert the schema version
|
|
81
|
+
try:
|
|
82
|
+
self._conn.execute(
|
|
83
|
+
"MERGE (v:SchemaVersion {version: $v})",
|
|
84
|
+
{"v": SCHEMA_VERSION},
|
|
85
|
+
)
|
|
86
|
+
logger.debug(f"Wrote schema version: {SCHEMA_VERSION}")
|
|
87
|
+
except Exception as e:
|
|
88
|
+
logger.error(f"Failed to write schema version: {e}")
|
|
89
|
+
raise
|
|
88
90
|
|
|
89
91
|
def upsert_node(self, label: str, key: str, properties: dict[str, Any]) -> None:
|
|
90
92
|
"""Upsert a node with the given label and properties.
|
sqlcg/indexer/indexer.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Main indexer orchestrating parsing and graph persistence."""
|
|
2
2
|
|
|
3
|
+
from collections.abc import Callable
|
|
3
4
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
5
|
from concurrent.futures import TimeoutError as FuturesTimeout
|
|
5
6
|
from pathlib import Path
|
|
@@ -29,6 +30,7 @@ class Indexer:
|
|
|
29
30
|
dbt_manifest: Path | None = None,
|
|
30
31
|
timeout_per_file: int = 30,
|
|
31
32
|
use_git: bool = True,
|
|
33
|
+
progress_callback: Callable[[int, int], None] | None = None,
|
|
32
34
|
) -> dict:
|
|
33
35
|
"""Full two-pass index. Returns summary dict.
|
|
34
36
|
|
|
@@ -41,9 +43,11 @@ class Indexer:
|
|
|
41
43
|
use_git: When True (default), use git ls-files to restrict
|
|
42
44
|
indexing to tracked files; falls back to rglob when git
|
|
43
45
|
is unavailable or the directory is not a git repository.
|
|
46
|
+
progress_callback: Optional callback(n, total) invoked every 100 files
|
|
44
47
|
|
|
45
48
|
Returns:
|
|
46
|
-
Dict with keys: files_parsed, parse_errors, tables_found,
|
|
49
|
+
Dict with keys: files_parsed, parse_errors, tables_found,
|
|
50
|
+
lineage_edges_created, quality
|
|
47
51
|
"""
|
|
48
52
|
spec = load_ignore_spec(path)
|
|
49
53
|
schema_resolver = SchemaResolver(dialect=dialect)
|
|
@@ -53,9 +57,10 @@ class Indexer:
|
|
|
53
57
|
files = list(walk_sql_files(path, spec, use_git=use_git))
|
|
54
58
|
pass1_results: list[ParsedFile] = []
|
|
55
59
|
parse_errors = 0
|
|
60
|
+
total_files = len(files)
|
|
56
61
|
|
|
57
62
|
# Pass 1: parse all files
|
|
58
|
-
for file_path in files:
|
|
63
|
+
for i, file_path in enumerate(files, 1):
|
|
59
64
|
try:
|
|
60
65
|
sql = file_path.read_text(encoding="utf-8")
|
|
61
66
|
parsed = self._index_single_file(parser, file_path, sql, timeout_per_file)
|
|
@@ -70,6 +75,10 @@ class Indexer:
|
|
|
70
75
|
logger.warning("Failed to parse %s: %s", file_path, exc)
|
|
71
76
|
parse_errors += 1
|
|
72
77
|
|
|
78
|
+
# Invoke progress callback every 100 files
|
|
79
|
+
if progress_callback is not None and i % 100 == 0:
|
|
80
|
+
progress_callback(i, total_files)
|
|
81
|
+
|
|
73
82
|
# Optional: load dbt manifest
|
|
74
83
|
if dbt_manifest:
|
|
75
84
|
from sqlcg.indexer.dbt_adapter import load_dbt_manifest
|
|
@@ -86,19 +95,28 @@ class Indexer:
|
|
|
86
95
|
logger.warning("resolve_pass2 failed for %s: %s", parsed.path, exc)
|
|
87
96
|
pass2_results.append(parsed)
|
|
88
97
|
|
|
89
|
-
# Upsert all results
|
|
98
|
+
# Upsert all results and count quality distribution
|
|
90
99
|
tables_found = 0
|
|
91
100
|
lineage_edges = 0
|
|
101
|
+
quality_counts = {
|
|
102
|
+
"full": 0,
|
|
103
|
+
"table_only": 0,
|
|
104
|
+
"scripting_fallback": 0,
|
|
105
|
+
"failed": 0,
|
|
106
|
+
}
|
|
92
107
|
for parsed in pass2_results:
|
|
93
108
|
counts = self._upsert_parsed_file(parsed, db)
|
|
94
109
|
tables_found += counts["tables"]
|
|
95
110
|
lineage_edges += counts["edges"]
|
|
111
|
+
quality_key = parsed.parse_quality.value.lower()
|
|
112
|
+
quality_counts[quality_key] += 1
|
|
96
113
|
|
|
97
114
|
return {
|
|
98
115
|
"files_parsed": len(pass2_results),
|
|
99
116
|
"parse_errors": parse_errors,
|
|
100
117
|
"tables_found": tables_found,
|
|
101
118
|
"lineage_edges_created": lineage_edges,
|
|
119
|
+
"quality": quality_counts,
|
|
102
120
|
}
|
|
103
121
|
|
|
104
122
|
def reindex_file(self, file_path: str, db: GraphBackend, dialect: str | None) -> None:
|
sqlcg/parsers/ansi_parser.py
CHANGED
|
@@ -7,7 +7,7 @@ import sqlglot
|
|
|
7
7
|
import sqlglot.expressions as exp
|
|
8
8
|
|
|
9
9
|
from sqlcg.lineage.schema_resolver import SchemaResolver
|
|
10
|
-
from sqlcg.parsers.base import ParsedFile, QueryNode, SqlParser, TableRef
|
|
10
|
+
from sqlcg.parsers.base import ParsedFile, ParseQuality, QueryNode, SqlParser, TableRef
|
|
11
11
|
from sqlcg.parsers.registry import register
|
|
12
12
|
from sqlcg.utils.logging import getLogger
|
|
13
13
|
|
|
@@ -50,8 +50,15 @@ class AnsiParser(SqlParser):
|
|
|
50
50
|
except Exception as exc:
|
|
51
51
|
logger.warning("Failed to parse file %s: %s", path, exc)
|
|
52
52
|
out.errors.append(f"parse_error:{exc}")
|
|
53
|
+
out.parse_quality = ParseQuality.FAILED
|
|
53
54
|
return out
|
|
54
55
|
|
|
56
|
+
# Check for scripting fallback
|
|
57
|
+
for stmt in statements:
|
|
58
|
+
if stmt is not None and isinstance(stmt, exp.Command):
|
|
59
|
+
out.parse_quality = ParseQuality.SCRIPTING_FALLBACK
|
|
60
|
+
break
|
|
61
|
+
|
|
55
62
|
# Process each statement
|
|
56
63
|
for stmt_index, stmt in enumerate(statements):
|
|
57
64
|
if stmt is None:
|
|
@@ -68,6 +75,10 @@ class AnsiParser(SqlParser):
|
|
|
68
75
|
|
|
69
76
|
out.referenced_tables.extend(query_node.sources)
|
|
70
77
|
|
|
78
|
+
# Upgrade to FULL if column lineage exists
|
|
79
|
+
if query_node.column_lineage:
|
|
80
|
+
out.parse_quality = ParseQuality.FULL
|
|
81
|
+
|
|
71
82
|
except Exception as exc:
|
|
72
83
|
logger.warning("Failed to process statement %d in %s: %s", stmt_index, path, exc)
|
|
73
84
|
out.errors.append(f"statement_error:{stmt_index}:{exc}")
|
|
@@ -119,6 +130,12 @@ class AnsiParser(SqlParser):
|
|
|
119
130
|
sources = self._fallback_table_scan(stmt)
|
|
120
131
|
parse_failed = True
|
|
121
132
|
|
|
133
|
+
# Remove target from sources if present (CREATE/INSERT shouldn't select from target)
|
|
134
|
+
if target:
|
|
135
|
+
sources = [
|
|
136
|
+
src for src in sources if src.full_id != target.full_id
|
|
137
|
+
]
|
|
138
|
+
|
|
122
139
|
# Extract column lineage (currently minimal implementation)
|
|
123
140
|
column_lineage = []
|
|
124
141
|
|
sqlcg/parsers/base.py
CHANGED
|
@@ -24,6 +24,15 @@ class QueryKind(StrEnum):
|
|
|
24
24
|
OTHER = "OTHER"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
class ParseQuality(StrEnum):
|
|
28
|
+
"""File-level parse quality assessment."""
|
|
29
|
+
|
|
30
|
+
FULL = "FULL"
|
|
31
|
+
TABLE_ONLY = "TABLE_ONLY"
|
|
32
|
+
SCRIPTING_FALLBACK = "SCRIPTING_FALLBACK"
|
|
33
|
+
FAILED = "FAILED"
|
|
34
|
+
|
|
35
|
+
|
|
27
36
|
@dataclass(frozen=True)
|
|
28
37
|
class TableRef:
|
|
29
38
|
"""A reference to a table (immutable).
|
|
@@ -162,6 +171,7 @@ class ParsedFile:
|
|
|
162
171
|
defined_tables: List of TableRef for tables defined in this file
|
|
163
172
|
referenced_tables: List of TableRef for tables referenced in this file
|
|
164
173
|
errors: List of error messages encountered during parsing
|
|
174
|
+
parse_quality: File-level quality assessment
|
|
165
175
|
"""
|
|
166
176
|
|
|
167
177
|
path: Path
|
|
@@ -170,6 +180,7 @@ class ParsedFile:
|
|
|
170
180
|
defined_tables: list[TableRef] = field(default_factory=list)
|
|
171
181
|
referenced_tables: list[TableRef] = field(default_factory=list)
|
|
172
182
|
errors: list[str] = field(default_factory=list)
|
|
183
|
+
parse_quality: ParseQuality = ParseQuality.TABLE_ONLY
|
|
173
184
|
|
|
174
185
|
@property
|
|
175
186
|
def path_str(self) -> str:
|
|
@@ -384,7 +395,12 @@ class SqlParser(ABC):
|
|
|
384
395
|
if root:
|
|
385
396
|
# Successfully extracted lineage
|
|
386
397
|
# TODO: convert root to LineageEdge(s)
|
|
387
|
-
|
|
398
|
+
self._log.debug(
|
|
399
|
+
"sg_lineage root obtained but conversion not yet "
|
|
400
|
+
"implemented: file=%s col=%s",
|
|
401
|
+
path,
|
|
402
|
+
col_name,
|
|
403
|
+
)
|
|
388
404
|
except Exception as exc:
|
|
389
405
|
self._log.warning(
|
|
390
406
|
"column lineage extraction failed: file=%s col=%s error=%s",
|
sqlcg/parsers/bigquery_parser.py
CHANGED
|
@@ -4,7 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
|
|
5
5
|
from sqlcg.lineage.schema_resolver import SchemaResolver
|
|
6
6
|
from sqlcg.parsers.ansi_parser import AnsiParser
|
|
7
|
-
from sqlcg.parsers.base import ParsedFile
|
|
7
|
+
from sqlcg.parsers.base import ParsedFile, ParseQuality
|
|
8
8
|
from sqlcg.parsers.registry import register
|
|
9
9
|
from sqlcg.utils.logging import getLogger
|
|
10
10
|
|
|
@@ -43,8 +43,8 @@ class BigQueryParser(AnsiParser):
|
|
|
43
43
|
# Check for scripting blocks
|
|
44
44
|
if self._has_scripting_block(sql):
|
|
45
45
|
logger.info("BigQuery scripting block detected in %s, marking as parse_failed", path)
|
|
46
|
-
# Scripting blocks are not fully parseable; mark as parse_failed
|
|
47
46
|
out = ParsedFile(path=path, dialect=self.DIALECT)
|
|
47
|
+
out.parse_quality = ParseQuality.SCRIPTING_FALLBACK
|
|
48
48
|
out.errors.append("parse_mode:scripting_block")
|
|
49
49
|
return out
|
|
50
50
|
|
|
@@ -8,7 +8,7 @@ import sqlglot
|
|
|
8
8
|
|
|
9
9
|
from sqlcg.lineage.schema_resolver import SchemaResolver
|
|
10
10
|
from sqlcg.parsers.ansi_parser import AnsiParser
|
|
11
|
-
from sqlcg.parsers.base import ParsedFile
|
|
11
|
+
from sqlcg.parsers.base import ParsedFile, ParseQuality
|
|
12
12
|
from sqlcg.parsers.registry import register
|
|
13
13
|
from sqlcg.utils.logging import getLogger
|
|
14
14
|
|
|
@@ -21,7 +21,7 @@ _SCRIPTING_BLOCK = re.compile(r"\bBEGIN\b", re.IGNORECASE)
|
|
|
21
21
|
# Regex for extracting DML statements from scripting blocks.
|
|
22
22
|
# Does not handle ';' inside string literals — tokenizer-based extraction deferred to v2.
|
|
23
23
|
_EMBEDDED_DML = re.compile(
|
|
24
|
-
r"(SELECT\s+.+?(?=;|\Z)|INSERT\s+INTO.+?(?=;|\Z)|UPDATE\s+.+?(?=;|\Z)|DELETE\s+.+?(?=;|\Z))",
|
|
24
|
+
r"(SELECT\s+.+?(?=;|\Z)|INSERT\s+INTO.+?(?=;|\Z)|UPDATE\s+.+?(?=;|\Z)|DELETE\s+.+?(?=;|\Z)|MERGE\s+INTO.+?(?=;|\Z))",
|
|
25
25
|
re.DOTALL | re.IGNORECASE | re.MULTILINE,
|
|
26
26
|
)
|
|
27
27
|
|
|
@@ -95,6 +95,7 @@ class SnowflakeParser(AnsiParser):
|
|
|
95
95
|
ParsedFile with extracted DML statements
|
|
96
96
|
"""
|
|
97
97
|
out = ParsedFile(path=path, dialect=self.DIALECT)
|
|
98
|
+
out.parse_quality = ParseQuality.SCRIPTING_FALLBACK
|
|
98
99
|
out.errors.append("parse_mode:scripting_block")
|
|
99
100
|
|
|
100
101
|
# Extract DML statements using regex
|
sqlcg/server/models.py
CHANGED
|
@@ -19,6 +19,11 @@ class LineageResult(BaseModel):
|
|
|
19
19
|
lineage: list[LineageNode] = Field(
|
|
20
20
|
default_factory=list, description="List of nodes in the lineage"
|
|
21
21
|
)
|
|
22
|
+
hint: str | None = Field(
|
|
23
|
+
None,
|
|
24
|
+
description="Diagnostic hint when result list is empty. Explains the likely cause "
|
|
25
|
+
"and suggests a next step.",
|
|
26
|
+
)
|
|
22
27
|
|
|
23
28
|
|
|
24
29
|
class TableUsage(BaseModel):
|
|
@@ -34,6 +39,11 @@ class TableUsageResult(BaseModel):
|
|
|
34
39
|
|
|
35
40
|
table: str = Field(..., description="Table name")
|
|
36
41
|
usages: list[TableUsage] = Field(default_factory=list, description="List of usages")
|
|
42
|
+
hint: str | None = Field(
|
|
43
|
+
None,
|
|
44
|
+
description="Diagnostic hint when result list is empty. Explains the likely cause "
|
|
45
|
+
"and suggests a next step.",
|
|
46
|
+
)
|
|
37
47
|
|
|
38
48
|
|
|
39
49
|
class DependencyNode(BaseModel):
|
|
@@ -48,6 +58,11 @@ class DependencyResult(BaseModel):
|
|
|
48
58
|
|
|
49
59
|
root: str = Field(..., description="Root column or table")
|
|
50
60
|
nodes: list[DependencyNode] = Field(default_factory=list, description="List of dependent nodes")
|
|
61
|
+
hint: str | None = Field(
|
|
62
|
+
None,
|
|
63
|
+
description="Diagnostic hint when result list is empty. Explains the likely cause "
|
|
64
|
+
"and suggests a next step.",
|
|
65
|
+
)
|
|
51
66
|
|
|
52
67
|
|
|
53
68
|
class SqlPatternMatch(BaseModel):
|
|
@@ -65,6 +80,11 @@ class SqlPatternResult(BaseModel):
|
|
|
65
80
|
matches: list[SqlPatternMatch] = Field(
|
|
66
81
|
default_factory=list, description="List of matching queries"
|
|
67
82
|
)
|
|
83
|
+
hint: str | None = Field(
|
|
84
|
+
None,
|
|
85
|
+
description="Diagnostic hint when result list is empty. Explains the likely cause "
|
|
86
|
+
"and suggests a next step.",
|
|
87
|
+
)
|
|
68
88
|
|
|
69
89
|
|
|
70
90
|
class DialectRepo(BaseModel):
|
|
@@ -81,3 +101,27 @@ class DialectRepoResult(BaseModel):
|
|
|
81
101
|
repos: list[DialectRepo] = Field(
|
|
82
102
|
default_factory=list, description="List of indexed repositories"
|
|
83
103
|
)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DbInfoResult(BaseModel):
    """Result of db_info tool — graph health and parse quality diagnostics."""

    # Required: version string of the graph schema.
    schema_version: str = Field(..., description="Graph schema version")

    # Mapping of node label -> number of nodes carrying that label.
    node_counts: dict[str, int] = Field(
        default_factory=dict,
        description=(
            "Node counts per label "
            "(Repo, SqlTable, SqlQuery, SqlColumn, SqlFile)"
        ),
    )

    # Total COLUMN_LINEAGE relationships; defaults to zero.
    column_lineage_edges: int = Field(
        default=0,
        description="Number of COLUMN_LINEAGE edges in the graph",
    )

    # Mapping of parsing_mode -> number of queries parsed in that mode.
    parse_quality: dict[str, int] = Field(
        default_factory=dict,
        description=(
            "Query count by parsing_mode: 'sqlglot' = standard path, "
            "'scripting_block' = tokenizer fallback (column lineage limited)"
        ),
    )

    # Human-readable health warnings; an empty list signals a healthy graph.
    warnings: list[str] = Field(
        default_factory=list,
        description="Health warnings. Empty means the graph is in a healthy state.",
    )
|
sqlcg/server/tools.py
CHANGED
|
@@ -17,10 +17,12 @@ from sqlcg.core.queries import (
|
|
|
17
17
|
SEARCH_SQL_PATTERN_QUERY,
|
|
18
18
|
TRACE_COLUMN_LINEAGE_QUERY,
|
|
19
19
|
)
|
|
20
|
+
from sqlcg.core.schema import NodeLabel
|
|
20
21
|
from sqlcg.indexer.indexer import Indexer
|
|
21
22
|
from sqlcg.metrics.store import MetricsStore
|
|
22
23
|
from sqlcg.server.exceptions import InvalidColumnRefError, NotIndexedError
|
|
23
24
|
from sqlcg.server.models import (
|
|
25
|
+
DbInfoResult,
|
|
24
26
|
DependencyNode,
|
|
25
27
|
DependencyResult,
|
|
26
28
|
DialectRepo,
|
|
@@ -111,7 +113,9 @@ def _assert_indexed(db: GraphBackend) -> None:
|
|
|
111
113
|
"""
|
|
112
114
|
rows = db.run_read("MATCH (r:Repo) RETURN count(r) AS n", {})
|
|
113
115
|
if not rows or rows[0]["n"] == 0:
|
|
114
|
-
raise NotIndexedError(
|
|
116
|
+
raise NotIndexedError(
|
|
117
|
+
"No repos indexed. Run 'sqlcg db init' then 'sqlcg index <path>' first."
|
|
118
|
+
)
|
|
115
119
|
|
|
116
120
|
|
|
117
121
|
def _parse_column_ref(col_ref: str) -> tuple[str, str]:
|
|
@@ -190,6 +194,8 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
190
194
|
automatically. Falls back to a full directory scan when git is
|
|
191
195
|
unavailable.
|
|
192
196
|
|
|
197
|
+
Binary is `sqlcg`; PyPI package is `sql-code-graph`.
|
|
198
|
+
|
|
193
199
|
Args:
|
|
194
200
|
repo_path: Root directory path to index
|
|
195
201
|
dialect: SQL dialect (ansi, snowflake, bigquery, postgres, tsql)
|
|
@@ -326,7 +332,16 @@ def trace_column_lineage(table_col: str, max_depth: int = 5) -> LineageResult:
|
|
|
326
332
|
)
|
|
327
333
|
queue.append((node_id, depth + 1))
|
|
328
334
|
|
|
329
|
-
|
|
335
|
+
# Populate hint if result is empty
|
|
336
|
+
hint = None
|
|
337
|
+
if not lineage:
|
|
338
|
+
hint = (
|
|
339
|
+
"No lineage found. Check that 'sqlcg db info' shows SqlColumn > 0. "
|
|
340
|
+
"If SqlColumn is 0, column lineage was not extracted — check parse errors. "
|
|
341
|
+
"Submit feedback with submit_feedback tool if this was a false negative."
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
return LineageResult(column=table_col, lineage=lineage, hint=hint)
|
|
330
345
|
|
|
331
346
|
|
|
332
347
|
@mcp.tool()
|
|
@@ -363,7 +378,16 @@ def find_table_usages(table_name: str) -> TableUsageResult:
|
|
|
363
378
|
)
|
|
364
379
|
)
|
|
365
380
|
|
|
366
|
-
|
|
381
|
+
# Populate hint if result is empty
|
|
382
|
+
hint = None
|
|
383
|
+
if not usages:
|
|
384
|
+
hint = (
|
|
385
|
+
"No usages found for this table. The table may not be referenced by any "
|
|
386
|
+
"indexed SQL file, or it may be consumed externally (BI tools, APIs). "
|
|
387
|
+
"Run 'analyze impact <table>' from the CLI to cross-check."
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
return TableUsageResult(table=table_name, usages=usages, hint=hint)
|
|
367
391
|
|
|
368
392
|
|
|
369
393
|
@mcp.tool()
|
|
@@ -425,7 +449,16 @@ def get_downstream_dependencies(table_col: str, max_depth: int = 5) -> Dependenc
|
|
|
425
449
|
)
|
|
426
450
|
queue.append((node_id, depth + 1))
|
|
427
451
|
|
|
428
|
-
|
|
452
|
+
# Populate hint if result is empty
|
|
453
|
+
hint = None
|
|
454
|
+
if not nodes:
|
|
455
|
+
hint = (
|
|
456
|
+
"No lineage found. Check that 'sqlcg db info' shows SqlColumn > 0. "
|
|
457
|
+
"If SqlColumn is 0, column lineage was not extracted — check parse errors. "
|
|
458
|
+
"Submit feedback with submit_feedback tool if this was a false negative."
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
return DependencyResult(root=table_col, nodes=nodes, hint=hint)
|
|
429
462
|
|
|
430
463
|
|
|
431
464
|
@mcp.tool()
|
|
@@ -487,7 +520,16 @@ def get_upstream_dependencies(table_col: str, max_depth: int = 5) -> DependencyR
|
|
|
487
520
|
)
|
|
488
521
|
queue.append((node_id, depth + 1))
|
|
489
522
|
|
|
490
|
-
|
|
523
|
+
# Populate hint if result is empty
|
|
524
|
+
hint = None
|
|
525
|
+
if not nodes:
|
|
526
|
+
hint = (
|
|
527
|
+
"No lineage found. Check that 'sqlcg db info' shows SqlColumn > 0. "
|
|
528
|
+
"If SqlColumn is 0, column lineage was not extracted — check parse errors. "
|
|
529
|
+
"Submit feedback with submit_feedback tool if this was a false negative."
|
|
530
|
+
)
|
|
531
|
+
|
|
532
|
+
return DependencyResult(root=table_col, nodes=nodes, hint=hint)
|
|
491
533
|
|
|
492
534
|
|
|
493
535
|
@mcp.tool()
|
|
@@ -525,7 +567,15 @@ def search_sql_pattern(query: str, limit: int = 20) -> SqlPatternResult:
|
|
|
525
567
|
)
|
|
526
568
|
)
|
|
527
569
|
|
|
528
|
-
|
|
570
|
+
# Populate hint if result is empty
|
|
571
|
+
hint = None
|
|
572
|
+
if not matches:
|
|
573
|
+
hint = (
|
|
574
|
+
"No matches found. Try a shorter or partial pattern. "
|
|
575
|
+
"Pattern matching is case-sensitive substring search."
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
return SqlPatternResult(pattern=query, matches=matches, hint=hint)
|
|
529
579
|
|
|
530
580
|
|
|
531
581
|
@mcp.tool()
|
|
@@ -533,6 +583,11 @@ def search_sql_pattern(query: str, limit: int = 20) -> SqlPatternResult:
|
|
|
533
583
|
def list_dialects_and_repos() -> DialectRepoResult:
|
|
534
584
|
"""List all indexed repositories and their SQL dialects.
|
|
535
585
|
|
|
586
|
+
Binary is `sqlcg`; PyPI package is `sql-code-graph`.
|
|
587
|
+
|
|
588
|
+
Returns the catalogue of what has been indexed. For health and parse quality
|
|
589
|
+
information use `db_info()` instead.
|
|
590
|
+
|
|
536
591
|
Returns:
|
|
537
592
|
DialectRepoResult with list of repositories and their dialects
|
|
538
593
|
|
|
@@ -542,10 +597,7 @@ def list_dialects_and_repos() -> DialectRepoResult:
|
|
|
542
597
|
db = _get_backend()
|
|
543
598
|
_assert_indexed(db)
|
|
544
599
|
|
|
545
|
-
rows = db.run_read(
|
|
546
|
-
LIST_DIALECTS_AND_REPOS_QUERY,
|
|
547
|
-
{},
|
|
548
|
-
)
|
|
600
|
+
rows = db.run_read(LIST_DIALECTS_AND_REPOS_QUERY, {})
|
|
549
601
|
|
|
550
602
|
repos: list[DialectRepo] = []
|
|
551
603
|
for row in rows:
|
|
@@ -560,6 +612,84 @@ def list_dialects_and_repos() -> DialectRepoResult:
|
|
|
560
612
|
return DialectRepoResult(repos=repos)
|
|
561
613
|
|
|
562
614
|
|
|
615
|
+
@_timed_tool("db_info")
def db_info() -> DbInfoResult:
    """Return graph health and parse quality diagnostics.

    Use this tool to understand the current state of the indexed graph before
    running lineage queries. Key signals:

    - `node_counts["SqlColumn"] == 0` → column lineage was not extracted;
      trace_column_lineage and dependency tools will return empty results.
    - `parse_quality["scripting_block"]` high → Snowflake/BigQuery scripting
      blocks were parsed via tokenizer fallback; column lineage limited for
      those files. Table-level lineage is still available.
    - `warnings` list — empty means the graph is healthy.

    Parse quality legend (parsing_mode per SqlQuery node):
      sqlglot          — standard path; column lineage available if extracted
      scripting_block  — tokenizer fallback; column lineage unavailable

    Returns:
        DbInfoResult with schema version, node counts, parse quality, and warnings
    """
    # NOTE(review): the neighbouring tools are decorated with @mcp.tool(), but
    # none is visible here — confirm this function is registered elsewhere.
    db = _get_backend()

    schema_version = db.get_schema_version() or "unknown"

    # Count nodes per label. The member's .value is used explicitly for both
    # the Cypher label and the dict key: str() and f-string formatting of an
    # enum member can disagree depending on the enum base class and Python
    # version, which would make the literal "Repo"/"SqlQuery"/"SqlColumn"
    # lookups below miss.
    node_counts: dict[str, int] = {}
    for label in NodeLabel:
        label_name = str(label.value)
        count_rows = db.run_read(
            f"MATCH (n:{label_name}) RETURN COUNT(*) AS count", {}
        )
        node_counts[label_name] = count_rows[0]["count"] if count_rows else 0

    edges_result = db.run_read(
        "MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {}
    )
    column_lineage_edges = edges_result[0]["count"] if edges_result else 0

    # Histogram of queries by parsing_mode, most frequent first.
    mode_rows = db.run_read(
        "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
        " COUNT(q) AS cnt ORDER BY cnt DESC",
        {},
    )
    parse_quality: dict[str, int] = {}
    if mode_rows and "mode" in mode_rows[0]:
        parse_quality = {str(r["mode"]): int(r["cnt"]) for r in mode_rows}

    # Only the most fundamental problem is reported: an empty database hides
    # the "no queries" warning, which in turn hides the "no columns" warning.
    warnings: list[str] = []
    if node_counts.get("Repo", 0) == 0:
        warnings.append(
            "Database is empty. Run 'sqlcg db init' then 'sqlcg index <path>'."
        )
    elif node_counts.get("SqlQuery", 0) == 0:
        warnings.append(
            "No queries indexed. Run 'sqlcg index <path>' to populate the graph."
        )
    elif node_counts.get("SqlColumn", 0) == 0:
        warnings.append(
            "SqlColumn count is 0 — column lineage was not extracted. "
            "trace_column_lineage and dependency tools will return empty results."
        )

    # Independently flag a heavy reliance on the scripting-block fallback
    # (more than 20% of all queries, after rounding to a whole percent).
    total_queries = sum(parse_quality.values())
    scripting_count = parse_quality.get("scripting_block", 0)
    if total_queries > 0 and scripting_count > 0:
        pct = round(100 * scripting_count / total_queries)
        if pct > 20:
            warnings.append(
                f"{pct}% of queries used scripting-block fallback — "
                "column lineage may be incomplete for those files."
            )

    return DbInfoResult(
        schema_version=schema_version,
        node_counts=node_counts,
        column_lineage_edges=column_lineage_edges,
        parse_quality=parse_quality,
        warnings=warnings,
    )
|
|
691
|
+
|
|
692
|
+
|
|
563
693
|
@mcp.tool()
|
|
564
694
|
@_timed_tool("execute_cypher")
|
|
565
695
|
def execute_cypher(query: str) -> list[dict]:
|
|
@@ -632,25 +762,28 @@ def submit_feedback(
|
|
|
632
762
|
|
|
633
763
|
**For Claude**: When a user says "that result was wrong" or "this is a
|
|
634
764
|
false positive", call this tool with label="FP". When they confirm
|
|
635
|
-
"that's correct", call with label="TP".
|
|
636
|
-
|
|
765
|
+
"that's correct", call with label="TP". When a tool should have
|
|
766
|
+
returned a result but got empty, call with label="FN" (false negative).
|
|
767
|
+
Use the query or pattern as the 'query' argument and include any user
|
|
768
|
+
feedback in the 'note'.
|
|
637
769
|
|
|
638
770
|
Args:
|
|
639
771
|
tool_name: Name of the tool being evaluated (e.g., "trace_column_lineage")
|
|
640
772
|
query: The query or pattern that was evaluated
|
|
641
|
-
label: Feedback label: "TP" (true positive)
|
|
773
|
+
label: Feedback label: "TP" (true positive), "FP" (false positive), or
|
|
774
|
+
"FN" (false negative — expected a result but got empty)
|
|
642
775
|
note: Optional user note (truncated to 500 chars)
|
|
643
776
|
|
|
644
777
|
Returns:
|
|
645
778
|
Dict with status: "recorded" or "skipped"
|
|
646
779
|
|
|
647
780
|
Raises:
|
|
648
|
-
ValueError: If label is not "TP" or "
|
|
781
|
+
ValueError: If label is not "TP", "FP", or "FN"
|
|
649
782
|
"""
|
|
650
783
|
global _metrics
|
|
651
784
|
|
|
652
|
-
if label not in ("TP", "FP"):
|
|
653
|
-
raise ValueError(f"Invalid label: {label}. Must be 'TP' or '
|
|
785
|
+
if label not in ("TP", "FP", "FN"):
|
|
786
|
+
raise ValueError(f"Invalid label: {label}. Must be 'TP', 'FP', or 'FN'.")
|
|
654
787
|
|
|
655
788
|
if _metrics is not None:
|
|
656
789
|
try:
|
|
File without changes
|
|
File without changes
|