sql-code-graph 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_code_graph-0.2.1.dist-info/METADATA +171 -0
- sql_code_graph-0.2.1.dist-info/RECORD +55 -0
- sql_code_graph-0.2.1.dist-info/WHEEL +4 -0
- sql_code_graph-0.2.1.dist-info/entry_points.txt +2 -0
- sqlcg/__init__.py +5 -0
- sqlcg/__main__.py +6 -0
- sqlcg/cli/__init__.py +1 -0
- sqlcg/cli/commands/__init__.py +1 -0
- sqlcg/cli/commands/analyze.py +93 -0
- sqlcg/cli/commands/db.py +83 -0
- sqlcg/cli/commands/find.py +63 -0
- sqlcg/cli/commands/gain.py +169 -0
- sqlcg/cli/commands/git.py +73 -0
- sqlcg/cli/commands/index.py +92 -0
- sqlcg/cli/commands/install.py +60 -0
- sqlcg/cli/commands/mcp.py +54 -0
- sqlcg/cli/commands/report.py +135 -0
- sqlcg/cli/commands/watch.py +57 -0
- sqlcg/cli/main.py +40 -0
- sqlcg/core/__init__.py +8 -0
- sqlcg/core/config.py +104 -0
- sqlcg/core/graph_db.py +179 -0
- sqlcg/core/jobs.py +105 -0
- sqlcg/core/kuzu_backend.py +269 -0
- sqlcg/core/neo4j_backend.py +195 -0
- sqlcg/core/queries.py +82 -0
- sqlcg/core/schema.cypher +104 -0
- sqlcg/core/schema.py +48 -0
- sqlcg/indexer/__init__.py +1 -0
- sqlcg/indexer/dbt_adapter.py +23 -0
- sqlcg/indexer/indexer.py +317 -0
- sqlcg/indexer/walker.py +55 -0
- sqlcg/indexer/watcher.py +195 -0
- sqlcg/lineage/__init__.py +1 -0
- sqlcg/lineage/aggregator.py +58 -0
- sqlcg/lineage/schema_resolver.py +198 -0
- sqlcg/metrics/__init__.py +5 -0
- sqlcg/metrics/store.py +273 -0
- sqlcg/parsers/__init__.py +30 -0
- sqlcg/parsers/ansi_parser.py +215 -0
- sqlcg/parsers/base.py +414 -0
- sqlcg/parsers/bigquery_parser.py +77 -0
- sqlcg/parsers/postgres_parser.py +27 -0
- sqlcg/parsers/registry.py +46 -0
- sqlcg/parsers/snowflake_parser.py +148 -0
- sqlcg/parsers/tsql_parser.py +27 -0
- sqlcg/server/__init__.py +1 -0
- sqlcg/server/exceptions.py +20 -0
- sqlcg/server/models.py +83 -0
- sqlcg/server/server.py +57 -0
- sqlcg/server/tools.py +663 -0
- sqlcg/utils/__init__.py +6 -0
- sqlcg/utils/hashing.py +18 -0
- sqlcg/utils/ignore.py +36 -0
- sqlcg/utils/logging.py +29 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sql-code-graph
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: SQL code graph analyzer and lineage tracer
|
|
5
|
+
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
|
+
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
7
|
+
Project-URL: Issues, https://github.com/Warhorze/sql-code-graph/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/Warhorze/sql-code-graph/blob/master/CHANGELOG.md
|
|
9
|
+
Author-email: wesley <rademakerwesley@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Database
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Requires-Dist: kuzu==0.11.3
|
|
22
|
+
Requires-Dist: mcp<2.0,>=1.27.0
|
|
23
|
+
Requires-Dist: pathspec>=0.12.1
|
|
24
|
+
Requires-Dist: pydantic>=2.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
26
|
+
Requires-Dist: rich>=13.7.0
|
|
27
|
+
Requires-Dist: sqlglot==30.6.0
|
|
28
|
+
Requires-Dist: sqlglotc==30.6.0
|
|
29
|
+
Requires-Dist: typer>=0.9.0
|
|
30
|
+
Requires-Dist: watchdog>=3.0.0
|
|
31
|
+
Provides-Extra: dbt
|
|
32
|
+
Requires-Dist: dbt-core>=1.7; extra == 'dbt'
|
|
33
|
+
Provides-Extra: neo4j
|
|
34
|
+
Requires-Dist: neo4j>=5.15.0; extra == 'neo4j'
|
|
35
|
+
Provides-Extra: snowflake
|
|
36
|
+
Requires-Dist: acryl-datahub<0.15.0,>=0.14.0; extra == 'snowflake'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# sql-code-graph
|
|
40
|
+
|
|
41
|
+
SQL lineage and dependency analysis as an MCP server for Claude Code.
|
|
42
|
+
|
|
43
|
+
Indexes a directory of `.sql` files into a graph database and exposes lineage
|
|
44
|
+
queries as MCP tools — so Claude can answer questions like *"what tables does
|
|
45
|
+
this view depend on?"* or *"where is `orders.customer_id` derived from?"*
|
|
46
|
+
without reading every file.
|
|
47
|
+
|
|
48
|
+
## Quick start
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install sql-code-graph # or: uvx sql-code-graph (no install needed)
|
|
52
|
+
sqlcg install # register MCP server in Claude Code
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Restart Claude Code, then inside your project ask:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
Index my SQL files at ./sql --dialect snowflake
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
That's it. The MCP tools are now available to Claude in every conversation
|
|
62
|
+
for that project.
|
|
63
|
+
|
|
64
|
+
## Full setup (recommended)
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# 1. Install
|
|
68
|
+
pip install sql-code-graph
|
|
69
|
+
|
|
70
|
+
# 2. Register with Claude Code (~/.claude/settings.json)
|
|
71
|
+
sqlcg install
|
|
72
|
+
|
|
73
|
+
# 3. Restart Claude Code
|
|
74
|
+
|
|
75
|
+
# 4. Index your SQL repo
|
|
76
|
+
# Only git-tracked files are indexed — build artefacts, node_modules,
|
|
77
|
+
# and .venv are ignored automatically.
|
|
78
|
+
sqlcg db init
|
|
79
|
+
sqlcg index ./sql --dialect snowflake # or: bigquery, postgres, ansi, tsql
|
|
80
|
+
|
|
81
|
+
# 5. (Optional) Keep the graph fresh on branch switches
|
|
82
|
+
cd /your/sql/repo
|
|
83
|
+
sqlcg git install-hooks
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Step 5 installs a `post-checkout` git hook that re-indexes automatically
|
|
87
|
+
whenever you switch branches. Without it the graph may be stale after a
|
|
88
|
+
`git checkout` until you re-run `sqlcg index` manually.
|
|
89
|
+
|
|
90
|
+
## Dialect config
|
|
91
|
+
|
|
92
|
+
To avoid passing `--dialect` every time, create `.sqlcg.toml` in your repo root:
|
|
93
|
+
|
|
94
|
+
```toml
|
|
95
|
+
[sqlcg]
|
|
96
|
+
dialect = "snowflake" # snowflake | bigquery | postgres | ansi | tsql
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The git hook and `sqlcg index --dialect auto` both read this file.
|
|
100
|
+
|
|
101
|
+
## Add to your project CLAUDE.md (recommended)
|
|
102
|
+
|
|
103
|
+
Adding a short note to your project's `CLAUDE.md` helps Claude know the tools
|
|
104
|
+
are available and when to use them:
|
|
105
|
+
|
|
106
|
+
```markdown
|
|
107
|
+
## SQL lineage
|
|
108
|
+
This project uses sql-code-graph. MCP tools are available:
|
|
109
|
+
- `index_repo` — index or re-index a directory of SQL files
|
|
110
|
+
- `find_table_usages` — find all queries that read a table
|
|
111
|
+
- `trace_column_lineage` — trace where a column's value comes from
|
|
112
|
+
- `get_upstream_dependencies` / `get_downstream_dependencies` — dependency chains
|
|
113
|
+
- `search_sql_pattern` — full-text search across all indexed SQL
|
|
114
|
+
- `execute_cypher` — raw graph query for advanced analysis
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The MCP server works without this — Claude can discover the tools on its own —
|
|
118
|
+
but the CLAUDE.md snippet ensures they get used proactively.
|
|
119
|
+
|
|
120
|
+
## MCP tools reference
|
|
121
|
+
|
|
122
|
+
| Tool | Description |
|
|
123
|
+
|------|-------------|
|
|
124
|
+
| `index_repo(repo_path, dialect)` | Index a directory of SQL files |
|
|
125
|
+
| `trace_column_lineage(table_col)` | Trace column lineage upstream |
|
|
126
|
+
| `find_table_usages(table_name)` | Find all queries that read a table |
|
|
127
|
+
| `get_upstream_dependencies(table_col)` | Full upstream dependency chain |
|
|
128
|
+
| `get_downstream_dependencies(table_col)` | Full downstream dependency chain |
|
|
129
|
+
| `search_sql_pattern(query)` | Full-text search across indexed SQL |
|
|
130
|
+
| `list_dialects_and_repos()` | List indexed repos and dialects |
|
|
131
|
+
| `execute_cypher(query)` | Raw Cypher query against the graph |
|
|
132
|
+
|
|
133
|
+
## CLI reference
|
|
134
|
+
|
|
135
|
+
Full option reference: [docs/cli.md](docs/cli.md)
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
sqlcg install # register MCP server in Claude Code
|
|
139
|
+
sqlcg db init # initialise graph database
|
|
140
|
+
sqlcg index <path> --dialect <d> # index SQL files
|
|
141
|
+
sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
|
|
142
|
+
sqlcg watch <path> # watch for file changes
|
|
143
|
+
sqlcg git install-hooks # install post-checkout hook
|
|
144
|
+
sqlcg gain # show usage metrics
|
|
145
|
+
sqlcg report # generate FP/error report
|
|
146
|
+
sqlcg mcp start # start MCP server manually
|
|
147
|
+
sqlcg version # show installed version
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Supported dialects
|
|
151
|
+
|
|
152
|
+
`snowflake` · `bigquery` · `postgres` · `ansi` · `tsql` · `dbt` (via optional extra)
|
|
153
|
+
|
|
154
|
+
## Development
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
git clone https://github.com/Warhorze/sql-code-graph
|
|
158
|
+
cd sql-code-graph
|
|
159
|
+
uv sync --all-extras
|
|
160
|
+
uv run pytest tests/unit
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Issues
|
|
164
|
+
|
|
165
|
+
Bug reports and feature requests: [github.com/Warhorze/sql-code-graph/issues](https://github.com/Warhorze/sql-code-graph/issues)
|
|
166
|
+
|
|
167
|
+
Questions and discussion: [github.com/Warhorze/sql-code-graph/discussions](https://github.com/Warhorze/sql-code-graph/discussions)
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
sqlcg/__init__.py,sha256=UOfmI89XKJTvVCisH5LdsjbnKEv-ESDsi5XGcM4VisY,115
|
|
2
|
+
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
|
+
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
|
+
sqlcg/cli/main.py,sha256=4FvjYUmiuX6Zij0zuiMwWJTOXb_OSQe60poQQ3W6qSA,987
|
|
5
|
+
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=Vurb_PdHQ6Aw5ZRFEbQwUiylkz5D4j849EwtIqgagHk,3168
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=iFr8re4z_0qxz_3LTH_5pQoleIQ3cHJy9eeeHrrEp4o,2866
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=FXPF8vEc0S03FN-fiUO3YauOsDe3p9yp4Wy9entj8tE,5793
|
|
10
|
+
sqlcg/cli/commands/git.py,sha256=d1LDKaqMfaW28U3rCWjaEe-GB5RybJWsz36iBkNXF9Y,2253
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=u0jL9PeDKloTfEBWpdcpH7P7ASTakFsiEuEEMObjm0U,3208
|
|
12
|
+
sqlcg/cli/commands/install.py,sha256=499JWrosmceKmSOmGohABj0M0jvrcURyt4tHfEXNoTQ,1964
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=z_K_ARmuAnjAxWD-CXkjGjXI-DUgWV19yzjJW2cl8wI,1484
|
|
14
|
+
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
15
|
+
sqlcg/cli/commands/watch.py,sha256=KOlQ0ZoYnzTxqsSnJvHdr656vaG6zNRfKRefyqkTJzg,1889
|
|
16
|
+
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
17
|
+
sqlcg/core/config.py,sha256=acrNRlOTIEKr2ttWFqVToiN-9Z9csbBCTJvQLtjCI3g,3004
|
|
18
|
+
sqlcg/core/graph_db.py,sha256=BN3QUD8hNVY5I7qsKj5zvl8v2uT_hswKvvkmwZ3mClA,5551
|
|
19
|
+
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
20
|
+
sqlcg/core/kuzu_backend.py,sha256=6ymm1Q6pj3jw4luKV5q0-qFpWh3KZmQiktliHPc-YoU,9656
|
|
21
|
+
sqlcg/core/neo4j_backend.py,sha256=Tl2_jGv086DTJYQBixv-Tm_misyd_5-iEb_UuCjKk_I,7058
|
|
22
|
+
sqlcg/core/queries.py,sha256=qxoMH75yGWLwNH9Ki9l9NV9IzOsH6fgdAsHdewLRn-o,2733
|
|
23
|
+
sqlcg/core/schema.cypher,sha256=BNMbXaHtINT3uaW0vlnBrG8DLa6k8i-CfOkrF-ZVo_U,2220
|
|
24
|
+
sqlcg/core/schema.py,sha256=miHPMh2hSQueNdGfD-7pNXk0EIDsCkEh431eI9_iTEI,1269
|
|
25
|
+
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
26
|
+
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
27
|
+
sqlcg/indexer/indexer.py,sha256=rRu51-BOIJiRaitE2V-f_VffwULlWtN5sG1kEplw8_s,11338
|
|
28
|
+
sqlcg/indexer/walker.py,sha256=WpF5mJvc6ayN_DJ52w2UQnNxXeqh03QbBeYEqrKpAZI,1752
|
|
29
|
+
sqlcg/indexer/watcher.py,sha256=OaYiQTQMIPdVQEtuJqY7Z9zCi8vr2UqWOkm4Ygp_Ap4,6697
|
|
30
|
+
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
31
|
+
sqlcg/lineage/aggregator.py,sha256=wEQmZSL0COKKRwQpfO5skL427aJlsDa4qiH-UvSSs24,1905
|
|
32
|
+
sqlcg/lineage/schema_resolver.py,sha256=e6PU99SO6L-bIaFLwOekarhass-SeGoeVdB9PgbLSjg,6803
|
|
33
|
+
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
34
|
+
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
35
|
+
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
36
|
+
sqlcg/parsers/ansi_parser.py,sha256=S82CfyQlB2VCwU4eKJOXh4blFQBGtz5q0wuxHrFFrn8,6539
|
|
37
|
+
sqlcg/parsers/base.py,sha256=a7YDijigCkeGrLJjgnckp78mKtv9o6O75s9iKHLu5qc,14812
|
|
38
|
+
sqlcg/parsers/bigquery_parser.py,sha256=6VfKhTUVLbRdKmQieEe9S8oxv6-zzqXw4t6DeGRUlEs,2624
|
|
39
|
+
sqlcg/parsers/postgres_parser.py,sha256=-pyBr-KU4JGRurxsvJmK5jgdTcNesSDClTzEsl4o2A8,744
|
|
40
|
+
sqlcg/parsers/registry.py,sha256=7l5ODWszz6CDC_5ZhhQkST9U-pvqJ-i6D0GqPXwcWhE,1325
|
|
41
|
+
sqlcg/parsers/snowflake_parser.py,sha256=oNfKAA95AJpy292tp0I3o5vuT7tf6a_4dtUJBSErfBg,5463
|
|
42
|
+
sqlcg/parsers/tsql_parser.py,sha256=zZQ6CqV3lXNUG_FOeWRwv9AEXhAeAw4LcTDAaxayTW4,754
|
|
43
|
+
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
44
|
+
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
45
|
+
sqlcg/server/models.py,sha256=Tt1EoD7hYsQ0Q92RDkjEhoWwhDGkqA3jehauSvOVD0w,2812
|
|
46
|
+
sqlcg/server/server.py,sha256=2EwKGehcIdKqCjZagbv8VrvnVCp-D5Lh-z38FFHRcN8,1723
|
|
47
|
+
sqlcg/server/tools.py,sha256=YBLbTdxCY0r39p8jjENps9t6HftHk-6sFVF-y5vzMF8,19704
|
|
48
|
+
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
49
|
+
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
50
|
+
sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
|
|
51
|
+
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
52
|
+
sql_code_graph-0.2.1.dist-info/METADATA,sha256=BbaL4fmjPJ2-NkLmiGU4-dvMOlZi1dL_CioMI3bdTvY,5920
|
|
53
|
+
sql_code_graph-0.2.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
54
|
+
sql_code_graph-0.2.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
55
|
+
sql_code_graph-0.2.1.dist-info/RECORD,,
|
sqlcg/__init__.py
ADDED
sqlcg/__main__.py
ADDED
sqlcg/cli/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI module for sqlcg."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI commands for sqlcg."""
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Analyze command for lineage analysis."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
|
|
7
|
+
from sqlcg.core.config import get_backend
|
|
8
|
+
from sqlcg.core.schema import NodeLabel, RelType
|
|
9
|
+
|
|
10
|
+
app = typer.Typer(help="Lineage analysis")
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command("upstream")
def upstream(  # noqa: B008
    ref: str = typer.Argument(..., help="Column reference"),  # noqa: B008
    depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"),  # noqa: B008
) -> None:
    """Trace upstream column lineage."""
    # ref:   value matched against the `id` property of the starting column node.
    # depth: maximum number of lineage hops to follow (1..100 inclusive).
    # Exits with status 1 when depth is out of range.

    # Bounds check for depth to prevent performance DoS. The value is spliced
    # into the query text below, so it must stay a small validated integer.
    if not 1 <= depth <= 100:
        console.print("[red]Error: --depth must be between 1 and 100[/red]")
        raise typer.Exit(1)

    # DISTINCT matters here: a variable-length match yields one row per path,
    # so a source reachable through several paths would otherwise be listed
    # repeatedly and eat into the LIMIT.
    query = (
        f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
        f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
        "RETURN DISTINCT src.id AS id LIMIT 100"
    )
    with get_backend() as backend:
        results = backend.run_read(query, {"ref": ref})
        _print_table(results, ["id"])
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@app.command("downstream")
def downstream(  # noqa: B008
    ref: str = typer.Argument(..., help="Column reference"),  # noqa: B008
    depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"),  # noqa: B008
) -> None:
    """Trace downstream column lineage."""
    # ref:   value matched against the `id` property of the starting column node.
    # depth: maximum number of lineage hops to follow (1..100 inclusive).
    # Exits with status 1 when depth is out of range.

    # Bounds check for depth to prevent performance DoS. The value is spliced
    # into the query text below, so it must stay a small validated integer.
    if not 1 <= depth <= 100:
        console.print("[red]Error: --depth must be between 1 and 100[/red]")
        raise typer.Exit(1)

    # DISTINCT matters here: a variable-length match yields one row per path,
    # so a target reachable through several paths would otherwise be listed
    # repeatedly and eat into the LIMIT.
    query = (
        f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
        f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
        "RETURN DISTINCT dst.id AS id LIMIT 100"
    )
    with get_backend() as backend:
        results = backend.run_read(query, {"ref": ref})
        _print_table(results, ["id"])
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@app.command("impact")
def impact(  # noqa: B008
    table: str = typer.Argument(..., help="Table name to analyze"),  # noqa: B008
) -> None:
    """Show all queries impacted by a table."""
    # A query is reported when it has a SELECTS_FROM edge pointing at the
    # table node whose `qualified` property equals the given name.
    match_clause = (
        f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
        f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
    )
    return_clause = "RETURN q.id AS id, q.kind AS kind LIMIT 100"
    params = {"t": table}

    with get_backend() as backend:
        impacted = backend.run_read(match_clause + return_clause, params)
        _print_table(impacted, ["id", "kind"])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@app.command("unused")
def unused(
    threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
) -> None:
    """Find tables with no query references (or at most --threshold references)."""
    # The option used to be accepted and silently ignored. It is now honoured:
    # a table is listed when the number of SELECTS_FROM edges pointing at it is
    # less than or equal to `threshold`.
    if threshold <= 0:
        # Default path: exactly the original "no references at all" query.
        query = (
            f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
            "RETURN t.qualified AS qualified LIMIT 100"
        )
        params: dict = {}
    else:
        # OPTIONAL MATCH keeps unreferenced tables in the result with a count of 0.
        query = (
            f"MATCH (t:{NodeLabel.TABLE}) "
            f"OPTIONAL MATCH (t)<-[:{RelType.SELECTS_FROM}]-(q) "
            "WITH t, COUNT(q) AS refs WHERE refs <= $threshold "
            "RETURN t.qualified AS qualified LIMIT 100"
        )
        params = {"threshold": threshold}

    with get_backend() as backend:
        results = backend.run_read(query, params)
        _print_table(results, ["qualified"])
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _print_table(rows: list[dict], columns: list[str]) -> None:
    """Render result rows as a Rich table, or a notice when there are none."""
    if rows:
        grid = Table(*columns)
        for record in rows:
            # Missing keys become empty cells; every value is stringified.
            cells = [str(record.get(col, "")) for col in columns]
            grid.add_row(*cells)
        console.print(grid)
    else:
        console.print("[yellow]No results[/yellow]")
|
sqlcg/cli/commands/db.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Database management commands."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
from sqlcg.core.config import get_backend, get_db_path
|
|
9
|
+
from sqlcg.core.schema import NodeLabel
|
|
10
|
+
from sqlcg.utils.logging import getLogger
|
|
11
|
+
|
|
12
|
+
logger = getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
app = typer.Typer(help="Database management commands")
|
|
15
|
+
console = Console()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@app.command("init")
def db_init() -> None:
    """Initialise the graph database (idempotent)."""
    location = get_db_path()
    # The parent directory has to exist before the backend is opened.
    location.parent.mkdir(parents=True, exist_ok=True)

    with get_backend() as backend:
        backend.init_schema()
        schema_version = backend.get_schema_version()
        console.print(
            f"[green]Database initialised[/green] at {location} (schema v{schema_version})"
        )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@app.command("reset")
def db_reset(  # noqa: B008
    repo: str | None = typer.Option(None, "--repo", help="Reset only this repo path"),  # noqa: B008
) -> None:
    """Wipe the database or a single repo's subgraph."""
    if repo:
        # Use run_write for the mutation.
        # NOTE(review): this removes the Repo node and its relationships only;
        # whether nodes that hung off the repo should also go is not visible here.
        with get_backend() as backend:
            backend.run_write(
                "MATCH (r:Repo {path: $p}) DETACH DELETE r",
                {"p": repo},
            )
        console.print(f"[yellow]Reset repo[/yellow] {repo}")
        return

    # Full reset: remove the on-disk store. The path may be a directory or a
    # single regular file; rmtree() cannot delete a file, and with
    # ignore_errors=True that failure used to be swallowed while the command
    # still reported success.
    db_path = get_db_path()
    # NOTE(review): sidecar names are an assumption (write-ahead log / shadow
    # files stored next to a single-file database) - confirm against the
    # backend before relying on them. Absent files are skipped.
    targets = [db_path] + [
        db_path.with_name(db_path.name + suffix) for suffix in (".wal", ".shadow")
    ]
    try:
        for target in targets:
            if target.is_dir():
                shutil.rmtree(target)
            elif target.exists():
                target.unlink()
    except OSError as exc:
        logger.error(f"Failed to wipe database at {db_path}: {exc}")
        console.print(f"[red]Failed to wipe database at {db_path}: {exc}[/red]")
        raise typer.Exit(1) from exc
    console.print("[red]Database wiped[/red]")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@app.command("info")
def db_info() -> None:
    """Show database stats."""
    with get_backend() as backend:
        schema_version = backend.get_schema_version()
        if not schema_version:
            schema_version = "unknown"
        console.print(f"Schema version: {schema_version}")

        # One count query per node label; a failing label is reported and the
        # loop carries on with the remaining ones.
        for node_label in NodeLabel:
            try:
                rows = backend.run_read(f"MATCH (n:{node_label}) RETURN COUNT(*) AS count", {})
                if rows:
                    total = rows[0]["count"]
                else:
                    total = 0
                console.print(f" {node_label}: {total}")
            except Exception as err:
                # Log unexpected exceptions instead of silently skipping
                logger.error(f"Error getting count for {node_label}: {err}")
                console.print(f" [red]{node_label}: error[/red]")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@app.command("list-repos")
def list_repos() -> None:
    """List all indexed repositories."""
    with get_backend() as backend:
        repos = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})

    # Guard clause: nothing indexed yet.
    if not repos:
        console.print("[yellow]No repositories indexed[/yellow]")
        return

    # Imported lazily, only when there is something to render.
    from rich.table import Table

    listing = Table("Path", "Name")
    for entry in repos:
        path_cell = str(entry.get("path", ""))
        name_cell = str(entry.get("name", ""))
        listing.add_row(path_cell, name_cell)
    console.print(listing)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Find command for searching the graph."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
|
|
7
|
+
from sqlcg.core.config import get_backend
|
|
8
|
+
from sqlcg.core.schema import NodeLabel
|
|
9
|
+
|
|
10
|
+
app = typer.Typer(help="Search the graph")
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command("table")
def find_table(  # noqa: B008
    name: str = typer.Argument(..., help="Table name to search for"),  # noqa: B008
) -> None:
    """Find a table by name."""
    # Substring match on the table's `qualified` property, capped at 50 rows.
    cypher = (
        f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
        "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50"
    )
    params = {"name": name}
    with get_backend() as backend:
        matches = backend.run_read(cypher, params)
        _print_table(matches, ["qualified", "kind"])
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@app.command("column")
def find_column(  # noqa: B008
    ref: str = typer.Argument(..., help="Column reference (table.column)"),  # noqa: B008
) -> None:
    """Find a column by table.column reference."""
    # Substring match on the column node's `id` property, capped at 50 rows.
    cypher = f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50"
    params = {"ref": ref}
    with get_backend() as backend:
        matches = backend.run_read(cypher, params)
        _print_table(matches, ["id"])
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@app.command("pattern")
def find_pattern(  # noqa: B008
    pattern: str = typer.Argument(..., help="SQL pattern to search for"),  # noqa: B008
) -> None:
    """Find queries containing a SQL pattern."""
    # Plain substring match against each query node's stored `sql` text.
    cypher = (
        f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
        "RETURN q.id AS id, q.kind AS kind LIMIT 50"
    )
    params = {"pattern": pattern}
    with get_backend() as backend:
        matches = backend.run_read(cypher, params)
        _print_table(matches, ["id", "kind"])
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _print_table(rows: list[dict], columns: list[str]) -> None:
    """Show rows in a Rich table with the given column headers."""
    # An empty result gets a short notice instead of an empty table.
    if not rows:
        console.print("[yellow]No results[/yellow]")
        return

    output = Table(*columns)
    for item in rows:
        # Absent keys render as blank cells; values are converted with str().
        output.add_row(*(str(item.get(header, "")) for header in columns))
    console.print(output)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""The sqlcg gain command — view metrics and feedback analytics."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import UTC, datetime, timedelta
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
|
|
10
|
+
from sqlcg.metrics.store import MetricsStore
|
|
11
|
+
from sqlcg.utils.logging import getLogger
|
|
12
|
+
|
|
13
|
+
logger = getLogger(__name__)
|
|
14
|
+
console = Console()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _emit_json(payload: dict) -> None:
    """Print *payload* as a single, unmodified JSON line."""
    # Rich would otherwise interpret markup and emoji codes, colourise the
    # text, and hard-wrap it at the console width - any of which can corrupt
    # output meant for a JSON parser.
    console.print(
        json.dumps(payload),
        markup=False,
        highlight=False,
        emoji=False,
        soft_wrap=True,
    )


def gain_cmd(
    json_output: bool = typer.Option(  # noqa: B008
        False,
        "--json",
        help="Output metrics as JSON",
    ),
    _metrics_path: Path | None = None,  # For testing only
) -> None:
    """Show metrics and feedback analytics.

    Displays:
    - Section A: Total MCP tool calls and calls in the last 7 days
    - Section B: Parse success trend (last 5 index runs)
    - Section C: True positive feedback rate (if ≥5 samples)
    - Section D: Top 3 most-called tools

    All metrics are opt-in via SQLCG_METRICS environment variable.
    If no metrics have been collected, shows a message and exits 0.
    """
    metrics_path = (
        Path.home() / ".sqlcg" / "metrics.db" if _metrics_path is None else _metrics_path
    )

    # No metrics file: report zeros (JSON) or a short notice, then stop.
    if not metrics_path.exists():
        if json_output:
            _emit_json(
                {
                    "total_calls": 0,
                    "last_7d_calls": 0,
                    "index_runs": 0,
                    "feedback_tp": 0,
                    "feedback_total": 0,
                    "top_tools": [],
                }
            )
        else:
            console.print("No metrics collected yet.")
        return

    # Bound before the try so the finally clause can tell whether the store
    # was ever opened.
    metrics = None
    try:
        metrics = MetricsStore(metrics_path)
        metrics.init_schema()  # Ensure schema exists

        # Section A: Total calls and last 7 days
        all_calls = metrics.execute_query("SELECT COUNT(*) as count FROM tool_calls")
        total_calls = all_calls[0][0] if all_calls else 0

        cutoff_date = (datetime.now(UTC) - timedelta(days=7)).isoformat()
        last_7d = metrics.execute_query(
            "SELECT COUNT(*) as count FROM tool_calls WHERE timestamp > ?",
            (cutoff_date,),
        )
        last_7d_calls = last_7d[0][0] if last_7d else 0

        # Section B: Parse success (last 5 index runs)
        index_runs = metrics.execute_query(
            """
            SELECT files_parsed, parse_errors
            FROM index_runs
            ORDER BY timestamp DESC
            LIMIT 5
            """
        )

        # Section C: TP feedback rate. SUM() over zero rows yields NULL, hence
        # the `or 0` fallbacks.
        feedback_counts = metrics.execute_query(
            """
            SELECT
                SUM(CASE WHEN label = 'TP' THEN 1 ELSE 0 END) as tp_count,
                COUNT(*) as total
            FROM feedback
            """
        )
        tp_count = (feedback_counts[0][0] or 0) if feedback_counts else 0
        fb_total = (feedback_counts[0][1] or 0) if feedback_counts else 0

        # Section D: Top 3 tools
        top_tools = metrics.execute_query(
            """
            SELECT tool_name, COUNT(*) as count
            FROM tool_calls
            GROUP BY tool_name
            ORDER BY count DESC
            LIMIT 3
            """
        )

        if json_output:
            _emit_json(
                {
                    "total_calls": total_calls,
                    "last_7d_calls": last_7d_calls,
                    "index_runs": len(index_runs),
                    "feedback_tp": tp_count,
                    "feedback_total": fb_total,
                    "top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
                }
            )
        else:
            # Human-readable output
            console.print("\n[bold]SQL Code Graph Metrics[/bold]")
            console.print()

            # Section A
            console.print("[bold cyan]A. Tool Calls[/bold cyan]")
            console.print(f" Total: {total_calls}")
            console.print(f" Last 7 days: {last_7d_calls}")
            console.print()

            # Section B
            console.print("[bold cyan]B. Index Runs[/bold cyan]")
            if index_runs:
                console.print(f" Recent runs: {len(index_runs)}")
                # Treat NULL counts as 0 so one incomplete row cannot abort
                # the whole report.
                total_files = sum((row[0] or 0) for row in index_runs)
                total_errors = sum((row[1] or 0) for row in index_runs)
                success_rate = (
                    100 * (total_files - total_errors) / total_files if total_files > 0 else 0
                )
                console.print(f" Success rate (last 5): {success_rate:.1f}%")
            else:
                console.print(" No index runs recorded")
            console.print()

            # Section C
            console.print("[bold cyan]C. Feedback[/bold cyan]")
            if fb_total >= 5:
                tp_rate = 100 * tp_count / fb_total
                console.print(f" True positive rate: {tp_rate:.1f}% ({tp_count}/{fb_total})")
            else:
                console.print(f" Samples: {fb_total}/5 (need 5 to show TP rate)")
            console.print()

            # Section D
            if top_tools:
                console.print("[bold cyan]D. Top Tools[/bold cyan]")
                for i, (name, count) in enumerate(top_tools, 1):
                    console.print(f" {i}. {name}: {count}")
                console.print()

    except Exception as exc:
        # Top-level CLI boundary: report the failure instead of a traceback.
        logger.error(f"Failed to generate metrics report: {exc}")
        if json_output:
            _emit_json({"error": str(exc)})
        else:
            console.print(f"[red]Error: {exc}[/red]")
    finally:
        # Always release the store, including when a query above failed.
        if metrics is not None:
            try:
                metrics.close()
            except Exception as exc:
                logger.error(f"Failed to close metrics store: {exc}")
|