sql-code-graph 0.3.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.0.dist-info}/METADATA +87 -9
- sql_code_graph-1.0.0.dist-info/RECORD +63 -0
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +24 -0
- sqlcg/cli/commands/db.py +40 -7
- sqlcg/cli/commands/gain.py +5 -17
- sqlcg/cli/commands/git.py +71 -40
- sqlcg/cli/commands/index.py +122 -17
- sqlcg/cli/commands/install.py +147 -8
- sqlcg/cli/commands/mcp.py +12 -0
- sqlcg/cli/commands/reindex.py +170 -0
- sqlcg/cli/commands/uninstall.py +94 -39
- sqlcg/cli/commands/watch.py +14 -1
- sqlcg/cli/main.py +8 -0
- sqlcg/core/config.py +185 -2
- sqlcg/core/graph_db.py +65 -0
- sqlcg/core/kuzu_backend.py +177 -6
- sqlcg/core/neo4j_backend.py +38 -0
- sqlcg/core/queries.cypher +114 -0
- sqlcg/core/queries.py +44 -82
- sqlcg/core/schema.cypher +15 -3
- sqlcg/core/schema.py +2 -1
- sqlcg/indexer/error_classify.py +140 -0
- sqlcg/indexer/git_delta.py +121 -0
- sqlcg/indexer/indexer.py +952 -125
- sqlcg/indexer/pool.py +446 -0
- sqlcg/indexer/walker.py +1 -3
- sqlcg/indexer/watcher.py +68 -18
- sqlcg/lineage/aggregator.py +58 -2
- sqlcg/lineage/schema_resolver.py +26 -14
- sqlcg/parsers/ansi_parser.py +195 -26
- sqlcg/parsers/base.py +609 -59
- sqlcg/parsers/bigquery_parser.py +7 -2
- sqlcg/parsers/postgres_parser.py +7 -2
- sqlcg/parsers/registry.py +7 -2
- sqlcg/parsers/snowflake_parser.py +170 -8
- sqlcg/parsers/tsql_parser.py +7 -2
- sqlcg/server/models.py +297 -4
- sqlcg/server/noise_filter.py +167 -0
- sqlcg/server/skill.py +256 -0
- sqlcg/server/tools.py +934 -178
- sql_code_graph-0.3.0.dist-info/RECORD +0 -56
- {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-0.3.0.dist-info → sql_code_graph-1.0.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql-code-graph
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: SQL code graph analyzer and lineage tracer
|
|
5
5
|
Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
|
|
6
6
|
Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
|
|
@@ -38,6 +38,19 @@ Description-Content-Type: text/markdown
|
|
|
38
38
|
|
|
39
39
|
# sql-code-graph
|
|
40
40
|
|
|
41
|
+
> **Pre-1.0 — expect breaking changes.** APIs, CLI flags, and graph schema may
|
|
42
|
+
> change between releases without a deprecation period. Pin to an exact version
|
|
43
|
+
> in production. Re-indexing is always the migration path.
|
|
44
|
+
|
|
45
|
+
> **Dialect support.** sqlcg is built on [sqlglot](https://github.com/tobymao/sqlglot),
|
|
46
|
+
> so in theory it can work for any dialect sqlglot parses. In practice we've only
|
|
47
|
+
> tested it against a production **Snowflake** warehouse (~1,400 SQL files) — and
|
|
48
|
+
> getting that one dialect working properly was already challenging. Other dialects
|
|
49
|
+
> (`bigquery`, `postgres`, `ansi`, `tsql`, `dbt`) may well work, but their lineage
|
|
50
|
+
> hasn't been validated, so expect rough edges. We'd gladly collaborate to get
|
|
51
|
+
> another dialect integrated — [open an issue](https://github.com/Warhorze/sql-code-graph/issues)
|
|
52
|
+
> with a minimal, anonymised corpus we can test against.
|
|
53
|
+
|
|
41
54
|
SQL lineage and dependency analysis as an MCP server for Claude Code.
|
|
42
55
|
|
|
43
56
|
Indexes a directory of `.sql` files into a graph database and exposes lineage
|
|
@@ -91,7 +104,7 @@ sqlcg install
|
|
|
91
104
|
# Only git-tracked files are indexed — build artefacts, node_modules,
|
|
92
105
|
# and .venv are ignored automatically.
|
|
93
106
|
sqlcg db init
|
|
94
|
-
sqlcg index ./sql --dialect snowflake #
|
|
107
|
+
sqlcg index ./sql --dialect snowflake # snowflake is the only tested dialect
|
|
95
108
|
|
|
96
109
|
# 5. (Optional) Keep the graph fresh on branch switches
|
|
97
110
|
cd /your/sql/repo
|
|
@@ -108,7 +121,7 @@ To avoid passing `--dialect` every time, create `.sqlcg.toml` in your repo root:
|
|
|
108
121
|
|
|
109
122
|
```toml
|
|
110
123
|
[sqlcg]
|
|
111
|
-
dialect = "snowflake" #
|
|
124
|
+
dialect = "snowflake" # the only tested dialect
|
|
112
125
|
```
|
|
113
126
|
|
|
114
127
|
The git hook and `sqlcg index --dialect auto` both read this file.
|
|
@@ -149,7 +162,8 @@ Quality is shown per-file after `sqlcg index` and in `sqlcg gain` Section F.
|
|
|
149
162
|
`list_dialects_and_repos()` warns when scripting fallback exceeds 20% of queries.
|
|
150
163
|
|
|
151
164
|
**What causes TABLE_ONLY?** Mostly `SELECT *` — sqlglot can't trace column names through
|
|
152
|
-
a wildcard
|
|
165
|
+
a wildcard without knowing the source table's columns. See [Resolving SELECT *](#resolving-select-)
|
|
166
|
+
below for how sqlcg expands wildcards automatically from your DDL and CTAS bodies.
|
|
153
167
|
|
|
154
168
|
**What causes SCRIPTING_FALLBACK?** Snowflake `$$` procedure bodies or `BEGIN…END` scripting
|
|
155
169
|
blocks. sqlglot parses the block as a raw `Command` node and extracts DML via tokenizer
|
|
@@ -157,19 +171,62 @@ fallback. Table edges are usually correct; column edges are not.
|
|
|
157
171
|
|
|
158
172
|
Check `sqlcg db info` for the parsing mode distribution across all indexed queries.
|
|
159
173
|
|
|
174
|
+
## Resolving SELECT *
|
|
175
|
+
|
|
176
|
+
A `SELECT *` ETL produces `TABLE_ONLY` parse quality because sqlglot needs the source
|
|
177
|
+
table's column list to expand the wildcard into individual columns. sqlcg resolves this
|
|
178
|
+
**automatically, with no extra setup** — there is no CSV to export or command to run.
|
|
179
|
+
|
|
180
|
+
Wildcards are expanded from two sources harvested while indexing:
|
|
181
|
+
|
|
182
|
+
1. **DDL files** — `CREATE TABLE` / `CREATE VIEW` statements give sqlcg the column list
|
|
183
|
+
for any table they define.
|
|
184
|
+
2. **Cross-file CTAS bodies** — a `CREATE TABLE … AS SELECT` (or CTE) in one file is used
|
|
185
|
+
to resolve `SELECT *` against that table in another file.
|
|
186
|
+
|
|
187
|
+
So the only thing you need to do is **index the DDL alongside your ETLs** — point `sqlcg
|
|
188
|
+
index` at a path that contains both (e.g. the repo root, or both `ddl/` and `etl/`). The
|
|
189
|
+
more of your `CREATE` statements sqlcg can see, the more wildcards it resolves.
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
sqlcg index . --dialect snowflake # index DDL + ETLs together
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, and
|
|
196
|
+
`trace_column_lineage` returns results for queries that previously returned empty.
|
|
197
|
+
|
|
198
|
+
> **Note:** earlier versions accepted an exported `INFORMATION_SCHEMA` CSV (`sqlcg
|
|
199
|
+
> load-schema`). That path was **removed** — profiling showed it added zero lineage
|
|
200
|
+
> edges over DDL + cross-file CTAS resolution on a real warehouse. DDL is now the
|
|
201
|
+
> single source of column truth; no CSV is needed or accepted.
|
|
202
|
+
|
|
160
203
|
## MCP tools reference
|
|
161
204
|
|
|
162
205
|
| Tool | Description |
|
|
163
206
|
|------|-------------|
|
|
164
207
|
| `index_repo(repo_path, dialect)` | Index a directory of SQL files |
|
|
165
|
-
|
|
|
166
|
-
| `
|
|
208
|
+
| **Lineage & dependencies** | |
|
|
209
|
+
| `trace_column_lineage(table_col)` | Trace a column's value upstream to its sources |
|
|
167
210
|
| `get_upstream_dependencies(table_col)` | Full upstream dependency chain |
|
|
168
211
|
| `get_downstream_dependencies(table_col)` | Full downstream dependency chain |
|
|
212
|
+
| `find_table_usages(table_name)` | Find all queries that read a table |
|
|
213
|
+
| `find_definition(table_qualified)` | Find where a table/view is defined |
|
|
214
|
+
| **Change impact** | |
|
|
215
|
+
| `get_change_scope(table_qualified)` | Blast radius of changing a table (impact + risk) |
|
|
216
|
+
| `diff_impact(changed_files)` | What a set of changed files affects downstream |
|
|
217
|
+
| `get_backfill_order(table_qualified)` | Topological rebuild/backfill order |
|
|
218
|
+
| `scope_change(target)` | Synthesised change-scope summary for a target |
|
|
219
|
+
| **Search & meta** | |
|
|
169
220
|
| `search_sql_pattern(query)` | Full-text search across indexed SQL |
|
|
170
221
|
| `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
|
|
171
222
|
| `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
|
|
172
223
|
| `execute_cypher(query)` | Raw Cypher query against the graph |
|
|
224
|
+
| `submit_feedback(...)` | Report a false positive/negative to improve metrics |
|
|
225
|
+
|
|
226
|
+
> **Input format**: lineage/dependency tools expect a **schema-qualified** column
|
|
227
|
+
> reference — `schema.table.column` (e.g. `ba.orders.customer_id`), not a bare
|
|
228
|
+
> `table.column`. Each returned node carries both `name` (the bare column) and
|
|
229
|
+
> `table` (the owning `schema.table`), so results are navigable without a second lookup.
|
|
173
230
|
|
|
174
231
|
> **LLM agent tip**: call `db_info()` before lineage queries to check that
|
|
175
232
|
> `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
|
|
@@ -183,19 +240,40 @@ Full option reference: [docs/cli.md](docs/cli.md)
|
|
|
183
240
|
```bash
|
|
184
241
|
sqlcg install # register MCP server in Claude Code
|
|
185
242
|
sqlcg db init # initialise graph database
|
|
186
|
-
sqlcg index <path> --dialect
|
|
243
|
+
sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
|
|
187
244
|
sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
|
|
245
|
+
sqlcg index <path> --profile # index + print per-stage timing and slowest files
|
|
246
|
+
sqlcg reindex <path> --from <sha> --to <sha> # incremental resync of only changed files
|
|
247
|
+
sqlcg analyze unused # tables with no query references
|
|
248
|
+
sqlcg analyze upstream/downstream # trace lineage from the CLI
|
|
249
|
+
sqlcg find table/column/pattern # search the graph
|
|
188
250
|
sqlcg watch <path> # watch for file changes
|
|
189
|
-
sqlcg git install-hooks # install post-checkout
|
|
251
|
+
sqlcg git install-hooks # install post-checkout + post-merge resync hooks
|
|
190
252
|
sqlcg gain # show usage metrics
|
|
191
253
|
sqlcg report # generate FP/error report
|
|
254
|
+
sqlcg mcp best-practices # print the fact/heuristic boundary for the MCP tools
|
|
192
255
|
sqlcg mcp start # start MCP server manually
|
|
193
256
|
sqlcg version # show installed version
|
|
194
257
|
```
|
|
195
258
|
|
|
196
259
|
## Supported dialects
|
|
197
260
|
|
|
198
|
-
|
|
261
|
+
sqlcg is built on [sqlglot](https://github.com/tobymao/sqlglot), so other dialects
|
|
262
|
+
*can* be parsed in theory. Only Snowflake has been tested against a real corpus,
|
|
263
|
+
though — the table reflects what's actually been exercised, not what sqlglot can do.
|
|
264
|
+
|
|
265
|
+
| Dialect | Status |
|
|
266
|
+
|---------|--------|
|
|
267
|
+
| `snowflake` | ✅ Tested against a production DWH (~1,400 files) |
|
|
268
|
+
| `bigquery` | ⚠️ Unproven — parses via sqlglot, lineage not validated |
|
|
269
|
+
| `postgres` | ⚠️ Unproven — parses via sqlglot, lineage not validated |
|
|
270
|
+
| `ansi` | ⚠️ Unproven — parses via sqlglot, lineage not validated |
|
|
271
|
+
| `tsql` | ⚠️ Unproven — parses via sqlglot, lineage not validated |
|
|
272
|
+
| `dbt` | ⚠️ Unproven — via optional extra, lineage not validated |
|
|
273
|
+
|
|
274
|
+
Want another dialect properly supported? We'd be glad to collaborate — open an issue
|
|
275
|
+
with a minimal, anonymised corpus we can develop and test against. Getting Snowflake
|
|
276
|
+
right was real work, so a representative corpus is what makes the difference.
|
|
199
277
|
|
|
200
278
|
## Development
|
|
201
279
|
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
sqlcg/__init__.py,sha256=2lT2oiKX19arg1oTOFf13dXA3qyyQNpRevdvKHZIOp4,115
|
|
2
|
+
sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
|
|
3
|
+
sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
|
|
4
|
+
sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
|
|
5
|
+
sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
|
|
6
|
+
sqlcg/cli/commands/analyze.py,sha256=kfcySSjc_UhSsOsJg7o5VD7TH4v72KVzol7Cdn2EuOU,4127
|
|
7
|
+
sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
|
|
8
|
+
sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
|
|
9
|
+
sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
|
|
10
|
+
sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
|
|
11
|
+
sqlcg/cli/commands/index.py,sha256=6f-kaoY5roY4DDvEOi_HrDnBG9Jrqy0_A47gsxZsNUQ,7421
|
|
12
|
+
sqlcg/cli/commands/install.py,sha256=mNVXdGlQ4JtCaaibuzU-inf519T97mC-Nj9K-G2gMQY,7525
|
|
13
|
+
sqlcg/cli/commands/mcp.py,sha256=H1j6b5Tqr5VXja2GafgD5sJD6hZ5rsgfPwIikK1PZqc,1903
|
|
14
|
+
sqlcg/cli/commands/reindex.py,sha256=iZXxYGI2m2wxkvIA1mB9uvOEp66QaT5zF5TGd0OpqlU,6275
|
|
15
|
+
sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
|
|
16
|
+
sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
|
|
17
|
+
sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
|
|
18
|
+
sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
|
|
19
|
+
sqlcg/core/config.py,sha256=em9gYtau2hu-scWzZk4CSZh4L8r9ZymgmH_2BspqsQw,9773
|
|
20
|
+
sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
|
|
21
|
+
sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
|
|
22
|
+
sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
|
|
23
|
+
sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
|
|
24
|
+
sqlcg/core/queries.cypher,sha256=auWIPJeVjgykk6wqTRMoNQCwRhzG2ZhF4MRufso2KYA,4182
|
|
25
|
+
sqlcg/core/queries.py,sha256=XBdQTBSsX3WUqO3AdX5EWYH435GDrbwEg1BR9AvJSSo,1880
|
|
26
|
+
sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
|
|
27
|
+
sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
|
|
28
|
+
sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
|
|
29
|
+
sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
|
|
30
|
+
sqlcg/indexer/error_classify.py,sha256=eWmc9WdOFe9kY_DMgKL0vv9gfcKnFw8e8U7cpUUw9wU,5139
|
|
31
|
+
sqlcg/indexer/git_delta.py,sha256=V7WiNgiYPRo97K_mB3ymkJDZGoFExqwTZ2ut0Nqua5o,4383
|
|
32
|
+
sqlcg/indexer/indexer.py,sha256=Jes0SybIDXLWQlWbRrDAbxVfJ7OsdS3PDAVSoRcv3Tc,50605
|
|
33
|
+
sqlcg/indexer/pool.py,sha256=Q9DQmgUsSeKL1S-gNAzMbCNPGI9WsG6Nmt_noh_O8M8,16069
|
|
34
|
+
sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
|
|
35
|
+
sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
|
|
36
|
+
sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
|
|
37
|
+
sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
|
|
38
|
+
sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
|
|
39
|
+
sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
|
|
40
|
+
sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
|
|
41
|
+
sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
|
|
42
|
+
sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
|
|
43
|
+
sqlcg/parsers/base.py,sha256=aw-gueAMdt551peUY0g7lWbswQLPWx0FDCK4RDfUjDE,43205
|
|
44
|
+
sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
|
|
45
|
+
sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
|
|
46
|
+
sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
|
|
47
|
+
sqlcg/parsers/snowflake_parser.py,sha256=Xc80vlhKiJqbt4cT7UcpYKcYzV9rSqFyG0d_oTc-eJE,12627
|
|
48
|
+
sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
|
|
49
|
+
sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
|
|
50
|
+
sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
|
|
51
|
+
sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
|
|
52
|
+
sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
|
|
53
|
+
sqlcg/server/server.py,sha256=2EwKGehcIdKqCjZagbv8VrvnVCp-D5Lh-z38FFHRcN8,1723
|
|
54
|
+
sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
|
|
55
|
+
sqlcg/server/tools.py,sha256=Jh16fefXMmw0mYUejoIMAXlJoPAaQoUbgrCghsmHNLk,54892
|
|
56
|
+
sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
|
|
57
|
+
sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
|
|
58
|
+
sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
|
|
59
|
+
sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
|
|
60
|
+
sql_code_graph-1.0.0.dist-info/METADATA,sha256=HQdFHBzEKTlPlqnwRCT9n0iKrmWqkmM5mhM3fOi5lvo,12806
|
|
61
|
+
sql_code_graph-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
62
|
+
sql_code_graph-1.0.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
|
|
63
|
+
sql_code_graph-1.0.0.dist-info/RECORD,,
|
sqlcg/__init__.py
CHANGED
sqlcg/cli/commands/analyze.py
CHANGED
|
@@ -68,6 +68,30 @@ def impact( # noqa: B008
|
|
|
68
68
|
_print_table(results, ["id", "kind"])
|
|
69
69
|
|
|
70
70
|
|
|
71
|
+
@app.command("failures")
|
|
72
|
+
def failures(
|
|
73
|
+
cause: str | None = typer.Option( # noqa: B008
|
|
74
|
+
None, "--cause", help="Filter by E-code bucket (e.g. E5, timeout)"
|
|
75
|
+
),
|
|
76
|
+
limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
|
|
77
|
+
) -> None:
|
|
78
|
+
"""List files that failed to parse, with their dominant cause (E-code bucket).
|
|
79
|
+
|
|
80
|
+
Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
|
|
81
|
+
with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
|
|
82
|
+
an earlier version.
|
|
83
|
+
"""
|
|
84
|
+
with get_backend() as backend:
|
|
85
|
+
cypher = (
|
|
86
|
+
f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
|
|
87
|
+
"AND ($cause IS NULL OR f.parse_cause = $cause) "
|
|
88
|
+
"RETURN f.path AS path, f.parse_cause AS cause "
|
|
89
|
+
f"ORDER BY f.parse_cause LIMIT {limit}"
|
|
90
|
+
)
|
|
91
|
+
rows = backend.run_read(cypher, {"cause": cause})
|
|
92
|
+
_print_table(rows, ["path", "cause"])
|
|
93
|
+
|
|
94
|
+
|
|
71
95
|
@app.command("unused")
|
|
72
96
|
def unused(
|
|
73
97
|
threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
|
sqlcg/cli/commands/db.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Database management commands."""
|
|
2
2
|
|
|
3
|
+
import os
|
|
3
4
|
import shutil
|
|
4
5
|
|
|
5
6
|
import typer
|
|
@@ -16,8 +17,18 @@ console = Console()
|
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
@app.command("init")
|
|
19
|
-
def db_init() -> None:
|
|
20
|
+
def db_init(
|
|
21
|
+
buffer_pool_size: int = typer.Option(
|
|
22
|
+
0,
|
|
23
|
+
"--buffer-pool-size",
|
|
24
|
+
help="KuzuDB buffer pool size in MB (0 = default). "
|
|
25
|
+
"Set to 256-512 on memory-constrained machines.",
|
|
26
|
+
),
|
|
27
|
+
) -> None:
|
|
20
28
|
"""Initialise the graph database (idempotent)."""
|
|
29
|
+
if buffer_pool_size > 0:
|
|
30
|
+
os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
|
|
31
|
+
|
|
21
32
|
db_path = get_db_path()
|
|
22
33
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
23
34
|
with get_backend() as backend:
|
|
@@ -40,11 +51,23 @@ def db_reset( # noqa: B008
|
|
|
40
51
|
)
|
|
41
52
|
console.print(f"[yellow]Reset repo[/yellow] {repo}")
|
|
42
53
|
else:
|
|
43
|
-
# Full reset — delete the DB
|
|
54
|
+
# Full reset — delete the DB. Kuzu may store it as a single file (current,
|
|
55
|
+
# e.g. 0.11.x) or a directory (older versions); also drop the .wal sidecar.
|
|
56
|
+
# shutil.rmtree silently no-ops on a regular file (NotADirectoryError +
|
|
57
|
+
# ignore_errors), so dispatch on the actual filesystem type.
|
|
44
58
|
db_path = get_db_path()
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
59
|
+
removed = False
|
|
60
|
+
for target in (db_path, db_path.with_name(db_path.name + ".wal")):
|
|
61
|
+
if target.is_dir():
|
|
62
|
+
shutil.rmtree(str(target), ignore_errors=True)
|
|
63
|
+
removed = True
|
|
64
|
+
elif target.exists():
|
|
65
|
+
target.unlink()
|
|
66
|
+
removed = True
|
|
67
|
+
if removed:
|
|
68
|
+
console.print("[red]Database wiped[/red]")
|
|
69
|
+
else:
|
|
70
|
+
console.print("[yellow]Nothing to wipe — database does not exist[/yellow]")
|
|
48
71
|
|
|
49
72
|
|
|
50
73
|
@app.command("info")
|
|
@@ -102,10 +125,20 @@ def db_info() -> None:
|
|
|
102
125
|
edges_count = edges_result[0]["count"] if edges_result else 0
|
|
103
126
|
console.print(f" COLUMN_LINEAGE edges: {edges_count}")
|
|
104
127
|
|
|
128
|
+
# Print star resolution metrics (T-07)
|
|
129
|
+
from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
|
|
130
|
+
|
|
131
|
+
star_source_result = backend.run_read(COUNT_STAR_SOURCES_QUERY, {})
|
|
132
|
+
star_source_count = star_source_result[0]["n"] if star_source_result else 0
|
|
133
|
+
console.print(f" STAR_SOURCE edges: {star_source_count}")
|
|
134
|
+
|
|
135
|
+
star_expansion_result = backend.run_read(COUNT_STAR_EXPANSIONS_QUERY, {})
|
|
136
|
+
star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
|
|
137
|
+
console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
|
|
138
|
+
|
|
105
139
|
# Print parsing mode distribution
|
|
106
140
|
mode_query = (
|
|
107
|
-
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt "
|
|
108
|
-
"ORDER BY cnt DESC"
|
|
141
|
+
"MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
|
|
109
142
|
)
|
|
110
143
|
mode_rows = backend.run_read(mode_query, {})
|
|
111
144
|
if mode_rows and "mode" in mode_rows[0]:
|
sqlcg/cli/commands/gain.py
CHANGED
|
@@ -113,19 +113,12 @@ def gain_cmd(
|
|
|
113
113
|
)
|
|
114
114
|
|
|
115
115
|
# Section E: execute_cypher ratio
|
|
116
|
-
cypher_query = (
|
|
117
|
-
"SELECT COUNT(*) as count FROM tool_calls "
|
|
118
|
-
"WHERE tool_name = 'execute_cypher'"
|
|
119
|
-
)
|
|
116
|
+
cypher_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_cypher'"
|
|
120
117
|
execute_cypher_count_result = metrics.execute_query(cypher_query)
|
|
121
118
|
execute_cypher_count = (
|
|
122
|
-
execute_cypher_count_result[0][0]
|
|
123
|
-
if execute_cypher_count_result
|
|
124
|
-
else 0
|
|
125
|
-
)
|
|
126
|
-
execute_cypher_ratio = (
|
|
127
|
-
execute_cypher_count / total_calls if total_calls > 0 else 0
|
|
119
|
+
execute_cypher_count_result[0][0] if execute_cypher_count_result else 0
|
|
128
120
|
)
|
|
121
|
+
execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
|
|
129
122
|
|
|
130
123
|
# Section F: parse quality from graph
|
|
131
124
|
parse_quality: dict[str, int] | None = None
|
|
@@ -137,9 +130,7 @@ def gain_cmd(
|
|
|
137
130
|
{},
|
|
138
131
|
)
|
|
139
132
|
if mode_rows and "mode" in mode_rows[0]:
|
|
140
|
-
parse_quality = {
|
|
141
|
-
str(r["mode"]): int(r["cnt"]) for r in mode_rows
|
|
142
|
-
}
|
|
133
|
+
parse_quality = {str(r["mode"]): int(r["cnt"]) for r in mode_rows}
|
|
143
134
|
except Exception:
|
|
144
135
|
pass # graph not available — skip quality section
|
|
145
136
|
|
|
@@ -202,10 +193,7 @@ def gain_cmd(
|
|
|
202
193
|
console.print("[bold cyan]E. Raw Cypher Usage[/bold cyan]")
|
|
203
194
|
ratio_pct = execute_cypher_ratio * 100
|
|
204
195
|
if execute_cypher_ratio > 0.3:
|
|
205
|
-
msg = (
|
|
206
|
-
f" [yellow]execute_cypher: {ratio_pct:.1f}% "
|
|
207
|
-
"(high raw-Cypher usage)[/yellow]"
|
|
208
|
-
)
|
|
196
|
+
msg = f" [yellow]execute_cypher: {ratio_pct:.1f}% (high raw-Cypher usage)[/yellow]"
|
|
209
197
|
console.print(msg)
|
|
210
198
|
else:
|
|
211
199
|
console.print(f" execute_cypher: {ratio_pct:.1f}%")
|
sqlcg/cli/commands/git.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Git integration commands for sqlcg."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import NamedTuple
|
|
4
5
|
|
|
5
6
|
import typer
|
|
6
7
|
from rich.console import Console
|
|
@@ -10,6 +11,71 @@ console = Console()
|
|
|
10
11
|
app = typer.Typer(name="git", help="Git integration commands")
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
class _HookSpec(NamedTuple):
|
|
15
|
+
filename: str
|
|
16
|
+
sentinel: str
|
|
17
|
+
script: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_HOOKS: list[_HookSpec] = [
|
|
21
|
+
_HookSpec(
|
|
22
|
+
filename="post-checkout",
|
|
23
|
+
sentinel="# sqlcg post-checkout hook",
|
|
24
|
+
script=(
|
|
25
|
+
"#!/bin/sh\n"
|
|
26
|
+
"# sqlcg post-checkout hook — incremental resync after branch switch\n"
|
|
27
|
+
"# $3 == 1 means branch checkout (not file checkout); skip file checkouts\n"
|
|
28
|
+
'[ "$3" = "1" ] || exit 0\n'
|
|
29
|
+
'sqlcg reindex --from "$1" --to "$2"'
|
|
30
|
+
' "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true\n'
|
|
31
|
+
),
|
|
32
|
+
),
|
|
33
|
+
_HookSpec(
|
|
34
|
+
filename="post-merge",
|
|
35
|
+
sentinel="# sqlcg post-merge hook",
|
|
36
|
+
script="""\
|
|
37
|
+
#!/bin/sh
|
|
38
|
+
# sqlcg post-merge hook — incremental resync after pull/merge
|
|
39
|
+
# post-merge receives only $1 (squash flag), no old/new SHA; use stored-SHA delta
|
|
40
|
+
sqlcg reindex "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true
|
|
41
|
+
""",
|
|
42
|
+
),
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
|
|
47
|
+
"""Install one git hook idempotently.
|
|
48
|
+
|
|
49
|
+
If the hook file already contains the sentinel, it is already installed — skip silently.
|
|
50
|
+
If the hook file exists without the sentinel, warn and print the script for manual append.
|
|
51
|
+
Otherwise, write the hook file and set 0o755.
|
|
52
|
+
"""
|
|
53
|
+
hook_path = hooks_dir / spec.filename
|
|
54
|
+
|
|
55
|
+
if hook_path.exists():
|
|
56
|
+
existing_content = hook_path.read_text()
|
|
57
|
+
if spec.sentinel in existing_content:
|
|
58
|
+
# Already installed — idempotent, skip silently
|
|
59
|
+
return
|
|
60
|
+
else:
|
|
61
|
+
# Foreign hook without sqlcg sentinel
|
|
62
|
+
console.print(
|
|
63
|
+
f"[yellow]Warning: existing {spec.filename} hook found that was not created "
|
|
64
|
+
"by sqlcg.[/yellow]"
|
|
65
|
+
)
|
|
66
|
+
console.print(
|
|
67
|
+
f"[yellow]To integrate sqlcg, manually append the following to "
|
|
68
|
+
f".git/hooks/{spec.filename}:[/yellow]"
|
|
69
|
+
)
|
|
70
|
+
console.print("")
|
|
71
|
+
console.print("[cyan]" + spec.script.rstrip() + "[/cyan]")
|
|
72
|
+
return
|
|
73
|
+
|
|
74
|
+
hook_path.write_text(spec.script)
|
|
75
|
+
hook_path.chmod(0o755)
|
|
76
|
+
console.print(f"[green]Installed git hook:[/green] .git/hooks/{spec.filename}")
|
|
77
|
+
|
|
78
|
+
|
|
13
79
|
@app.command("install-hooks")
|
|
14
80
|
def install_hooks(
|
|
15
81
|
repo: Path | None = typer.Option( # noqa: B008
|
|
@@ -18,8 +84,9 @@ def install_hooks(
|
|
|
18
84
|
) -> None:
|
|
19
85
|
"""Install git hooks for sqlcg integration.
|
|
20
86
|
|
|
21
|
-
Writes a post-checkout hook that triggers
|
|
22
|
-
|
|
87
|
+
Writes a post-checkout hook that triggers incremental resync after branch switches
|
|
88
|
+
and a post-merge hook that triggers resync after pulls/merges.
|
|
89
|
+
Idempotent: running multiple times produces one hook entry per hook.
|
|
23
90
|
"""
|
|
24
91
|
if repo is None:
|
|
25
92
|
repo = Path.cwd()
|
|
@@ -33,41 +100,5 @@ def install_hooks(
|
|
|
33
100
|
|
|
34
101
|
hooks_dir.mkdir(parents=True, exist_ok=True)
|
|
35
102
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
# Hook script content
|
|
40
|
-
hook_script = """#!/bin/sh
|
|
41
|
-
# sqlcg post-checkout hook — resync graph after branch switch
|
|
42
|
-
# $3 == 1 means branch checkout (not file checkout); skip file checkouts
|
|
43
|
-
[ "$3" = "1" ] || exit 0
|
|
44
|
-
sqlcg index "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
# Check if hook already exists
|
|
48
|
-
if hook_path.exists():
|
|
49
|
-
existing_content = hook_path.read_text()
|
|
50
|
-
if hook_sentinel in existing_content:
|
|
51
|
-
# Already installed, idempotent: skip silently
|
|
52
|
-
return
|
|
53
|
-
else:
|
|
54
|
-
# Existing hook without sqlcg sentinel
|
|
55
|
-
console.print(
|
|
56
|
-
"[yellow]Warning: existing post-checkout hook found that was not created "
|
|
57
|
-
"by sqlcg.[/yellow]"
|
|
58
|
-
)
|
|
59
|
-
console.print(
|
|
60
|
-
"[yellow]To integrate sqlcg, manually append the following to "
|
|
61
|
-
".git/hooks/post-checkout:[/yellow]"
|
|
62
|
-
)
|
|
63
|
-
console.print("")
|
|
64
|
-
console.print("[cyan]" + hook_script.rstrip() + "[/cyan]")
|
|
65
|
-
return
|
|
66
|
-
|
|
67
|
-
# Write hook script
|
|
68
|
-
hook_path.write_text(hook_script)
|
|
69
|
-
|
|
70
|
-
# Make it executable
|
|
71
|
-
hook_path.chmod(0o755)
|
|
72
|
-
|
|
73
|
-
console.print("[green]Installed git hook:[/green] .git/hooks/post-checkout")
|
|
103
|
+
for spec in _HOOKS:
|
|
104
|
+
_install_single_hook(hooks_dir, spec)
|