sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +29 -29
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +31 -56
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/index.py +6 -17
- sqlcg/cli/commands/reindex.py +1 -1
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +1 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +26 -23
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +43 -106
- sqlcg/server/writer.py +43 -218
- sqlcg/core/kuzu_backend.py +0 -449
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.3.0.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
sqlcg/cli/commands/db.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Database management commands."""
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
3
|
import shutil
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
|
|
@@ -20,18 +19,8 @@ console = Console()
|
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
@app.command("init")
|
|
23
|
-
def db_init(
|
|
24
|
-
buffer_pool_size: int = typer.Option(
|
|
25
|
-
0,
|
|
26
|
-
"--buffer-pool-size",
|
|
27
|
-
help="KuzuDB buffer pool size in MB (0 = default). "
|
|
28
|
-
"Set to 256-512 on memory-constrained machines.",
|
|
29
|
-
),
|
|
30
|
-
) -> None:
|
|
22
|
+
def db_init() -> None:
|
|
31
23
|
"""Initialise the graph database (idempotent)."""
|
|
32
|
-
if buffer_pool_size > 0:
|
|
33
|
-
os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
|
|
34
|
-
|
|
35
24
|
db_path = get_db_path()
|
|
36
25
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
37
26
|
with get_backend() as backend:
|
|
@@ -49,38 +38,36 @@ def db_reset( # noqa: B008
|
|
|
49
38
|
|
|
50
39
|
from sqlcg.server.control import sock_path
|
|
51
40
|
|
|
52
|
-
#
|
|
53
|
-
# full reset and the --repo partial reset open the RW backend directly and
|
|
54
|
-
# would fight the server's lock. Guard runs BEFORE either destructive branch.
|
|
41
|
+
# Refuse cleanly when a server is live.
|
|
55
42
|
sp = sock_path()
|
|
56
43
|
if sp.exists():
|
|
57
44
|
try:
|
|
58
45
|
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
59
46
|
s.settimeout(1)
|
|
60
47
|
s.connect(str(sp))
|
|
61
|
-
# Connection succeeded — a server is live.
|
|
62
48
|
console.print(
|
|
63
49
|
"[red]A server is running on this database; stop it first "
|
|
64
50
|
"('sqlcg mcp stop') before resetting the database.[/red]"
|
|
65
51
|
)
|
|
66
52
|
raise typer.Exit(1)
|
|
67
53
|
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
68
|
-
# No live server — fall through to destructive action.
|
|
69
54
|
pass
|
|
70
55
|
|
|
71
56
|
if repo:
|
|
72
|
-
# Delete all nodes for this repo (
|
|
57
|
+
# Delete all nodes for this repo: delete File nodes (cascades to all
|
|
58
|
+
# related nodes via delete_nodes_for_file) and the Repo node itself.
|
|
73
59
|
with get_backend() as backend:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
60
|
+
# Get all files for this repo
|
|
61
|
+
file_rows = backend.run_read(
|
|
62
|
+
'SELECT path FROM "File" WHERE repo_path = ?',
|
|
63
|
+
{"repo_path": repo},
|
|
77
64
|
)
|
|
65
|
+
for fr in file_rows:
|
|
66
|
+
backend.delete_nodes_for_file(fr["path"])
|
|
67
|
+
backend.run_write('DELETE FROM "Repo" WHERE path = ?', {"p": repo})
|
|
78
68
|
console.print(f"[yellow]Reset repo[/yellow] {repo}")
|
|
79
69
|
else:
|
|
80
|
-
# Full reset — delete the
|
|
81
|
-
# e.g. 0.11.x) or a directory (older versions); also drop the .wal sidecar.
|
|
82
|
-
# shutil.rmtree silently no-ops on a regular file (NotADirectoryError +
|
|
83
|
-
# ignore_errors), so dispatch on the actual filesystem type.
|
|
70
|
+
# Full reset — delete the DuckDB file (single file, not a directory).
|
|
84
71
|
db_path = get_db_path()
|
|
85
72
|
removed = False
|
|
86
73
|
for target in (db_path, db_path.with_name(db_path.name + ".wal")):
|
|
@@ -99,56 +86,46 @@ def db_reset( # noqa: B008
|
|
|
99
86
|
@app.command("info")
|
|
100
87
|
def db_info() -> None:
|
|
101
88
|
"""Show database stats."""
|
|
102
|
-
# db info
|
|
103
|
-
#
|
|
104
|
-
# holds the write lock. get_schema_version / get_indexed_sha are inlined as
|
|
105
|
-
# run_read_routed calls using their known Cypher so they too route through the
|
|
106
|
-
# socket when a server is live; this avoids a direct-open that would hit the lock.
|
|
89
|
+
# db info routes through the live server (run_read_routed) to avoid holding
|
|
90
|
+
# the DuckDB file lock when the MCP server is running.
|
|
107
91
|
|
|
108
92
|
# Schema version
|
|
109
|
-
schema_rows = run_read_routed(
|
|
93
|
+
schema_rows = run_read_routed('SELECT version FROM "SchemaVersion" LIMIT 1', {})
|
|
110
94
|
version = (schema_rows[0]["version"] if schema_rows else None) or "unknown"
|
|
111
95
|
console.print(f"Schema version: {version}")
|
|
112
96
|
|
|
113
|
-
# Freshness block
|
|
97
|
+
# Freshness block
|
|
114
98
|
try:
|
|
115
|
-
sha_rows = run_read_routed(
|
|
116
|
-
"MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
|
|
117
|
-
)
|
|
99
|
+
sha_rows = run_read_routed('SELECT indexed_sha AS sha FROM "SchemaVersion" LIMIT 1', {})
|
|
118
100
|
indexed_sha = sha_rows[0]["sha"] if sha_rows else None
|
|
119
|
-
repo_rows = run_read_routed(
|
|
101
|
+
repo_rows = run_read_routed('SELECT path FROM "Repo" LIMIT 1', {})
|
|
120
102
|
if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
|
|
121
103
|
repo_root = Path(repo_rows[0]["path"])
|
|
122
104
|
f = compute_freshness(repo_root, indexed_sha)
|
|
123
105
|
console.print(render_freshness_line(f))
|
|
124
|
-
except NotImplementedError:
|
|
125
|
-
# Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
|
|
126
|
-
pass
|
|
127
106
|
except Exception as e:
|
|
128
|
-
# Any unexpected error in the freshness block must not crash db info
|
|
129
107
|
logger.debug(f"Freshness check skipped: {e}")
|
|
130
108
|
|
|
131
|
-
#
|
|
109
|
+
# Node counts
|
|
132
110
|
for label in NodeLabel:
|
|
133
111
|
try:
|
|
134
|
-
result = run_read_routed(f
|
|
112
|
+
result = run_read_routed(f'SELECT count(*) AS count FROM "{label}"', {})
|
|
135
113
|
count = result[0]["count"] if result else 0
|
|
136
114
|
console.print(f" {label}: {count}")
|
|
137
115
|
except Exception as e:
|
|
138
|
-
# Log unexpected exceptions instead of silently skipping
|
|
139
116
|
logger.error(f"Error getting count for {label}: {e}")
|
|
140
117
|
console.print(f" [red]{label}: error[/red]")
|
|
141
118
|
|
|
142
|
-
# Health check
|
|
143
|
-
repo_count_result = run_read_routed(
|
|
119
|
+
# Health check
|
|
120
|
+
repo_count_result = run_read_routed('SELECT count(*) AS count FROM "Repo"', {})
|
|
144
121
|
repo_count = repo_count_result[0]["count"] if repo_count_result else 0
|
|
145
122
|
|
|
146
123
|
if repo_count == 0:
|
|
147
|
-
console.print(
|
|
124
|
+
console.print(
|
|
148
125
|
"[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
|
|
149
126
|
)
|
|
150
127
|
else:
|
|
151
|
-
query_count_result = run_read_routed(
|
|
128
|
+
query_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlQuery"', {})
|
|
152
129
|
query_count = query_count_result[0]["count"] if query_count_result else 0
|
|
153
130
|
|
|
154
131
|
if query_count == 0:
|
|
@@ -157,7 +134,7 @@ def db_info() -> None:
|
|
|
157
134
|
"the graph.[/yellow]"
|
|
158
135
|
)
|
|
159
136
|
else:
|
|
160
|
-
col_count_result = run_read_routed(
|
|
137
|
+
col_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlColumn"', {})
|
|
161
138
|
col_count = col_count_result[0]["count"] if col_count_result else 0
|
|
162
139
|
|
|
163
140
|
if col_count == 0:
|
|
@@ -167,12 +144,10 @@ def db_info() -> None:
|
|
|
167
144
|
"will return empty results.[/yellow]"
|
|
168
145
|
)
|
|
169
146
|
|
|
170
|
-
|
|
171
|
-
edges_result = run_read_routed("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
|
|
147
|
+
edges_result = run_read_routed('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
|
|
172
148
|
edges_count = edges_result[0]["count"] if edges_result else 0
|
|
173
149
|
console.print(f" COLUMN_LINEAGE edges: {edges_count}")
|
|
174
150
|
|
|
175
|
-
# Print star resolution metrics (T-07)
|
|
176
151
|
from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
|
|
177
152
|
|
|
178
153
|
star_source_result = run_read_routed(COUNT_STAR_SOURCES_QUERY, {})
|
|
@@ -183,11 +158,11 @@ def db_info() -> None:
|
|
|
183
158
|
star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
|
|
184
159
|
console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
|
|
185
160
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
"
|
|
161
|
+
mode_rows = run_read_routed(
|
|
162
|
+
'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
|
|
163
|
+
" GROUP BY parsing_mode ORDER BY cnt DESC",
|
|
164
|
+
{},
|
|
189
165
|
)
|
|
190
|
-
mode_rows = run_read_routed(mode_query, {})
|
|
191
166
|
if mode_rows and "mode" in mode_rows[0]:
|
|
192
167
|
console.print("\n Parsing mode distribution:")
|
|
193
168
|
for row in mode_rows:
|
|
@@ -197,7 +172,7 @@ def db_info() -> None:
|
|
|
197
172
|
@app.command("list-repos")
|
|
198
173
|
def list_repos() -> None:
|
|
199
174
|
"""List all indexed repositories."""
|
|
200
|
-
result = run_read_routed(
|
|
175
|
+
result = run_read_routed('SELECT path, name FROM "Repo"', {})
|
|
201
176
|
|
|
202
177
|
if not result:
|
|
203
178
|
console.print("[yellow]No repositories indexed[/yellow]")
|
sqlcg/cli/commands/find.py
CHANGED
|
@@ -4,7 +4,6 @@ import typer
|
|
|
4
4
|
from rich.console import Console
|
|
5
5
|
from rich.table import Table
|
|
6
6
|
|
|
7
|
-
from sqlcg.core.schema import NodeLabel
|
|
8
7
|
from sqlcg.server.read_client import run_read_routed
|
|
9
8
|
|
|
10
9
|
app = typer.Typer(help="Search the graph")
|
|
@@ -19,14 +18,13 @@ def find_table( # noqa: B008
|
|
|
19
18
|
"""Find a table by name."""
|
|
20
19
|
name = name.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
21
20
|
results = run_read_routed(
|
|
22
|
-
|
|
23
|
-
"RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
|
|
21
|
+
"SELECT qualified, kind FROM \"SqlTable\" WHERE qualified LIKE '%' || ? || '%' LIMIT 50",
|
|
24
22
|
{"name": name},
|
|
25
23
|
)
|
|
26
24
|
if not raw:
|
|
27
25
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
28
26
|
|
|
29
|
-
nf = NoiseFilter.from_config()
|
|
27
|
+
nf = NoiseFilter.from_config()
|
|
30
28
|
ids = [r["qualified"] for r in results]
|
|
31
29
|
kept, _ = nf.filter_nodes(ids)
|
|
32
30
|
kept_set = set(kept)
|
|
@@ -42,14 +40,13 @@ def find_column( # noqa: B008
|
|
|
42
40
|
"""Find a column by table.column reference."""
|
|
43
41
|
ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
|
|
44
42
|
results = run_read_routed(
|
|
45
|
-
|
|
43
|
+
"SELECT id FROM \"SqlColumn\" WHERE id LIKE '%' || ? || '%' LIMIT 50",
|
|
46
44
|
{"ref": ref},
|
|
47
45
|
)
|
|
48
46
|
if not raw:
|
|
49
47
|
from sqlcg.server.noise_filter import NoiseFilter
|
|
50
48
|
|
|
51
|
-
nf = NoiseFilter.from_config()
|
|
52
|
-
# Filter on the schema.table portion of each column id (schema.table.column)
|
|
49
|
+
nf = NoiseFilter.from_config()
|
|
53
50
|
results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
|
|
54
51
|
_print_table(results, ["id"])
|
|
55
52
|
|
|
@@ -60,8 +57,7 @@ def find_pattern( # noqa: B008
|
|
|
60
57
|
) -> None:
|
|
61
58
|
"""Find queries containing a SQL pattern."""
|
|
62
59
|
results = run_read_routed(
|
|
63
|
-
|
|
64
|
-
"RETURN q.id AS id, q.kind AS kind LIMIT 50",
|
|
60
|
+
"SELECT id, kind FROM \"SqlQuery\" WHERE sql LIKE '%' || ? || '%' LIMIT 50",
|
|
65
61
|
{"pattern": pattern},
|
|
66
62
|
)
|
|
67
63
|
_print_table(results, ["id", "kind"])
|
sqlcg/cli/commands/gain.py
CHANGED
|
@@ -112,13 +112,11 @@ def gain_cmd(
|
|
|
112
112
|
"""
|
|
113
113
|
)
|
|
114
114
|
|
|
115
|
-
# Section E:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
)
|
|
121
|
-
execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
|
|
115
|
+
# Section E: execute_sql ratio
|
|
116
|
+
sql_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_sql'"
|
|
117
|
+
execute_sql_count_result = metrics.execute_query(sql_query)
|
|
118
|
+
execute_sql_count = execute_sql_count_result[0][0] if execute_sql_count_result else 0
|
|
119
|
+
execute_sql_ratio = execute_sql_count / total_calls if total_calls > 0 else 0
|
|
122
120
|
|
|
123
121
|
# Section F: parse quality from graph.
|
|
124
122
|
# run_read_routed raises typer.Exit (Exception-derived, NOT SystemExit) on
|
|
@@ -127,8 +125,8 @@ def gain_cmd(
|
|
|
127
125
|
parse_quality: dict[str, int] | None = None
|
|
128
126
|
try:
|
|
129
127
|
mode_rows = run_read_routed(
|
|
130
|
-
|
|
131
|
-
"
|
|
128
|
+
'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
|
|
129
|
+
" GROUP BY parsing_mode ORDER BY cnt DESC",
|
|
132
130
|
{},
|
|
133
131
|
)
|
|
134
132
|
if mode_rows and "mode" in mode_rows[0]:
|
|
@@ -144,7 +142,7 @@ def gain_cmd(
|
|
|
144
142
|
"feedback_tp": tp_count,
|
|
145
143
|
"feedback_total": fb_total,
|
|
146
144
|
"top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
|
|
147
|
-
"
|
|
145
|
+
"execute_sql_ratio": round(execute_sql_ratio, 2),
|
|
148
146
|
}
|
|
149
147
|
if parse_quality is not None:
|
|
150
148
|
payload["parse_quality"] = parse_quality
|
|
@@ -191,14 +189,14 @@ def gain_cmd(
|
|
|
191
189
|
console.print(f" {i}. {name}: {count}")
|
|
192
190
|
console.print()
|
|
193
191
|
|
|
194
|
-
# Section E:
|
|
195
|
-
console.print("[bold cyan]E. Raw
|
|
196
|
-
ratio_pct =
|
|
197
|
-
if
|
|
198
|
-
msg = f" [yellow]
|
|
192
|
+
# Section E: execute_sql ratio
|
|
193
|
+
console.print("[bold cyan]E. Raw SQL Usage[/bold cyan]")
|
|
194
|
+
ratio_pct = execute_sql_ratio * 100
|
|
195
|
+
if execute_sql_ratio > 0.3:
|
|
196
|
+
msg = f" [yellow]execute_sql: {ratio_pct:.1f}% (high raw-SQL usage)[/yellow]"
|
|
199
197
|
console.print(msg)
|
|
200
198
|
else:
|
|
201
|
-
console.print(f"
|
|
199
|
+
console.print(f" execute_sql: {ratio_pct:.1f}%")
|
|
202
200
|
console.print()
|
|
203
201
|
|
|
204
202
|
# Section F: parse quality from graph
|
sqlcg/cli/commands/index.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Index command for scanning and indexing SQL files."""
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
-
import os
|
|
5
4
|
import socket as _socket
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
|
|
@@ -16,7 +15,7 @@ from rich.progress import (
|
|
|
16
15
|
TimeRemainingColumn,
|
|
17
16
|
)
|
|
18
17
|
|
|
19
|
-
from sqlcg.core.config import
|
|
18
|
+
from sqlcg.core.config import DbConfig, config_file_present, get_backend, get_db_path, get_dialect
|
|
20
19
|
from sqlcg.indexer.indexer import Indexer
|
|
21
20
|
|
|
22
21
|
console = Console()
|
|
@@ -37,17 +36,11 @@ def index_cmd( # noqa: B008
|
|
|
37
36
|
timeout_per_file: int = typer.Option( # noqa: B008
|
|
38
37
|
10, "--timeout-per-file", help="Timeout per file in seconds"
|
|
39
38
|
),
|
|
40
|
-
buffer_pool_size: int = typer.Option( # noqa: B008
|
|
41
|
-
0,
|
|
42
|
-
"--buffer-pool-size",
|
|
43
|
-
help="KuzuDB buffer pool size in MB (0 = default). "
|
|
44
|
-
"Set to 256-512 on memory-constrained machines.",
|
|
45
|
-
),
|
|
46
39
|
batch_size: int = typer.Option( # noqa: B008
|
|
47
40
|
50,
|
|
48
41
|
"--batch-size",
|
|
49
42
|
help=(
|
|
50
|
-
"Files per
|
|
43
|
+
"Files per DuckDB transaction in the upsert pass. "
|
|
51
44
|
"Default 50 balances commit-overhead reduction (vs. legacy per-file commits) "
|
|
52
45
|
"against per-batch memory cost. Lower values are safer for memory-constrained "
|
|
53
46
|
"machines; higher values give marginal speedup at the cost of larger working sets. "
|
|
@@ -148,16 +141,12 @@ def index_cmd( # noqa: B008
|
|
|
148
141
|
sqlcg_log.addHandler(_warn_handler)
|
|
149
142
|
_warn_log_path = None
|
|
150
143
|
else:
|
|
151
|
-
_warn_log_path =
|
|
144
|
+
_warn_log_path = DbConfig.from_env().log_path
|
|
152
145
|
_warn_log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
153
146
|
_warn_handler = logging.FileHandler(_warn_log_path)
|
|
154
147
|
_warn_handler.setLevel(logging.WARNING)
|
|
155
148
|
sqlcg_log.addHandler(_warn_handler)
|
|
156
149
|
|
|
157
|
-
# Set buffer pool size via env var if specified
|
|
158
|
-
if buffer_pool_size > 0:
|
|
159
|
-
os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
|
|
160
|
-
|
|
161
150
|
if not quiet and not config_file_present(path):
|
|
162
151
|
console.print(
|
|
163
152
|
f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
|
|
@@ -181,7 +170,7 @@ def index_cmd( # noqa: B008
|
|
|
181
170
|
)
|
|
182
171
|
except KeyboardInterrupt:
|
|
183
172
|
# The backend context manager (inside _run_index) has already closed the
|
|
184
|
-
#
|
|
173
|
+
# DuckDB connection and released the lock by the time we get here.
|
|
185
174
|
console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
|
|
186
175
|
raise typer.Exit(130) from None
|
|
187
176
|
finally:
|
|
@@ -400,10 +389,10 @@ def _run_index(
|
|
|
400
389
|
)
|
|
401
390
|
|
|
402
391
|
# Connect files to repo
|
|
392
|
+
from sqlcg.core.queries import INDEX_REPO_FILES_QUERY
|
|
403
393
|
from sqlcg.core.schema import RelType
|
|
404
394
|
|
|
405
|
-
|
|
406
|
-
file_rows = backend.run_read(files_query, {"repo_prefix": abs_path})
|
|
395
|
+
file_rows = backend.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": abs_path})
|
|
407
396
|
for row in file_rows:
|
|
408
397
|
backend.upsert_edge(
|
|
409
398
|
NodeLabel.FILE,
|
sqlcg/cli/commands/reindex.py
CHANGED
|
@@ -21,7 +21,7 @@ console = Console()
|
|
|
21
21
|
# Client-side socket timeout for the --notify control-socket path.
|
|
22
22
|
# A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
|
|
23
23
|
# 300 s covers that with headroom while keeping the wait bounded on a wedged server.
|
|
24
|
-
# This is a CLI transport bound, NOT a
|
|
24
|
+
# This is a CLI transport bound, NOT a DbConfig/indexer constant.
|
|
25
25
|
_NOTIFY_SOCKET_TIMEOUT_S = 300
|
|
26
26
|
|
|
27
27
|
|
sqlcg/cli/commands/uninstall.py
CHANGED
|
@@ -26,7 +26,7 @@ def uninstall_cmd( # noqa: B008
|
|
|
26
26
|
"""Uninstall sqlcg from Claude Code and optionally clean up resources.
|
|
27
27
|
|
|
28
28
|
Step 1: Remove MCP registration from ~/.claude/settings.json
|
|
29
|
-
Step 2: Optionally delete the
|
|
29
|
+
Step 2: Optionally delete the DuckDB graph database
|
|
30
30
|
Step 3: Remove git hook sentinel block from .git/hooks/post-checkout
|
|
31
31
|
Step 4: Remove sqlcg skill directory from ~/.claude/skills/sqlcg/ and
|
|
32
32
|
<repo>/.claude/skills/sqlcg/
|
|
@@ -34,7 +34,7 @@ def uninstall_cmd( # noqa: B008
|
|
|
34
34
|
# Step 1: Remove MCP entry from settings.json
|
|
35
35
|
_step1_remove_mcp_entry()
|
|
36
36
|
|
|
37
|
-
# Step 2: Offer to delete the
|
|
37
|
+
# Step 2: Offer to delete the database (unless --keep-db flag is set)
|
|
38
38
|
if not keep_db:
|
|
39
39
|
_step2_delete_database(force)
|
|
40
40
|
else:
|
|
@@ -84,7 +84,7 @@ def _step1_remove_mcp_entry() -> None:
|
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
def _step2_delete_database(force: bool) -> None:
|
|
87
|
-
"""Offer to delete the
|
|
87
|
+
"""Offer to delete the DuckDB graph database."""
|
|
88
88
|
db_path = _get_db_path()
|
|
89
89
|
|
|
90
90
|
if not db_path:
|
|
@@ -93,13 +93,6 @@ def _step2_delete_database(force: bool) -> None:
|
|
|
93
93
|
|
|
94
94
|
db_path_obj = Path(db_path)
|
|
95
95
|
|
|
96
|
-
# Check if it's a kuzu backend (not Neo4j)
|
|
97
|
-
# If db_path is a directory or ends with standard kuzu patterns, it's likely kuzu
|
|
98
|
-
# For now, we'll assume anything in .sqlcg/kuzu is kuzu
|
|
99
|
-
if not _is_kuzu_backend(db_path):
|
|
100
|
-
console.print("[dim]Database is not KùzuDB — skipping deletion[/dim]")
|
|
101
|
-
return
|
|
102
|
-
|
|
103
96
|
if not db_path_obj.exists():
|
|
104
97
|
console.print(f"[dim]Database not found at {db_path}[/dim]")
|
|
105
98
|
return
|
|
@@ -117,9 +110,11 @@ def _step2_delete_database(force: bool) -> None:
|
|
|
117
110
|
console.print("[dim]Keeping database[/dim]")
|
|
118
111
|
return
|
|
119
112
|
|
|
120
|
-
#
|
|
113
|
+
# DuckDB is a single file (+ optional .wal sibling); delete both.
|
|
121
114
|
try:
|
|
122
|
-
|
|
115
|
+
for target in (db_path_obj, db_path_obj.with_name(db_path_obj.name + ".wal")):
|
|
116
|
+
if target.exists():
|
|
117
|
+
target.unlink()
|
|
123
118
|
console.print(f"[green]Deleted graph database at {db_path}[/green]")
|
|
124
119
|
except Exception as e:
|
|
125
120
|
console.print(f"[yellow]Warning:[/yellow] Failed to delete database: {e}")
|
|
@@ -222,18 +217,12 @@ def _step3_remove_git_hook(repo_path: Path) -> None:
|
|
|
222
217
|
|
|
223
218
|
def _get_db_path() -> str | None:
|
|
224
219
|
"""Get the configured database path from environment or default."""
|
|
225
|
-
from sqlcg.core.config import
|
|
220
|
+
from sqlcg.core.config import DbConfig
|
|
226
221
|
|
|
227
|
-
db_path = str(
|
|
222
|
+
db_path = str(DbConfig.from_env().db_path)
|
|
228
223
|
return db_path if Path(db_path).exists() else None
|
|
229
224
|
|
|
230
225
|
|
|
231
|
-
def _is_kuzu_backend(db_path: str) -> bool:
|
|
232
|
-
"""Check if the database is a KùzuDB backend (not Neo4j)."""
|
|
233
|
-
backend = os.getenv("SQLCG_BACKEND", "kuzu").lower()
|
|
234
|
-
return backend in ("kuzu", "") # Default to kuzu if unset
|
|
235
|
-
|
|
236
|
-
|
|
237
226
|
# Candidate skill directory locations to remove (global first, then project-relative)
|
|
238
227
|
# Each entry is a callable(repo_path) -> Path resolving to the sqlcg skill dir.
|
|
239
228
|
_SKILL_DIR_TARGETS = [
|
sqlcg/core/__init__.py
CHANGED
|
@@ -2,7 +2,5 @@
|
|
|
2
2
|
|
|
3
3
|
from sqlcg.core import schema
|
|
4
4
|
from sqlcg.core.graph_db import GraphBackend
|
|
5
|
-
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
6
|
-
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
7
5
|
|
|
8
|
-
__all__ = ["GraphBackend", "
|
|
6
|
+
__all__ = ["GraphBackend", "schema"]
|
sqlcg/core/config.py
CHANGED
|
@@ -11,64 +11,37 @@ if TYPE_CHECKING:
|
|
|
11
11
|
from sqlcg.core.graph_db import GraphBackend
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class
|
|
15
|
-
"""Configuration for
|
|
14
|
+
class DbConfig(BaseModel):
|
|
15
|
+
"""Configuration for the DuckDB backend."""
|
|
16
16
|
|
|
17
17
|
db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")
|
|
18
|
-
buffer_pool_size_mb: int = Field(
|
|
19
|
-
default=0,
|
|
20
|
-
description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
|
|
21
|
-
)
|
|
22
18
|
log_path: Path = Field(
|
|
23
19
|
default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
|
|
24
20
|
description="Path for parse-warning log file written during indexing",
|
|
25
21
|
)
|
|
26
22
|
|
|
27
23
|
@classmethod
|
|
28
|
-
def from_env(cls) -> "
|
|
29
|
-
"""Load
|
|
24
|
+
def from_env(cls) -> "DbConfig":
|
|
25
|
+
"""Load database config from environment variables.
|
|
30
26
|
|
|
31
27
|
Returns:
|
|
32
|
-
|
|
28
|
+
DbConfig instance with environment-overridden values if present.
|
|
33
29
|
"""
|
|
34
30
|
env_path = os.getenv("SQLCG_DB_PATH")
|
|
35
|
-
env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
|
|
36
31
|
env_log = os.getenv("SQLCG_LOG_PATH")
|
|
37
32
|
return cls(
|
|
38
33
|
db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
|
|
39
|
-
buffer_pool_size_mb=int(env_buf) if env_buf else 0,
|
|
40
34
|
log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
|
|
41
35
|
)
|
|
42
36
|
|
|
43
37
|
|
|
44
|
-
class Neo4jConfig(BaseModel):
|
|
45
|
-
"""Configuration for Neo4j backend."""
|
|
46
|
-
|
|
47
|
-
uri: str = Field(default="bolt://localhost:7687")
|
|
48
|
-
user: str = Field(default="neo4j")
|
|
49
|
-
password: str = Field(default="password")
|
|
50
|
-
|
|
51
|
-
@classmethod
|
|
52
|
-
def from_env(cls) -> "Neo4jConfig":
|
|
53
|
-
"""Load Neo4j config from environment variables.
|
|
54
|
-
|
|
55
|
-
Returns:
|
|
56
|
-
Neo4jConfig instance with environment-overridden values if present.
|
|
57
|
-
"""
|
|
58
|
-
return cls(
|
|
59
|
-
uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
|
|
60
|
-
user=os.getenv("NEO4J_USER", "neo4j"),
|
|
61
|
-
password=os.getenv("NEO4J_PASSWORD", "password"),
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
|
|
65
38
|
def get_db_path() -> Path:
|
|
66
39
|
"""Get the database path from environment or use default.
|
|
67
40
|
|
|
68
41
|
Returns:
|
|
69
|
-
Path to the
|
|
42
|
+
Path to the DuckDB database file
|
|
70
43
|
"""
|
|
71
|
-
return
|
|
44
|
+
return DbConfig.from_env().db_path
|
|
72
45
|
|
|
73
46
|
|
|
74
47
|
def config_file_present(path: Path) -> bool:
|
|
@@ -347,58 +320,29 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
|
|
|
347
320
|
|
|
348
321
|
|
|
349
322
|
def get_backend(read_only: bool = False) -> "GraphBackend":
|
|
350
|
-
"""Get a
|
|
323
|
+
"""Get a DuckDBBackend instance.
|
|
324
|
+
|
|
325
|
+
The ``read_only`` parameter is accepted for API compatibility but is
|
|
326
|
+
ignored — DuckDB uses a single R/W handle for the process lifetime.
|
|
327
|
+
Concurrent read safety is provided by DuckDB's MVCC (readers see a
|
|
328
|
+
consistent snapshot during an in-flight write transaction).
|
|
329
|
+
|
|
330
|
+
Cross-process access: whichever process opens the DuckDB file first holds
|
|
331
|
+
an exclusive lock; other processes cannot open it at all (even read-only).
|
|
332
|
+
CLI read commands therefore route through the live MCP server via
|
|
333
|
+
``read_client.run_read_routed`` (v1.2.0) when a server is live, and open
|
|
334
|
+
the file directly only when no server is running.
|
|
351
335
|
|
|
352
336
|
Args:
|
|
353
|
-
read_only:
|
|
354
|
-
enables multiple concurrent read-only opens (reader/reader
|
|
355
|
-
concurrency), but does NOT allow reads while a read-write writer
|
|
356
|
-
holds the exclusive process lock — that requires routing through the
|
|
357
|
-
live MCP server via ``read_client.run_read_routed`` (v1.2.0).
|
|
358
|
-
Ignored for Neo4jBackend (Neo4j has no single-writer process lock;
|
|
359
|
-
the flag is a no-op and the normal connection is opened).
|
|
360
|
-
All writer call sites (index, reindex, db init/reset, server
|
|
361
|
-
init_backend) use the default ``False``.
|
|
337
|
+
read_only: Ignored for DuckDB. Accepted for API compatibility.
|
|
362
338
|
|
|
363
339
|
Returns:
|
|
364
|
-
A
|
|
340
|
+
A DuckDBBackend instance.
|
|
365
341
|
|
|
366
342
|
Raises:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
Note:
|
|
370
|
-
CLI read commands (find, analyze, db info, gain) route through a live
|
|
371
|
-
MCP server via ``read_client.run_read_routed`` (v1.2.0) when a server
|
|
372
|
-
is live, falling back to ``get_backend(read_only=True)`` when no server
|
|
373
|
-
is present. The fallback path still contends for the process lock under
|
|
374
|
-
an active writer (Windows / no-server fallback only).
|
|
343
|
+
duckdb.IOException: If the file is locked by another process.
|
|
375
344
|
"""
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
if backend_type == "kuzu":
|
|
379
|
-
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
345
|
+
from sqlcg.core.duckdb_backend import DuckDBBackend
|
|
380
346
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
return KuzuBackend(
|
|
384
|
-
str(kuzu_cfg.db_path),
|
|
385
|
-
buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
|
|
386
|
-
read_only=read_only,
|
|
387
|
-
)
|
|
388
|
-
except RuntimeError as exc:
|
|
389
|
-
if read_only and "READ ONLY" in str(exc):
|
|
390
|
-
# KùzuDB refuses to open a non-existent or empty DB in read-only
|
|
391
|
-
# mode ("Cannot create an empty database under READ ONLY mode").
|
|
392
|
-
# Surface the same empty-DB guidance the user sees from `db info`.
|
|
393
|
-
raise RuntimeError(
|
|
394
|
-
"Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
|
|
395
|
-
) from exc
|
|
396
|
-
raise
|
|
397
|
-
elif backend_type == "neo4j":
|
|
398
|
-
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
399
|
-
|
|
400
|
-
neo4j_cfg = Neo4jConfig.from_env()
|
|
401
|
-
# read_only is ignored for Neo4j — no single-writer process lock.
|
|
402
|
-
return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
|
|
403
|
-
else:
|
|
404
|
-
raise ValueError(f"Unknown backend type: {backend_type}")
|
|
347
|
+
cfg = DbConfig.from_env()
|
|
348
|
+
return DuckDBBackend(str(cfg.db_path))
|