sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +31 -30
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +49 -51
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/git.py +11 -4
- sqlcg/cli/commands/index.py +173 -21
- sqlcg/cli/commands/mcp.py +70 -3
- sqlcg/cli/commands/reindex.py +147 -77
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +49 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +184 -86
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +119 -41
- sqlcg/server/writer.py +459 -0
- sqlcg/core/kuzu_backend.py +0 -445
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
sqlcg/cli/commands/mcp.py
CHANGED
|
@@ -78,7 +78,12 @@ def mcp_status() -> None:
|
|
|
78
78
|
"""Print server status JSON (connects to control socket).
|
|
79
79
|
|
|
80
80
|
Returns JSON with fields: running, pid, db_path, indexed_sha, head_sha,
|
|
81
|
-
stale_by_commits, connected_clients, uptime when a server
|
|
81
|
+
stale_by_commits, connected_clients, uptime, writer_queue when a server
|
|
82
|
+
is live.
|
|
83
|
+
|
|
84
|
+
The status response is length-prefixed framed (v1.3.0, B3) so large
|
|
85
|
+
writer_queue payloads are received in full — the client uses the
|
|
86
|
+
recv-exactly makefile+readline+read(n) pattern, NOT a single recv(4096).
|
|
82
87
|
|
|
83
88
|
When no server is found: {"running": false}.
|
|
84
89
|
When the PID file exists with a live process but the socket is unavailable:
|
|
@@ -89,6 +94,7 @@ def mcp_status() -> None:
|
|
|
89
94
|
to the PID-file probe — never hangs or errors on a dead socket.
|
|
90
95
|
"""
|
|
91
96
|
import socket as _socket
|
|
97
|
+
from datetime import datetime
|
|
92
98
|
|
|
93
99
|
from sqlcg.server.control import is_pid_alive, read_pid, sock_path
|
|
94
100
|
|
|
@@ -98,8 +104,69 @@ def mcp_status() -> None:
|
|
|
98
104
|
s.settimeout(2)
|
|
99
105
|
s.connect(str(sp))
|
|
100
106
|
s.sendall(json.dumps({"op": "status"}).encode() + b"\n")
|
|
101
|
-
|
|
102
|
-
|
|
107
|
+
# Framed recv-exactly (B3 / OD-4): read length line then exactly that many bytes.
|
|
108
|
+
# This replaces the old s.recv(4096) which would truncate large writer_queue payloads.
|
|
109
|
+
f = s.makefile("rb")
|
|
110
|
+
length_line = f.readline()
|
|
111
|
+
if length_line:
|
|
112
|
+
try:
|
|
113
|
+
body_len = int(length_line.strip())
|
|
114
|
+
data = f.read(body_len)
|
|
115
|
+
except (ValueError, OSError):
|
|
116
|
+
data = length_line # fallback: treat first line as body
|
|
117
|
+
else:
|
|
118
|
+
data = b""
|
|
119
|
+
|
|
120
|
+
status = json.loads(data.decode())
|
|
121
|
+
|
|
122
|
+
# Pretty-print the base fields.
|
|
123
|
+
console.print_json(json.dumps({k: v for k, v in status.items() if k != "writer_queue"}))
|
|
124
|
+
|
|
125
|
+
# Render the writer_queue block separately for readability.
|
|
126
|
+
wq = status.get("writer_queue")
|
|
127
|
+
if wq:
|
|
128
|
+
console.print("\n[bold]writer_queue[/bold]")
|
|
129
|
+
active = wq.get("active")
|
|
130
|
+
if active:
|
|
131
|
+
console.print(f" active: op={active.get('op')!r} root={active.get('root')!r}")
|
|
132
|
+
prog = wq.get("active_progress", {})
|
|
133
|
+
if prog.get("state") == "running":
|
|
134
|
+
files_done = prog.get("files_done", 0)
|
|
135
|
+
files_total = prog.get("files_total")
|
|
136
|
+
if files_total:
|
|
137
|
+
console.print(f" progress: {files_done}/{files_total} files")
|
|
138
|
+
else:
|
|
139
|
+
console.print(" active: none")
|
|
140
|
+
|
|
141
|
+
pending = wq.get("pending", [])
|
|
142
|
+
console.print(f" pending: {len(pending)}")
|
|
143
|
+
|
|
144
|
+
total_coalesced = wq.get("coalesced_since_start", 0)
|
|
145
|
+
by_reason = wq.get("coalesced_by_reason", {})
|
|
146
|
+
if total_coalesced:
|
|
147
|
+
from sqlcg.server.writer import (
|
|
148
|
+
COALESCE_COLLAPSED_INTO_PENDING_REINDEX,
|
|
149
|
+
COALESCE_REINDEX_DROPPED_INDEX_PENDING,
|
|
150
|
+
COALESCE_SUPERSEDED_BY_INDEX,
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
n_sup = by_reason.get(COALESCE_SUPERSEDED_BY_INDEX, 0)
|
|
154
|
+
n_col = by_reason.get(COALESCE_COLLAPSED_INTO_PENDING_REINDEX, 0)
|
|
155
|
+
n_drop = by_reason.get(COALESCE_REINDEX_DROPPED_INDEX_PENDING, 0)
|
|
156
|
+
console.print(
|
|
157
|
+
f" coalesced: {total_coalesced} "
|
|
158
|
+
f"(superseded_by_index={n_sup}, "
|
|
159
|
+
f"collapsed_into_pending_reindex={n_col}, "
|
|
160
|
+
f"reindex_dropped_index_pending={n_drop})"
|
|
161
|
+
)
|
|
162
|
+
last_at = wq.get("last_coalesce_at")
|
|
163
|
+
last_reason = wq.get("last_coalesce_reason")
|
|
164
|
+
if last_at and last_reason:
|
|
165
|
+
last_human = datetime.fromtimestamp(last_at).strftime("%Y-%m-%d %H:%M:%S")
|
|
166
|
+
console.print(f" last coalesce: {last_reason} at {last_human}")
|
|
167
|
+
else:
|
|
168
|
+
console.print(" coalesced: 0")
|
|
169
|
+
|
|
103
170
|
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
104
171
|
# Socket unavailable — probe via PID file (R3: stale-socket fall-through)
|
|
105
172
|
rec = read_pid()
|
sqlcg/cli/commands/reindex.py
CHANGED
|
@@ -21,7 +21,7 @@ console = Console()
|
|
|
21
21
|
# Client-side socket timeout for the --notify control-socket path.
|
|
22
22
|
# A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
|
|
23
23
|
# 300 s covers that with headroom while keeping the wait bounded on a wedged server.
|
|
24
|
-
# This is a CLI transport bound, NOT a
|
|
24
|
+
# This is a CLI transport bound, NOT a DbConfig/indexer constant.
|
|
25
25
|
_NOTIFY_SOCKET_TIMEOUT_S = 300
|
|
26
26
|
|
|
27
27
|
|
|
@@ -70,92 +70,37 @@ def reindex_cmd( # noqa: B008
|
|
|
70
70
|
Exits with an error if the database schema version does not match the current
|
|
71
71
|
build — run 'sqlcg db reset && sqlcg db init && sqlcg index <path>' to re-init.
|
|
72
72
|
"""
|
|
73
|
-
import json
|
|
74
|
-
import socket as _socket
|
|
75
|
-
|
|
76
73
|
from sqlcg.core.config import config_file_present, get_backend, get_db_path, get_dialect
|
|
77
74
|
from sqlcg.core.schema import SCHEMA_VERSION
|
|
78
75
|
from sqlcg.indexer.indexer import Indexer
|
|
79
|
-
from sqlcg.server.control import sock_path
|
|
80
76
|
|
|
81
77
|
# Resolve to absolute path so ignore-spec and git delta receive an absolute root
|
|
82
78
|
path = path.resolve()
|
|
83
79
|
|
|
84
|
-
#
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
89
|
-
s.settimeout(_NOTIFY_SOCKET_TIMEOUT_S)
|
|
90
|
-
s.connect(str(sp))
|
|
91
|
-
# Resolve SHAs before sending — standalone mode reads from DB via socket
|
|
92
|
-
effective_from = from_sha
|
|
93
|
-
if effective_from is None:
|
|
94
|
-
# Standalone mode: we cannot read stored SHA here without opening the
|
|
95
|
-
# DB (which would conflict with the running server). If no --from is
|
|
96
|
-
# given with --notify, we send from="stored" as a sentinel and fall
|
|
97
|
-
# back to direct write; the caller should pass --from explicitly.
|
|
98
|
-
raise OSError( # noqa: TRY301
|
|
99
|
-
"--notify without --from requires direct DB access; falling through"
|
|
100
|
-
)
|
|
101
|
-
# Resolve symbolic refs (HEAD, branch names) to concrete 40-char SHAs
|
|
102
|
-
# before sending — prevents literal "HEAD" from being stored in the graph.
|
|
103
|
-
effective_from = _resolve_ref(path, effective_from)
|
|
104
|
-
effective_to = _resolve_ref(path, to_sha) if to_sha else _get_head(path)
|
|
105
|
-
payload = {
|
|
106
|
-
"op": "reindex",
|
|
107
|
-
"root": str(path),
|
|
108
|
-
"from": effective_from,
|
|
109
|
-
"to": effective_to,
|
|
110
|
-
"dialect": dialect,
|
|
111
|
-
}
|
|
112
|
-
s.sendall(json.dumps(payload).encode() + b"\n")
|
|
113
|
-
data = s.recv(65536)
|
|
114
|
-
result = json.loads(data)
|
|
115
|
-
if "error" in result:
|
|
116
|
-
console.print(f"[red]Server reindex error: {result['error']}[/red]")
|
|
117
|
-
raise typer.Exit(1)
|
|
118
|
-
if not quiet:
|
|
119
|
-
srv_summary = result.get("summary", {})
|
|
120
|
-
console.print(
|
|
121
|
-
f"[green]Resynced via server[/green] "
|
|
122
|
-
f"+{srv_summary.get('added', 0)} added, "
|
|
123
|
-
f"~{srv_summary.get('modified', 0)} modified, "
|
|
124
|
-
f"-{srv_summary.get('deleted', 0)} deleted"
|
|
125
|
-
)
|
|
126
|
-
raise typer.Exit(0)
|
|
127
|
-
except TimeoutError:
|
|
128
|
-
# Bug 1 fix: server is alive and working (accepted the connection, holds the
|
|
129
|
-
# lock, will finish and persist). Do NOT fall through to the direct-write
|
|
130
|
-
# path — that would hit the held lock and produce a false "Database is locked"
|
|
131
|
-
# error. Exit 0 so the git hook stays non-fatal; the server will complete.
|
|
132
|
-
# (socket.timeout is an alias of TimeoutError, a subclass of OSError — this
|
|
133
|
-
# clause must be listed before the broad OSError clause below.)
|
|
134
|
-
import sys
|
|
135
|
-
|
|
136
|
-
print(
|
|
137
|
-
f"Server is still applying the reindex (timed out waiting after "
|
|
138
|
-
f"{_NOTIFY_SOCKET_TIMEOUT_S}s); the graph will update when it finishes "
|
|
139
|
-
f"— check 'sqlcg mcp status'.",
|
|
140
|
-
file=sys.stderr,
|
|
141
|
-
)
|
|
142
|
-
raise typer.Exit(0) from None
|
|
143
|
-
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
144
|
-
# R3: no live server (stale socket, socket absent, fallback condition) —
|
|
145
|
-
# fall through to the existing direct-write path unchanged.
|
|
146
|
-
# NOTE: socket.timeout / TimeoutError is an OSError subclass, so the
|
|
147
|
-
# dedicated timeout clause above must be listed first (already is).
|
|
148
|
-
pass
|
|
149
|
-
except typer.Exit:
|
|
150
|
-
raise
|
|
151
|
-
except Exception as exc:
|
|
152
|
-
console.print(f"[red]--notify routing failed: {exc}[/red]")
|
|
153
|
-
raise typer.Exit(1) from exc
|
|
154
|
-
|
|
155
|
-
# Resolve dialect
|
|
80
|
+
# Resolve dialect before routing so the WriterRequest always carries a concrete
|
|
81
|
+
# dialect (never the literal sentinel "auto"). Bug A: the route call was before
|
|
82
|
+
# this resolution, causing the server to receive "auto" and fail with
|
|
83
|
+
# "Unknown dialect 'auto'" on every server-routed reindex.
|
|
156
84
|
if dialect == "auto":
|
|
157
85
|
dialect = get_dialect(path)
|
|
158
86
|
|
|
87
|
+
# Step 3.3 — route manual reindex through the socket when a server is live.
|
|
88
|
+
# The --notify flag is kept for backward compatibility but no longer required;
|
|
89
|
+
# manual reindex (no --notify) now also probes the socket by default.
|
|
90
|
+
# W3: from=null is sent when from_sha is None — the server resolves the stored
|
|
91
|
+
# SHA at drain start (no more "requires direct DB access" refusal).
|
|
92
|
+
_is_hook_path = notify # hook path: fire-and-forget; manual path: wait by default
|
|
93
|
+
_routed = _try_route_reindex_via_server(
|
|
94
|
+
path=path,
|
|
95
|
+
from_sha=from_sha,
|
|
96
|
+
to_sha=to_sha,
|
|
97
|
+
dialect=dialect,
|
|
98
|
+
wait=not _is_hook_path,
|
|
99
|
+
quiet=quiet,
|
|
100
|
+
)
|
|
101
|
+
if _routed:
|
|
102
|
+
return
|
|
103
|
+
|
|
159
104
|
if not quiet and not config_file_present(path):
|
|
160
105
|
console.print(
|
|
161
106
|
f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
|
|
@@ -248,6 +193,131 @@ def reindex_cmd( # noqa: B008
|
|
|
248
193
|
)
|
|
249
194
|
|
|
250
195
|
|
|
196
|
+
def _try_route_reindex_via_server(
|
|
197
|
+
*,
|
|
198
|
+
path: Path,
|
|
199
|
+
from_sha: str | None,
|
|
200
|
+
to_sha: str | None,
|
|
201
|
+
dialect: str | None,
|
|
202
|
+
wait: bool,
|
|
203
|
+
quiet: bool,
|
|
204
|
+
) -> bool:
|
|
205
|
+
"""Probe for a live server and route the reindex through the socket if found.
|
|
206
|
+
|
|
207
|
+
W3: ``from`` may be ``None`` — the server resolves the stored indexed SHA
|
|
208
|
+
at drain start. Symbolic refs are resolved to concrete SHAs before sending
|
|
209
|
+
(prevents literal "HEAD" being stored in the graph).
|
|
210
|
+
|
|
211
|
+
Returns True if the reindex was handled via the server (caller should return).
|
|
212
|
+
Returns False if no server is live (caller should fall through to direct path).
|
|
213
|
+
"""
|
|
214
|
+
import json
|
|
215
|
+
import socket as _socket
|
|
216
|
+
|
|
217
|
+
from sqlcg.server.control import sock_path
|
|
218
|
+
|
|
219
|
+
sp = sock_path()
|
|
220
|
+
if not sp.exists():
|
|
221
|
+
return False
|
|
222
|
+
|
|
223
|
+
# Resolve symbolic SHAs if provided (the hook path already resolves them).
|
|
224
|
+
effective_from = _resolve_ref(path, from_sha) if from_sha is not None else None
|
|
225
|
+
effective_to = _resolve_ref(path, to_sha) if to_sha is not None else None
|
|
226
|
+
|
|
227
|
+
payload = {
|
|
228
|
+
"op": "reindex",
|
|
229
|
+
"root": str(path),
|
|
230
|
+
"from": effective_from, # None → server resolves at drain start (W3)
|
|
231
|
+
"to": effective_to,
|
|
232
|
+
"dialect": dialect,
|
|
233
|
+
"wait": wait,
|
|
234
|
+
"requested_by": "hook" if not wait else "cli",
|
|
235
|
+
}
|
|
236
|
+
payload_bytes = json.dumps(payload).encode()
|
|
237
|
+
frame = f"{len(payload_bytes)}\n".encode() + payload_bytes
|
|
238
|
+
|
|
239
|
+
try:
|
|
240
|
+
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
241
|
+
s.settimeout(_NOTIFY_SOCKET_TIMEOUT_S)
|
|
242
|
+
s.connect(str(sp))
|
|
243
|
+
s.sendall(frame)
|
|
244
|
+
|
|
245
|
+
f = s.makefile("rb")
|
|
246
|
+
if not wait:
|
|
247
|
+
# Fire-and-forget: read one framed acknowledgement.
|
|
248
|
+
length_line = f.readline()
|
|
249
|
+
if length_line:
|
|
250
|
+
try:
|
|
251
|
+
body_len = int(length_line.strip())
|
|
252
|
+
resp_bytes = f.read(body_len)
|
|
253
|
+
result = json.loads(resp_bytes)
|
|
254
|
+
if "error" in result:
|
|
255
|
+
console.print(f"[red]Server reindex error: {result['error']}[/red]")
|
|
256
|
+
raise typer.Exit(1)
|
|
257
|
+
if not quiet:
|
|
258
|
+
pos = result.get("position", "?")
|
|
259
|
+
console.print(
|
|
260
|
+
f"[green]Reindex queued via server[/green] (position {pos})"
|
|
261
|
+
)
|
|
262
|
+
except (ValueError, json.JSONDecodeError):
|
|
263
|
+
pass
|
|
264
|
+
return True
|
|
265
|
+
|
|
266
|
+
# wait=True: stream framed frames until done:true.
|
|
267
|
+
while True:
|
|
268
|
+
length_line = f.readline()
|
|
269
|
+
if not length_line:
|
|
270
|
+
break
|
|
271
|
+
try:
|
|
272
|
+
body_len = int(length_line.strip())
|
|
273
|
+
except ValueError:
|
|
274
|
+
break
|
|
275
|
+
frame_bytes = f.read(body_len)
|
|
276
|
+
frame_resp = json.loads(frame_bytes)
|
|
277
|
+
|
|
278
|
+
if frame_resp.get("done"):
|
|
279
|
+
if not frame_resp.get("ok"):
|
|
280
|
+
err = frame_resp.get("error", "unknown error")
|
|
281
|
+
console.print(f"[red]Server reindex error: {err}[/red]")
|
|
282
|
+
raise typer.Exit(1)
|
|
283
|
+
srv_summary = frame_resp.get("summary", {})
|
|
284
|
+
if not quiet:
|
|
285
|
+
if srv_summary.get("fell_back_to_full"):
|
|
286
|
+
console.print(
|
|
287
|
+
"[yellow]Closure exceeded depth cap — fell back to full index "
|
|
288
|
+
"(via server).[/yellow]"
|
|
289
|
+
)
|
|
290
|
+
else:
|
|
291
|
+
console.print(
|
|
292
|
+
f"[green]Resynced via server[/green] "
|
|
293
|
+
f"+{srv_summary.get('added', 0)} added, "
|
|
294
|
+
f"~{srv_summary.get('modified', 0)} modified, "
|
|
295
|
+
f"-{srv_summary.get('deleted', 0)} deleted"
|
|
296
|
+
)
|
|
297
|
+
break
|
|
298
|
+
|
|
299
|
+
return True
|
|
300
|
+
|
|
301
|
+
except TimeoutError:
|
|
302
|
+
import sys
|
|
303
|
+
|
|
304
|
+
print(
|
|
305
|
+
f"Server is still applying the reindex (timed out waiting after "
|
|
306
|
+
f"{_NOTIFY_SOCKET_TIMEOUT_S}s); the graph will update when it finishes "
|
|
307
|
+
"— check 'sqlcg mcp status'.",
|
|
308
|
+
file=sys.stderr,
|
|
309
|
+
)
|
|
310
|
+
raise typer.Exit(0) from None
|
|
311
|
+
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
312
|
+
# No live server — fall through to direct path.
|
|
313
|
+
return False
|
|
314
|
+
except typer.Exit:
|
|
315
|
+
raise
|
|
316
|
+
except Exception as exc:
|
|
317
|
+
console.print(f"[red]Socket routing failed: {exc}[/red]")
|
|
318
|
+
raise typer.Exit(1) from exc
|
|
319
|
+
|
|
320
|
+
|
|
251
321
|
def _resolve_ref(root: Path, ref: str) -> str:
|
|
252
322
|
"""Resolve a git ref (HEAD, branch, tag, or concrete SHA) to a 40-char SHA.
|
|
253
323
|
|
sqlcg/cli/commands/uninstall.py
CHANGED
|
@@ -26,7 +26,7 @@ def uninstall_cmd( # noqa: B008
|
|
|
26
26
|
"""Uninstall sqlcg from Claude Code and optionally clean up resources.
|
|
27
27
|
|
|
28
28
|
Step 1: Remove MCP registration from ~/.claude/settings.json
|
|
29
|
-
Step 2: Optionally delete the
|
|
29
|
+
Step 2: Optionally delete the DuckDB graph database
|
|
30
30
|
Step 3: Remove git hook sentinel block from .git/hooks/post-checkout
|
|
31
31
|
Step 4: Remove sqlcg skill directory from ~/.claude/skills/sqlcg/ and
|
|
32
32
|
<repo>/.claude/skills/sqlcg/
|
|
@@ -34,7 +34,7 @@ def uninstall_cmd( # noqa: B008
|
|
|
34
34
|
# Step 1: Remove MCP entry from settings.json
|
|
35
35
|
_step1_remove_mcp_entry()
|
|
36
36
|
|
|
37
|
-
# Step 2: Offer to delete the
|
|
37
|
+
# Step 2: Offer to delete the database (unless --keep-db flag is set)
|
|
38
38
|
if not keep_db:
|
|
39
39
|
_step2_delete_database(force)
|
|
40
40
|
else:
|
|
@@ -84,7 +84,7 @@ def _step1_remove_mcp_entry() -> None:
|
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
def _step2_delete_database(force: bool) -> None:
|
|
87
|
-
"""Offer to delete the
|
|
87
|
+
"""Offer to delete the DuckDB graph database."""
|
|
88
88
|
db_path = _get_db_path()
|
|
89
89
|
|
|
90
90
|
if not db_path:
|
|
@@ -93,13 +93,6 @@ def _step2_delete_database(force: bool) -> None:
|
|
|
93
93
|
|
|
94
94
|
db_path_obj = Path(db_path)
|
|
95
95
|
|
|
96
|
-
# Check if it's a kuzu backend (not Neo4j)
|
|
97
|
-
# If db_path is a directory or ends with standard kuzu patterns, it's likely kuzu
|
|
98
|
-
# For now, we'll assume anything in .sqlcg/kuzu is kuzu
|
|
99
|
-
if not _is_kuzu_backend(db_path):
|
|
100
|
-
console.print("[dim]Database is not KùzuDB — skipping deletion[/dim]")
|
|
101
|
-
return
|
|
102
|
-
|
|
103
96
|
if not db_path_obj.exists():
|
|
104
97
|
console.print(f"[dim]Database not found at {db_path}[/dim]")
|
|
105
98
|
return
|
|
@@ -117,9 +110,11 @@ def _step2_delete_database(force: bool) -> None:
|
|
|
117
110
|
console.print("[dim]Keeping database[/dim]")
|
|
118
111
|
return
|
|
119
112
|
|
|
120
|
-
#
|
|
113
|
+
# DuckDB is a single file (+ optional .wal sibling); delete both.
|
|
121
114
|
try:
|
|
122
|
-
|
|
115
|
+
for target in (db_path_obj, db_path_obj.with_name(db_path_obj.name + ".wal")):
|
|
116
|
+
if target.exists():
|
|
117
|
+
target.unlink()
|
|
123
118
|
console.print(f"[green]Deleted graph database at {db_path}[/green]")
|
|
124
119
|
except Exception as e:
|
|
125
120
|
console.print(f"[yellow]Warning:[/yellow] Failed to delete database: {e}")
|
|
@@ -222,18 +217,12 @@ def _step3_remove_git_hook(repo_path: Path) -> None:
|
|
|
222
217
|
|
|
223
218
|
def _get_db_path() -> str | None:
|
|
224
219
|
"""Get the configured database path from environment or default."""
|
|
225
|
-
from sqlcg.core.config import
|
|
220
|
+
from sqlcg.core.config import DbConfig
|
|
226
221
|
|
|
227
|
-
db_path = str(
|
|
222
|
+
db_path = str(DbConfig.from_env().db_path)
|
|
228
223
|
return db_path if Path(db_path).exists() else None
|
|
229
224
|
|
|
230
225
|
|
|
231
|
-
def _is_kuzu_backend(db_path: str) -> bool:
|
|
232
|
-
"""Check if the database is a KùzuDB backend (not Neo4j)."""
|
|
233
|
-
backend = os.getenv("SQLCG_BACKEND", "kuzu").lower()
|
|
234
|
-
return backend in ("kuzu", "") # Default to kuzu if unset
|
|
235
|
-
|
|
236
|
-
|
|
237
226
|
# Candidate skill directory locations to remove (global first, then project-relative)
|
|
238
227
|
# Each entry is a callable(repo_path) -> Path resolving to the sqlcg skill dir.
|
|
239
228
|
_SKILL_DIR_TARGETS = [
|
sqlcg/core/__init__.py
CHANGED
|
@@ -2,7 +2,5 @@
|
|
|
2
2
|
|
|
3
3
|
from sqlcg.core import schema
|
|
4
4
|
from sqlcg.core.graph_db import GraphBackend
|
|
5
|
-
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
6
|
-
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
7
5
|
|
|
8
|
-
__all__ = ["GraphBackend", "
|
|
6
|
+
__all__ = ["GraphBackend", "schema"]
|
sqlcg/core/config.py
CHANGED
|
@@ -11,64 +11,37 @@ if TYPE_CHECKING:
|
|
|
11
11
|
from sqlcg.core.graph_db import GraphBackend
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class
|
|
15
|
-
"""Configuration for
|
|
14
|
+
class DbConfig(BaseModel):
|
|
15
|
+
"""Configuration for the DuckDB backend."""
|
|
16
16
|
|
|
17
17
|
db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")
|
|
18
|
-
buffer_pool_size_mb: int = Field(
|
|
19
|
-
default=0,
|
|
20
|
-
description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
|
|
21
|
-
)
|
|
22
18
|
log_path: Path = Field(
|
|
23
19
|
default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
|
|
24
20
|
description="Path for parse-warning log file written during indexing",
|
|
25
21
|
)
|
|
26
22
|
|
|
27
23
|
@classmethod
|
|
28
|
-
def from_env(cls) -> "
|
|
29
|
-
"""Load
|
|
24
|
+
def from_env(cls) -> "DbConfig":
|
|
25
|
+
"""Load database config from environment variables.
|
|
30
26
|
|
|
31
27
|
Returns:
|
|
32
|
-
|
|
28
|
+
DbConfig instance with environment-overridden values if present.
|
|
33
29
|
"""
|
|
34
30
|
env_path = os.getenv("SQLCG_DB_PATH")
|
|
35
|
-
env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
|
|
36
31
|
env_log = os.getenv("SQLCG_LOG_PATH")
|
|
37
32
|
return cls(
|
|
38
33
|
db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
|
|
39
|
-
buffer_pool_size_mb=int(env_buf) if env_buf else 0,
|
|
40
34
|
log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
|
|
41
35
|
)
|
|
42
36
|
|
|
43
37
|
|
|
44
|
-
class Neo4jConfig(BaseModel):
|
|
45
|
-
"""Configuration for Neo4j backend."""
|
|
46
|
-
|
|
47
|
-
uri: str = Field(default="bolt://localhost:7687")
|
|
48
|
-
user: str = Field(default="neo4j")
|
|
49
|
-
password: str = Field(default="password")
|
|
50
|
-
|
|
51
|
-
@classmethod
|
|
52
|
-
def from_env(cls) -> "Neo4jConfig":
|
|
53
|
-
"""Load Neo4j config from environment variables.
|
|
54
|
-
|
|
55
|
-
Returns:
|
|
56
|
-
Neo4jConfig instance with environment-overridden values if present.
|
|
57
|
-
"""
|
|
58
|
-
return cls(
|
|
59
|
-
uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
|
|
60
|
-
user=os.getenv("NEO4J_USER", "neo4j"),
|
|
61
|
-
password=os.getenv("NEO4J_PASSWORD", "password"),
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
|
|
65
38
|
def get_db_path() -> Path:
|
|
66
39
|
"""Get the database path from environment or use default.
|
|
67
40
|
|
|
68
41
|
Returns:
|
|
69
|
-
Path to the
|
|
42
|
+
Path to the DuckDB database file
|
|
70
43
|
"""
|
|
71
|
-
return
|
|
44
|
+
return DbConfig.from_env().db_path
|
|
72
45
|
|
|
73
46
|
|
|
74
47
|
def config_file_present(path: Path) -> bool:
|
|
@@ -347,58 +320,29 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
|
|
|
347
320
|
|
|
348
321
|
|
|
349
322
|
def get_backend(read_only: bool = False) -> "GraphBackend":
|
|
350
|
-
"""Get a
|
|
323
|
+
"""Get a DuckDBBackend instance.
|
|
324
|
+
|
|
325
|
+
The ``read_only`` parameter is accepted for API compatibility but is
|
|
326
|
+
ignored — DuckDB uses a single R/W handle for the process lifetime.
|
|
327
|
+
Concurrent read safety is provided by DuckDB's MVCC (readers see a
|
|
328
|
+
consistent snapshot during an in-flight write transaction).
|
|
329
|
+
|
|
330
|
+
Cross-process access: whichever process opens the DuckDB file first holds
|
|
331
|
+
an exclusive lock; other processes cannot open it at all (even read-only).
|
|
332
|
+
CLI read commands therefore route through the live MCP server via
|
|
333
|
+
``read_client.run_read_routed`` (v1.2.0) when a server is live, and open
|
|
334
|
+
the file directly only when no server is running.
|
|
351
335
|
|
|
352
336
|
Args:
|
|
353
|
-
read_only:
|
|
354
|
-
enables multiple concurrent read-only opens (reader/reader
|
|
355
|
-
concurrency), but does NOT allow reads while a read-write writer
|
|
356
|
-
holds the exclusive process lock — that requires routing through the
|
|
357
|
-
live MCP server via ``read_client.run_read_routed`` (v1.2.0).
|
|
358
|
-
Ignored for Neo4jBackend (Neo4j has no single-writer process lock;
|
|
359
|
-
the flag is a no-op and the normal connection is opened).
|
|
360
|
-
All writer call sites (index, reindex, db init/reset, server
|
|
361
|
-
init_backend) use the default ``False``.
|
|
337
|
+
read_only: Ignored for DuckDB. Accepted for API compatibility.
|
|
362
338
|
|
|
363
339
|
Returns:
|
|
364
|
-
A
|
|
340
|
+
A DuckDBBackend instance.
|
|
365
341
|
|
|
366
342
|
Raises:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
Note:
|
|
370
|
-
CLI read commands (find, analyze, db info, gain) route through a live
|
|
371
|
-
MCP server via ``read_client.run_read_routed`` (v1.2.0) when a server
|
|
372
|
-
is live, falling back to ``get_backend(read_only=True)`` when no server
|
|
373
|
-
is present. The fallback path still contends for the process lock under
|
|
374
|
-
an active writer (Windows / no-server fallback only).
|
|
343
|
+
duckdb.IOException: If the file is locked by another process.
|
|
375
344
|
"""
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
if backend_type == "kuzu":
|
|
379
|
-
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
345
|
+
from sqlcg.core.duckdb_backend import DuckDBBackend
|
|
380
346
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
return KuzuBackend(
|
|
384
|
-
str(kuzu_cfg.db_path),
|
|
385
|
-
buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
|
|
386
|
-
read_only=read_only,
|
|
387
|
-
)
|
|
388
|
-
except RuntimeError as exc:
|
|
389
|
-
if read_only and "READ ONLY" in str(exc):
|
|
390
|
-
# KùzuDB refuses to open a non-existent or empty DB in read-only
|
|
391
|
-
# mode ("Cannot create an empty database under READ ONLY mode").
|
|
392
|
-
# Surface the same empty-DB guidance the user sees from `db info`.
|
|
393
|
-
raise RuntimeError(
|
|
394
|
-
"Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
|
|
395
|
-
) from exc
|
|
396
|
-
raise
|
|
397
|
-
elif backend_type == "neo4j":
|
|
398
|
-
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
399
|
-
|
|
400
|
-
neo4j_cfg = Neo4jConfig.from_env()
|
|
401
|
-
# read_only is ignored for Neo4j — no single-writer process lock.
|
|
402
|
-
return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
|
|
403
|
-
else:
|
|
404
|
-
raise ValueError(f"Unknown backend type: {backend_type}")
|
|
347
|
+
cfg = DbConfig.from_env()
|
|
348
|
+
return DuckDBBackend(str(cfg.db_path))
|