sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +31 -30
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +49 -51
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/git.py +11 -4
- sqlcg/cli/commands/index.py +173 -21
- sqlcg/cli/commands/mcp.py +70 -3
- sqlcg/cli/commands/reindex.py +147 -77
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +49 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +184 -86
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +119 -41
- sqlcg/server/writer.py +459 -0
- sqlcg/core/kuzu_backend.py +0 -445
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
sqlcg/core/kuzu_backend.py
DELETED
|
@@ -1,445 +0,0 @@
|
|
|
1
|
-
"""KùzuDB implementation of GraphBackend."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Iterator
|
|
4
|
-
from contextlib import contextmanager
|
|
5
|
-
from typing import Any
|
|
6
|
-
|
|
7
|
-
import kuzu
|
|
8
|
-
|
|
9
|
-
from sqlcg.core.graph_db import GraphBackend
|
|
10
|
-
from sqlcg.core.queries import (
|
|
11
|
-
DELETE_COLUMNS_FOR_FILE,
|
|
12
|
-
DELETE_FILE,
|
|
13
|
-
DELETE_QUERIES_FOR_FILE,
|
|
14
|
-
DELETE_TABLES_FOR_FILE,
|
|
15
|
-
)
|
|
16
|
-
from sqlcg.core.schema import (
|
|
17
|
-
NODE_REPO,
|
|
18
|
-
SCHEMA_DDL,
|
|
19
|
-
SCHEMA_VERSION,
|
|
20
|
-
)
|
|
21
|
-
from sqlcg.utils.logging import getLogger
|
|
22
|
-
|
|
23
|
-
logger = getLogger(__name__)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def _find_lock_holder(db_path: str) -> str:
|
|
27
|
-
"""Return a human-readable PID string for the process holding the DB lock.
|
|
28
|
-
|
|
29
|
-
Uses lsof on Linux/macOS. Returns a descriptive fallback if lsof is
|
|
30
|
-
unavailable or returns no results.
|
|
31
|
-
"""
|
|
32
|
-
import shutil
|
|
33
|
-
import subprocess
|
|
34
|
-
|
|
35
|
-
if not shutil.which("lsof"):
|
|
36
|
-
return "PID unknown (lsof not available)"
|
|
37
|
-
try:
|
|
38
|
-
result = subprocess.run(
|
|
39
|
-
["lsof", "-t", db_path],
|
|
40
|
-
capture_output=True,
|
|
41
|
-
text=True,
|
|
42
|
-
timeout=3,
|
|
43
|
-
)
|
|
44
|
-
pids = result.stdout.strip().split()
|
|
45
|
-
if pids:
|
|
46
|
-
return f"PID {', '.join(pids)}"
|
|
47
|
-
except Exception:
|
|
48
|
-
pass
|
|
49
|
-
return "PID unknown"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class KuzuBackend(GraphBackend):
    """KùzuDB implementation of the graph database backend.

    Holds one ``kuzu`` database handle and one connection on it. All Cypher
    is issued through that single connection; ``transaction()`` wraps a group
    of calls in BEGIN TRANSACTION / COMMIT / ROLLBACK.
    """

    def __init__(self, db_path: str, buffer_pool_size_mb: int = 0, read_only: bool = False):
        """Initialize KùzuDB backend.

        Args:
            db_path: Path to the KùzuDB database file (or ':memory:' for in-memory)
            buffer_pool_size_mb: Buffer pool size in MB (0 = use KuzuDB default)
            read_only: Open in read-only mode. Enables concurrent read-only
                opens (reader/reader concurrency) by not taking the exclusive
                write lock. Does NOT allow reads while a read-write writer
                holds the lock — KùzuDB's exclusive lock is process-level.

        Raises:
            RuntimeError: If the database is locked or cannot be opened.
        """
        self._db_path = db_path
        try:
            kwargs: dict = {"read_only": read_only}
            if buffer_pool_size_mb > 0:
                kwargs["buffer_pool_size"] = buffer_pool_size_mb * 1024 * 1024
            self._db = kuzu.database.Database(db_path, **kwargs)
        except RuntimeError as exc:
            # Any RuntimeError mentioning "lock" (case-insensitive) is treated
            # as a lock conflict; this also covers "Could not set lock".
            if "lock" in str(exc).lower():
                # Attempt to find the holding PID via lsof
                pid_hint = _find_lock_holder(db_path)
                # Only numeric tokens are real PIDs. When the holder is
                # unknown the hint contains none, so fall back to a
                # placeholder rather than suggesting e.g. "kill unknown".
                pids = [
                    token
                    for token in (part.strip(",") for part in pid_hint.split())
                    if token.isdigit()
                ]
                pid_str = " ".join(pids) if pids else "<PID>"
                msg = (
                    f"Database is locked — another sqlcg process is running "
                    f"({pid_hint}). "
                    f"Wait for it to finish or kill it with: kill {pid_str}"
                )
                raise RuntimeError(msg) from exc
            raise
        self._conn = kuzu.Connection(self._db)
        self._in_transaction = False

    def init_schema(self) -> None:
        """Initialize the database schema if not already present.

        Probes for the Repo node table; if the probe succeeds the schema is
        assumed to exist and nothing is done. Otherwise every statement in
        SCHEMA_DDL is executed and the schema version is recorded, all inside
        one transaction.
        """
        # Check if Repo node table exists (first table in DDL)
        try:
            self._conn.execute(f"MATCH (n:{NODE_REPO}) RETURN COUNT(*) as count LIMIT 1")
        except Exception:
            # Any probe failure is read as "schema not initialized yet".
            pass
        else:
            logger.debug("Schema already initialized")
            return

        # Drop blank lines and "--" comment lines, then join the remaining
        # lines into statements; a line ending in ";" closes a statement.
        statements: list[str] = []
        pending: list[str] = []
        for raw_line in SCHEMA_DDL.split("\n"):
            line = raw_line.strip()
            if not line or line.startswith("--"):
                continue
            pending.append(line)
            if line.endswith(";"):
                statements.append(" ".join(pending))
                pending = []

        # Execute all DDL statements and schema version in a transaction
        with self.transaction():
            for stmt in statements:
                if not stmt.strip():
                    continue
                try:
                    self._conn.execute(stmt)
                    logger.debug(f"Executed DDL: {stmt[:50]}...")
                except Exception as e:
                    logger.error(f"DDL execution failed: {stmt[:50]}...: {e}")
                    raise

            # Upsert the schema version
            try:
                self._conn.execute(
                    "MERGE (v:SchemaVersion {version: $v})",
                    {"v": SCHEMA_VERSION},
                )
                logger.debug(f"Wrote schema version: {SCHEMA_VERSION}")
            except Exception as e:
                logger.error(f"Failed to write schema version: {e}")
                raise

    def upsert_node(self, label: str, key: str, properties: dict[str, Any]) -> None:
        """Upsert a node with the given label and properties.

        The node is matched on the label's primary-key field (looked up via
        ``_pk_field``) using ``key``. Properties named like the primary-key
        field are skipped in the SET clause.

        Args:
            label: Node table name.
            key: Primary-key value of the node.
            properties: Property values to set on the node.

        Raises:
            Exception: Whatever the underlying execute raises (logged first).
        """
        # Validate property keys to prevent Cypher injection
        self._validate_props(properties)

        pk_field = self._pk_field(label)

        # Filter out the primary key field from properties (cannot be updated via SET)
        set_properties = {k: v for k, v in properties.items() if k != pk_field}

        # Format: MERGE (n:Label {pk_field: $key}) SET n.field = $p_field, ...
        # Property parameters get a "p_" prefix so that a property which is
        # itself called "key" can never overwrite the $key used by MERGE.
        params: dict[str, Any] = {"key": key}
        set_parts: list[str] = []
        for name, value in set_properties.items():
            params[f"p_{name}"] = value
            set_parts.append(f"n.{name} = $p_{name}")

        query = f"MERGE (n:{label} {{{pk_field}: $key}})"
        if set_parts:
            query += f" SET {', '.join(set_parts)}"

        try:
            self._conn.execute(query, params)
        except Exception as e:
            logger.error(f"upsert_node failed: {label} {key}: {e}")
            raise

    def upsert_edge(
        self,
        src_label: str,
        src_key: str,
        dst_label: str,
        dst_key: str,
        rel_type: str,
        properties: dict[str, Any],
    ) -> None:
        """Upsert a relationship between two nodes.

        Both endpoints are MATCHed by primary key, so the edge is only
        created when both nodes already exist.

        Args:
            src_label: Node table of the source node.
            src_key: Primary-key value of the source node.
            dst_label: Node table of the destination node.
            dst_key: Primary-key value of the destination node.
            rel_type: Relationship table name.
            properties: Property values to set on the relationship.

        Raises:
            Exception: Whatever the underlying execute raises (logged first).
        """
        # Validate property keys to prevent Cypher injection
        self._validate_props(properties)

        src_pk_field = self._pk_field(src_label)
        dst_pk_field = self._pk_field(dst_label)

        # langchain_kuzu incompatible with our typed DDL schema.
        query = f"""
MATCH (src:{src_label} {{{src_pk_field}: $src_key}})
MATCH (dst:{dst_label} {{{dst_pk_field}: $dst_key}})
MERGE (src)-[r:{rel_type}]->(dst)
"""

        # Property parameters get a "p_" prefix so a property named
        # "src_key"/"dst_key" cannot clobber the endpoint parameters.
        params: dict[str, Any] = {"src_key": src_key, "dst_key": dst_key}
        if properties:
            set_parts: list[str] = []
            for name, value in properties.items():
                params[f"p_{name}"] = value
                set_parts.append(f"r.{name} = $p_{name}")
            query += f" SET {', '.join(set_parts)}"

        try:
            self._conn.execute(query, params)
        except Exception as e:
            logger.error(f"upsert_edge failed: {src_label} -> {rel_type} -> {dst_label}: {e}")
            raise

    def upsert_nodes_bulk(self, label: str, rows: list[dict[str, Any]]) -> None:
        """Bulk-upsert nodes of one label in a single backend round-trip.

        Args:
            label: Node table name.
            rows: One dict per node. Every row must carry the same keys, and
                those keys must include the label's primary-key field. An
                empty list is a no-op.

        Raises:
            ValueError: If the primary key is missing or rows are not homogeneous.
            Exception: Whatever the underlying execute raises (logged first).
        """
        if not rows:
            return
        # Validate all property keys across all rows (same guard as upsert_node)
        for row in rows:
            self._validate_props(row)

        pk_field = self._pk_field(label)
        # Determine the property key set from the first row; require homogeneity.
        keys = list(rows[0].keys())
        if pk_field not in keys:
            raise ValueError(
                f"upsert_nodes_bulk({label}): every row must include primary key '{pk_field}'"
            )
        expected = set(keys)
        for i, row in enumerate(rows[1:], 1):
            if set(row.keys()) != expected:
                raise ValueError(
                    f"upsert_nodes_bulk({label}): row {i} has property keys "
                    f"{sorted(row.keys())}, expected {sorted(keys)}"
                )

        set_keys = [k for k in keys if k != pk_field]
        # UNWIND $rows AS row MERGE (n:Label {pk: row.pk}) SET n.k = row.k, ...
        query = f"UNWIND $rows AS row MERGE (n:{label} {{{pk_field}: row.{pk_field}}})"
        if set_keys:
            set_parts = [f"n.{k} = row.{k}" for k in set_keys]
            query += f" SET {', '.join(set_parts)}"

        try:
            self._conn.execute(query, {"rows": rows})
        except Exception as e:
            logger.error(f"upsert_nodes_bulk failed: {label} ({len(rows)} rows): {e}")
            raise

    def upsert_edges_bulk(
        self,
        src_label: str,
        dst_label: str,
        rel_type: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Bulk-upsert edges of one (src_label, rel_type, dst_label) triple.

        Args:
            src_label: Node table of the source nodes.
            dst_label: Node table of the destination nodes.
            rel_type: Relationship table name.
            rows: One dict per edge. Each row must contain 'src_key' and
                'dst_key'; every other entry is set as a relationship
                property. All rows must carry the same keys. An empty list
                is a no-op.

        Raises:
            ValueError: If 'src_key'/'dst_key' is missing or rows are not homogeneous.
            Exception: Whatever the underlying execute raises (logged first).
        """
        if not rows:
            return
        endpoint_keys = ("src_key", "dst_key")
        for row in rows:
            # src_key/dst_key are not graph properties; validate the remainder.
            props = {k: v for k, v in row.items() if k not in endpoint_keys}
            self._validate_props(props)

        src_pk = self._pk_field(src_label)
        dst_pk = self._pk_field(dst_label)

        keys = list(rows[0].keys())
        for required in endpoint_keys:
            if required not in keys:
                raise ValueError(
                    f"upsert_edges_bulk({src_label}->{rel_type}->{dst_label}): "
                    f"every row must include '{required}'"
                )
        expected = set(keys)
        for i, row in enumerate(rows[1:], 1):
            if set(row.keys()) != expected:
                raise ValueError(
                    f"upsert_edges_bulk: row {i} has property keys {sorted(row.keys())}, "
                    f"expected {sorted(keys)}"
                )

        prop_keys = [k for k in keys if k not in endpoint_keys]
        query = (
            f"UNWIND $rows AS row "
            f"MATCH (src:{src_label} {{{src_pk}: row.src_key}}) "
            f"MATCH (dst:{dst_label} {{{dst_pk}: row.dst_key}}) "
            f"MERGE (src)-[r:{rel_type}]->(dst)"
        )
        if prop_keys:
            set_parts = [f"r.{k} = row.{k}" for k in prop_keys]
            query += f" SET {', '.join(set_parts)}"

        try:
            self._conn.execute(query, {"rows": rows})
        except Exception as e:
            logger.error(
                f"upsert_edges_bulk failed: {src_label}->{rel_type}->{dst_label} "
                f"({len(rows)} rows): {e}"
            )
            raise

    def run_read(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
        """Execute a read-only query and return results.

        Args:
            query: Cypher text.
            params: Query parameters.

        Returns:
            One dict per result row, keyed by the result's column names.

        Raises:
            Exception: Whatever the underlying execute raises (logged first).
        """
        try:
            result = self._conn.execute(query, params)
            # KùzuDB returns a QueryResult that we need to convert to list of dicts
            column_names = result.get_column_names()  # type: ignore[union-attr]
            # strict=True: a row whose width differs from the header is an error.
            return [dict(zip(column_names, row, strict=True)) for row in result]
        except Exception as e:
            logger.error(f"run_read failed: {e}")
            raise

    def run_write(self, query: str, params: dict[str, Any]) -> None:
        """Execute a write query (mutation).

        Args:
            query: Cypher text.
            params: Query parameters.

        Raises:
            Exception: Whatever the underlying execute raises (logged first).
        """
        try:
            self._conn.execute(query, params)
        except Exception as e:
            logger.error(f"run_write failed: {e}")
            raise

    def delete_nodes_for_file(self, file_path: str) -> None:
        """Delete all nodes and relationships associated with a file.

        This executes four separate Cypher statements:
        1. Delete Column nodes for tables defined in this file
        2. Delete Query nodes and their edges
        3. Delete Table nodes defined in this file
        4. Delete the File node itself

        KùzuDB does not support multiple statements in a single execute() call,
        so each statement is executed separately within the active transaction.

        Args:
            file_path: Bound to the ``$path`` parameter of every statement.

        Raises:
            Exception: Whatever the underlying execute raises (logged first).
        """
        params = {"path": file_path}
        try:
            # Step A: Delete Column nodes for tables defined in this file
            self._conn.execute(DELETE_COLUMNS_FOR_FILE, params)
            logger.debug(f"Deleted Column nodes for {file_path}")

            # Step B: Delete Query nodes and their edges
            self._conn.execute(DELETE_QUERIES_FOR_FILE, params)
            logger.debug(f"Deleted Query nodes for {file_path}")

            # Step C: Delete Table nodes defined in this file
            self._conn.execute(DELETE_TABLES_FOR_FILE, params)
            logger.debug(f"Deleted Table nodes for {file_path}")

            # Step D: Delete the File node itself
            self._conn.execute(DELETE_FILE, params)
            logger.debug(f"Deleted File node for {file_path}")

        except Exception as e:
            logger.error(f"delete_nodes_for_file failed for {file_path}: {e}")
            raise

    def get_schema_version(self) -> str | None:
        """Get the stored schema version from the database.

        Returns:
            The schema version string, or None if not set or if the read
            fails (the failure is logged as a warning, not raised).
        """
        try:
            result = self.run_read(
                "MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {}
            )
            return result[0]["version"] if result else None
        except Exception as e:
            logger.warning(f"Failed to read schema version: {e}")
            return None

    def set_indexed_sha(self, sha: str) -> None:
        """Persist the git SHA of the last successful index.

        Uses MERGE so the call is safe whether the SchemaVersion node already
        exists or not. Mirrors the init_schema MERGE pattern. Failures are
        logged as a warning and not raised.

        Args:
            sha: Git commit SHA string.
        """
        try:
            self._conn.execute(
                "MERGE (v:SchemaVersion {version: $version}) SET v.indexed_sha = $sha",
                {"version": SCHEMA_VERSION, "sha": sha},
            )
        except Exception as e:
            logger.warning(f"Failed to write indexed_sha: {e}")

    def get_indexed_sha(self) -> str | None:
        """Retrieve the git SHA of the last successful index.

        Returns:
            The stored SHA string, or None if never set or if the read fails
            (the failure is logged as a warning, not raised).
        """
        try:
            result = self.run_read(
                "MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
            )
            return result[0]["sha"] if result else None
        except Exception as e:
            logger.warning(f"Failed to read indexed_sha: {e}")
            return None

    def close(self) -> None:
        """Close the connection and then the database handle.

        Raises:
            Exception: Whatever either close call raises (logged first).
        """
        try:
            try:
                self._conn.close()
            finally:
                # Release the database handle even when closing the
                # connection failed, so the handle is not leaked.
                self._db.close()
            logger.debug("KuzuBackend connection closed")
        except Exception as e:
            logger.error(f"Error closing KuzuBackend: {e}")
            raise

    @contextmanager
    def transaction(self) -> Iterator["KuzuBackend"]:
        """Context manager for KùzuDB transactions.

        Uses Cypher's BEGIN TRANSACTION / COMMIT / ROLLBACK commands.
        KùzuDB 0.11.3 transaction API: execute("BEGIN TRANSACTION"),
        then execute("COMMIT") or execute("ROLLBACK").

        Yields:
            self (the KuzuBackend instance)

        Raises:
            Any exception raised in the context triggers ROLLBACK and is
            re-raised, except errors whose text contains
            "No active transaction": those are logged and suppressed.
        """
        try:
            self._conn.execute("BEGIN TRANSACTION")
            self._in_transaction = True
            yield self
            self._conn.execute("COMMIT")
        except Exception as exc:
            if "No active transaction" in str(exc):
                # KuzuDB killed the transaction internally (e.g. KU_UNREACHABLE assertion
                # during a bulk write). Individual failures were already logged; the batch
                # is lost but the indexer can continue.
                logger.warning("Transaction invalidated by KuzuDB — batch not committed")
                return
            try:
                self._conn.execute("ROLLBACK")
            except Exception as rollback_err:
                logger.debug("Rollback skipped: %s", rollback_err)
            raise
        finally:
            # Reset on every exit path, including BaseException such as
            # KeyboardInterrupt, so the flag never stays stuck at True.
            self._in_transaction = False
|
sqlcg/core/neo4j_backend.py
DELETED
|
@@ -1,233 +0,0 @@
|
|
|
1
|
-
"""Neo4j implementation of GraphBackend."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Iterator
|
|
4
|
-
from contextlib import contextmanager
|
|
5
|
-
from typing import Any
|
|
6
|
-
|
|
7
|
-
from sqlcg.core.graph_db import GraphBackend
|
|
8
|
-
from sqlcg.core.queries import (
|
|
9
|
-
DELETE_COLUMNS_FOR_FILE,
|
|
10
|
-
DELETE_FILE,
|
|
11
|
-
DELETE_QUERIES_FOR_FILE,
|
|
12
|
-
DELETE_TABLES_FOR_FILE,
|
|
13
|
-
)
|
|
14
|
-
from sqlcg.core.schema import NODE_COLUMN, NODE_FILE, NODE_QUERY, NODE_REPO, NODE_TABLE
|
|
15
|
-
from sqlcg.utils.logging import getLogger
|
|
16
|
-
|
|
17
|
-
logger = getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
try:
|
|
20
|
-
from neo4j import GraphDatabase as _GraphDatabase
|
|
21
|
-
|
|
22
|
-
GraphDatabase = _GraphDatabase
|
|
23
|
-
NEO4J_AVAILABLE = True
|
|
24
|
-
except ImportError:
|
|
25
|
-
GraphDatabase = None # type: ignore[assignment,misc]
|
|
26
|
-
NEO4J_AVAILABLE = False
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class Neo4jBackend(GraphBackend):
    """Neo4j implementation of the graph database backend.

    Keeps one driver and one long-lived session. Outside ``transaction()``
    statements run on that session; inside it they run on the explicit
    transaction so that commit/rollback actually cover them.
    """

    def __init__(self, uri: str, user: str, password: str):
        """Initialize Neo4j backend.

        Args:
            uri: Neo4j connection URI (e.g., "bolt://localhost:7687")
            user: Neo4j username
            password: Neo4j password

        Raises:
            ImportError: If the neo4j package is not installed
        """
        if not NEO4J_AVAILABLE:
            raise ImportError(
                "neo4j package is not installed. "
                "Install it with: pip install 'sql-code-graph[neo4j]'"
            )

        self._driver = GraphDatabase.driver(uri, auth=(user, password))
        self._session = self._driver.session()
        # Explicit transaction opened by transaction(), or None when
        # statements should run directly on the long-lived session.
        self._tx = None

    def _run(self, query: str, params: dict[str, Any] | None = None):
        """Run a statement on the active transaction, else on the session."""
        # getattr keeps this usable on instances built without __init__.
        active_tx = getattr(self, "_tx", None)
        runner = active_tx if active_tx is not None else self._session
        return runner.run(query, params)

    def init_schema(self) -> None:
        """Initialize the database schema if not already present.

        Creates one index per node label. A failing index statement is
        logged as a warning and skipped; it does not abort the others.
        """
        # IF NOT EXISTS already ensures idempotency; APOC utilities add no safety benefit here.
        indexes = [
            f"CREATE INDEX idx_repo_path IF NOT EXISTS FOR (r:{NODE_REPO}) ON (r.path)",
            f"CREATE INDEX idx_file_path IF NOT EXISTS FOR (f:{NODE_FILE}) ON (f.path)",
            f"CREATE INDEX idx_table_qualified IF NOT EXISTS FOR (t:{NODE_TABLE}) ON (t.qualified)",
            f"CREATE INDEX idx_column_id IF NOT EXISTS FOR (c:{NODE_COLUMN}) ON (c.id)",
            f"CREATE INDEX idx_query_id IF NOT EXISTS FOR (q:{NODE_QUERY}) ON (q.id)",
        ]

        for index_query in indexes:
            try:
                # Index DDL deliberately stays on the session (as before)
                # rather than joining an open data transaction.
                self._session.run(index_query)
                logger.debug(f"Created index: {index_query[:50]}...")
            except Exception as e:
                logger.warning(f"Index creation skipped: {e}")

    def upsert_node(self, label: str, key: str, properties: dict[str, Any]) -> None:
        """Upsert a node with the given label and properties.

        Args:
            label: Node label.
            key: Primary-key value; the key field comes from ``_pk_field``.
            properties: Properties merged onto the node with ``SET n += $props``.

        Raises:
            Exception: Whatever the driver raises (logged first).
        """
        # Validate property keys to prevent Cypher injection
        self._validate_props(properties)

        pk_field = self._pk_field(label)
        query = f"MERGE (n:{label} {{{pk_field}: $key}}) SET n += $props"
        try:
            self._run(query, {"key": key, "props": properties})
        except Exception as e:
            logger.error(f"upsert_node failed: {label} {key}: {e}")
            raise

    def upsert_edge(
        self,
        src_label: str,
        src_key: str,
        dst_label: str,
        dst_key: str,
        rel_type: str,
        properties: dict[str, Any],
    ) -> None:
        """Upsert a relationship between two nodes.

        Both endpoints are MATCHed by primary key, so the edge is only
        created when both nodes already exist.

        Args:
            src_label: Label of the source node.
            src_key: Primary-key value of the source node.
            dst_label: Label of the destination node.
            dst_key: Primary-key value of the destination node.
            rel_type: Relationship type.
            properties: Properties merged onto the edge with ``SET r += $props``.

        Raises:
            Exception: Whatever the driver raises (logged first).
        """
        # Validate property keys to prevent Cypher injection
        self._validate_props(properties)

        src_pk = self._pk_field(src_label)
        dst_pk = self._pk_field(dst_label)
        query = (
            f"MATCH (src:{src_label} {{{src_pk}: $src_key}})"
            f" MATCH (dst:{dst_label} {{{dst_pk}: $dst_key}})"
            f" MERGE (src)-[r:{rel_type}]->(dst)"
            " SET r += $props"
        )
        try:
            self._run(query, {"src_key": src_key, "dst_key": dst_key, "props": properties})
        except Exception as e:
            logger.error(f"upsert_edge failed: {src_label} -> {rel_type} -> {dst_label}: {e}")
            raise

    def upsert_nodes_bulk(self, label: str, rows: list[dict[str, Any]]) -> None:
        """Bulk-upsert nodes of one label.

        Neo4j adapter is not currently implemented. Use KuzuBackend instead.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Neo4j bulk upsert is not yet implemented. Use KuzuBackend instead."
        )

    def upsert_edges_bulk(
        self,
        src_label: str,
        dst_label: str,
        rel_type: str,
        rows: list[dict[str, Any]],
    ) -> None:
        """Bulk-upsert edges of one (src_label, rel_type, dst_label) triple.

        Neo4j adapter is not currently implemented. Use KuzuBackend instead.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Neo4j bulk upsert is not yet implemented. Use KuzuBackend instead."
        )

    def run_read(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
        """Execute a read-only query and return results.

        Args:
            query: Cypher text.
            params: Query parameters.

        Returns:
            One dict per result record.

        Raises:
            Exception: Whatever the driver raises (logged first).
        """
        try:
            result = self._run(query, params)
            return [dict(record) for record in result]
        except Exception as e:
            logger.error(f"run_read failed: {e}")
            raise

    def run_write(self, query: str, params: dict[str, Any]) -> None:
        """Execute a write query (mutation).

        Args:
            query: Cypher text.
            params: Query parameters.

        Raises:
            Exception: Whatever the driver raises (logged first).
        """
        try:
            self._run(query, params)
        except Exception as e:
            logger.error(f"run_write failed: {e}")
            raise

    def delete_nodes_for_file(self, file_path: str) -> None:
        """Delete all nodes and relationships associated with a file.

        Runs the four shared delete statements in order: columns, queries,
        tables, then the file node itself.

        Args:
            file_path: Bound to the ``$path`` parameter of every statement.

        Raises:
            Exception: Whatever the driver raises (logged first).
        """
        params = {"path": file_path}
        try:
            # Step A: Delete SqlColumn nodes for tables defined in this file
            self._run(DELETE_COLUMNS_FOR_FILE, params)
            # Step B: Delete SqlQuery nodes
            self._run(DELETE_QUERIES_FOR_FILE, params)
            # Step C: Delete SqlTable nodes defined in this file
            self._run(DELETE_TABLES_FOR_FILE, params)
            # Step D: Delete the File node itself
            self._run(DELETE_FILE, params)
            logger.debug(f"Deleted all nodes for {file_path}")
        except Exception as e:
            logger.error(f"delete_nodes_for_file failed for {file_path}: {e}")
            raise

    def get_schema_version(self) -> str | None:
        """Get the stored schema version from the database.

        Returns:
            The schema version string, or None if not set or if the read
            fails (the failure is logged as a warning, not raised).
        """
        try:
            result = self.run_read(
                "MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {}
            )
            return result[0]["version"] if result else None
        except Exception as e:
            logger.warning(f"Failed to read schema version: {e}")
            return None

    def set_indexed_sha(self, sha: str) -> None:
        """Persist the git SHA of the last successful index (Neo4j stub).

        Neo4j support for indexed_sha is not yet implemented.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("set_indexed_sha is not yet implemented for Neo4jBackend")

    def get_indexed_sha(self) -> str | None:
        """Retrieve the git SHA of the last successful index (Neo4j stub).

        Neo4j support for indexed_sha is not yet implemented.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("get_indexed_sha is not yet implemented for Neo4jBackend")

    def close(self) -> None:
        """Close the long-lived session and then the driver.

        Raises:
            Exception: Whatever either close call raises (logged first).
        """
        try:
            try:
                self._session.close()
            finally:
                # Shut the driver down even when closing the session failed,
                # so the driver is not leaked.
                self._driver.close()
            logger.debug("Neo4jBackend connection closed")
        except Exception as e:
            logger.error(f"Error closing Neo4jBackend: {e}")
            raise

    @contextmanager
    def transaction(self) -> Iterator["Neo4jBackend"]:
        """Context manager for Neo4j transactions.

        Creates a fresh session per transaction to avoid issues with shared
        long-lived sessions that may be closed externally. While the context
        is active, upserts, reads, writes and deletes are routed to this
        transaction, so they are committed or rolled back together.

        Yields:
            self (the Neo4jBackend instance)

        Raises:
            Any exception raised in the context triggers ROLLBACK.
        """
        session = self._driver.session()
        try:
            tx = session.begin_transaction()
            # Remember any enclosing transaction so it is restored on exit.
            outer_tx = getattr(self, "_tx", None)
            self._tx = tx
            try:
                yield self
                tx.commit()
            except Exception:
                tx.rollback()
                raise
            finally:
                self._tx = outer_tx
        finally:
            # Also reached when begin_transaction() itself fails, so the
            # fresh session is never leaked.
            session.close()
|
|
File without changes
|
|
File without changes
|