sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +31 -30
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +49 -51
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/git.py +11 -4
- sqlcg/cli/commands/index.py +173 -21
- sqlcg/cli/commands/mcp.py +70 -3
- sqlcg/cli/commands/reindex.py +147 -77
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +49 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +184 -86
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +119 -41
- sqlcg/server/writer.py +459 -0
- sqlcg/core/kuzu_backend.py +0 -445
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
sqlcg/server/tools.py
CHANGED
|
@@ -6,11 +6,15 @@ import time
|
|
|
6
6
|
from collections import deque
|
|
7
7
|
from contextlib import contextmanager
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
import anyio
|
|
9
13
|
|
|
10
14
|
from sqlcg.core.config import get_db_path, get_presentation_prefixes
|
|
15
|
+
from sqlcg.core.duckdb_backend import DuckDBBackend
|
|
11
16
|
from sqlcg.core.freshness import compute_freshness
|
|
12
17
|
from sqlcg.core.graph_db import GraphBackend
|
|
13
|
-
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
14
18
|
from sqlcg.core.queries import (
|
|
15
19
|
ANALYZE_UNUSED_TABLES_QUERY,
|
|
16
20
|
FIND_DEFINITION_QUERY,
|
|
@@ -94,32 +98,74 @@ from sqlcg.utils.logging import getLogger # noqa: E402
|
|
|
94
98
|
|
|
95
99
|
logger = getLogger(__name__)
|
|
96
100
|
|
|
97
|
-
# Module-level singleton backend (
|
|
101
|
+
# Module-level singleton backend (DuckDB single R/W handle for the process lifetime)
|
|
98
102
|
_backend: GraphBackend | None = None
|
|
99
103
|
|
|
100
104
|
# Module-level metrics store singleton
|
|
101
105
|
_metrics: MetricsStore | None = None
|
|
102
106
|
|
|
107
|
+
# Module-level backend lock — injected by server.py _run_with_control so that
|
|
108
|
+
# MCP write tools (index_repo) share the same lock as the drain loop.
|
|
109
|
+
# None when no server event-loop is running (unit tests, direct DB access).
|
|
110
|
+
_backend_lock: "anyio.Lock | None" = None
|
|
111
|
+
|
|
112
|
+
# The path that init_backend() actually opened. Captured at init time so
|
|
113
|
+
# MCP write tools use this path, not get_db_path() which returns the default
|
|
114
|
+
# ~/.sqlcg/graph.db regardless of what was passed to init_backend.
|
|
115
|
+
_init_db_path: str | None = None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _set_backend_lock(lock: "anyio.Lock | None") -> None:
|
|
119
|
+
"""Register the backend lock from the server's task group.
|
|
120
|
+
|
|
121
|
+
Called by server.py _run_with_control so MCP write tools use the same
|
|
122
|
+
lock as the drain loop — ensuring no concurrent RW access.
|
|
123
|
+
"""
|
|
124
|
+
global _backend_lock
|
|
125
|
+
_backend_lock = lock
|
|
126
|
+
|
|
103
127
|
|
|
104
128
|
def init_backend(db_path: str | None = None) -> None:
|
|
105
129
|
"""Initialize the module-level backend singleton.
|
|
106
130
|
|
|
131
|
+
Startup sequence (DuckDB — one read-write handle for the process lifetime;
this replaces the kuzu-era "open RW, close, reopen RO" sequence):
1. Open the database read-write and run init_schema(), which creates the
schema if absent (init_schema is a no-op on an already-initialized DB —
it does NOT migrate).
2. Run the schema-version gate (Step 1.4): raise RuntimeError if the stored
version differs from the current build's SCHEMA_VERSION.
3. Keep that same read-write handle as the serving singleton; it is NOT
closed and reopened read-only.

Because the handle is never reopened read-only, ``init_schema()`` — which
issues DDL — always runs on a read-write connection.
|
|
142
|
+
|
|
107
143
|
Args:
|
|
108
|
-
db_path: Path to
|
|
144
|
+
db_path: Path to DuckDB database. If None, uses get_db_path().
|
|
109
145
|
|
|
110
146
|
Raises:
|
|
111
|
-
RuntimeError: If backend initialization fails
|
|
147
|
+
RuntimeError: If backend initialization fails or schema version
|
|
148
|
+
is stale (the caller must not swallow this — server must exit).
|
|
112
149
|
"""
|
|
113
|
-
global _backend, _metrics
|
|
150
|
+
global _backend, _metrics, _init_db_path
|
|
114
151
|
path = db_path or str(get_db_path())
|
|
115
|
-
|
|
152
|
+
_init_db_path = path
|
|
153
|
+
|
|
154
|
+
# DuckDB: single R/W handle for the process lifetime — no RO/RW escalation.
|
|
155
|
+
# init_schema is idempotent; transaction() wraps the DDL in one commit.
|
|
156
|
+
rw_backend = DuckDBBackend(path)
|
|
116
157
|
try:
|
|
117
|
-
|
|
158
|
+
rw_backend.init_schema()
|
|
118
159
|
except Exception as exc:
|
|
119
|
-
|
|
160
|
+
rw_backend.close()
|
|
120
161
|
raise RuntimeError(f"Backend initialization failed: {exc}") from exc
|
|
121
|
-
|
|
122
|
-
|
|
162
|
+
|
|
163
|
+
# Step 2 — schema-version gate (Step 1.4).
|
|
164
|
+
_assert_schema_current(rw_backend, path)
|
|
165
|
+
|
|
166
|
+
# DuckDB: the same handle is used for reads and writes (MVCC).
|
|
167
|
+
_backend = rw_backend
|
|
168
|
+
logger.debug(f"Backend initialized (DuckDB R/W): {path}")
|
|
123
169
|
|
|
124
170
|
# Initialize metrics store (best-effort, failures are logged as WARNING)
|
|
125
171
|
try:
|
|
@@ -136,7 +182,7 @@ def shutdown_backend() -> None:
|
|
|
136
182
|
Closes the database connection and clears the global reference.
|
|
137
183
|
Safe to call multiple times.
|
|
138
184
|
"""
|
|
139
|
-
global _backend, _metrics
|
|
185
|
+
global _backend, _metrics, _init_db_path
|
|
140
186
|
if _backend is not None:
|
|
141
187
|
_backend.close()
|
|
142
188
|
_backend = None
|
|
@@ -144,6 +190,7 @@ def shutdown_backend() -> None:
|
|
|
144
190
|
if _metrics is not None:
|
|
145
191
|
_metrics.close()
|
|
146
192
|
_metrics = None
|
|
193
|
+
_init_db_path = None
|
|
147
194
|
|
|
148
195
|
|
|
149
196
|
def _get_backend() -> GraphBackend:
|
|
@@ -157,6 +204,31 @@ def _get_backend() -> GraphBackend:
|
|
|
157
204
|
return _backend
|
|
158
205
|
|
|
159
206
|
|
|
207
|
+
def _assert_schema_current(backend: GraphBackend, path: str) -> None:
|
|
208
|
+
"""Refuse to start when the stored schema version differs from the current build.
|
|
209
|
+
|
|
210
|
+
Called inside the RW-ensure window of init_backend (Step 1.4) after
|
|
211
|
+
init_schema() has run the create-if-absent step.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
backend: An open (RW) backend to query.
|
|
215
|
+
path: The db_path string — included in the error message for context.
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
RuntimeError: Stored version present and != current SCHEMA_VERSION.
|
|
219
|
+
Message names both versions and the sqlcg db reset remedy.
|
|
220
|
+
"""
|
|
221
|
+
from sqlcg.core.schema import SCHEMA_VERSION
|
|
222
|
+
|
|
223
|
+
stored = backend.get_schema_version()
|
|
224
|
+
if stored is not None and stored != SCHEMA_VERSION:
|
|
225
|
+
msg = (
|
|
226
|
+
f"Database schema is v{stored}, but this build expects v{SCHEMA_VERSION} — "
|
|
227
|
+
f"run 'sqlcg db reset && sqlcg index <path>' to re-index."
|
|
228
|
+
)
|
|
229
|
+
raise RuntimeError(msg)
|
|
230
|
+
|
|
231
|
+
|
|
160
232
|
@contextmanager
|
|
161
233
|
def _open_backend():
|
|
162
234
|
"""Context manager to get the initialized backend.
|
|
@@ -183,11 +255,11 @@ def _assert_indexed(db: GraphBackend) -> None:
|
|
|
183
255
|
Raises:
|
|
184
256
|
NotIndexedError: If no repos or files have been indexed
|
|
185
257
|
"""
|
|
186
|
-
rows = db.run_read(
|
|
258
|
+
rows = db.run_read('SELECT count(*) AS n FROM "Repo"', {})
|
|
187
259
|
if rows and rows[0]["n"] > 0:
|
|
188
260
|
return
|
|
189
261
|
# Fallback: accept a graph with File nodes but no Repo (test-only or partial state).
|
|
190
|
-
file_rows = db.run_read(
|
|
262
|
+
file_rows = db.run_read('SELECT count(*) AS n FROM "File"', {})
|
|
191
263
|
if file_rows and file_rows[0]["n"] > 0:
|
|
192
264
|
logger.debug(
|
|
193
265
|
"File nodes present but no Repo node — accepting as test-only/partial graph; "
|
|
@@ -214,7 +286,7 @@ def _indexed_root(db: GraphBackend) -> Path | None:
|
|
|
214
286
|
Absolute Path of the indexed root, or None if unavailable.
|
|
215
287
|
"""
|
|
216
288
|
try:
|
|
217
|
-
rows = db.run_read(
|
|
289
|
+
rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
|
|
218
290
|
if rows and rows[0].get("path"):
|
|
219
291
|
return Path(rows[0]["path"])
|
|
220
292
|
except Exception:
|
|
@@ -364,7 +436,10 @@ def _kahn_topological_sort(affected_tables: list[str], db: GraphBackend) -> tupl
|
|
|
364
436
|
indegree: dict[str, int] = {t: 0 for t in affected_tables}
|
|
365
437
|
|
|
366
438
|
for table in affected_tables:
|
|
367
|
-
rows = db.run_read(
|
|
439
|
+
rows = db.run_read(
|
|
440
|
+
GET_TABLE_DIRECT_UPSTREAMS_QUERY,
|
|
441
|
+
{"table_qualified": table, "table_qualified2": table},
|
|
442
|
+
)
|
|
368
443
|
for row in rows:
|
|
369
444
|
src = row["upstream_table"]
|
|
370
445
|
if src in table_set and src != table and table not in successors[src]:
|
|
@@ -462,19 +537,21 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
462
537
|
success = True
|
|
463
538
|
|
|
464
539
|
try:
|
|
465
|
-
db = _get_backend()
|
|
466
|
-
indexer = Indexer()
|
|
467
540
|
path = Path(repo_path).resolve()
|
|
468
541
|
if not path.exists():
|
|
469
542
|
raise ValueError(f"Repository path does not exist: {repo_path}")
|
|
470
543
|
if not path.is_dir():
|
|
471
544
|
raise ValueError(f"Repository path is not a directory: {repo_path}")
|
|
472
545
|
|
|
546
|
+
# DuckDB: single R/W handle for the process lifetime — use directly.
|
|
547
|
+
rw_db = _get_backend()
|
|
548
|
+
|
|
549
|
+
indexer = Indexer()
|
|
473
550
|
# Ensure the Repo node exists for this repository
|
|
474
551
|
from sqlcg.core.schema import NodeLabel, RelType
|
|
475
552
|
|
|
476
553
|
abs_path = str(path)
|
|
477
|
-
|
|
554
|
+
rw_db.upsert_node(
|
|
478
555
|
NodeLabel.REPO,
|
|
479
556
|
abs_path,
|
|
480
557
|
{
|
|
@@ -484,14 +561,14 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
484
561
|
)
|
|
485
562
|
|
|
486
563
|
# Index the repository (with absolute path)
|
|
487
|
-
result = indexer.index_repo(path, dialect,
|
|
564
|
+
result = indexer.index_repo(path, dialect, rw_db)
|
|
488
565
|
|
|
489
566
|
# Create BELONGS_TO relationships from File nodes to Repo node
|
|
490
567
|
# Query for all File nodes in this repo and link them to the Repo
|
|
491
568
|
repo_prefix = abs_path.rstrip("/") + "/"
|
|
492
|
-
file_rows =
|
|
569
|
+
file_rows = rw_db.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": repo_prefix})
|
|
493
570
|
for row in file_rows:
|
|
494
|
-
|
|
571
|
+
rw_db.upsert_edge(
|
|
495
572
|
NodeLabel.FILE,
|
|
496
573
|
row["path"],
|
|
497
574
|
NodeLabel.REPO,
|
|
@@ -499,7 +576,6 @@ def index_repo(repo_path: str, dialect: str = "ansi") -> dict:
|
|
|
499
576
|
RelType.BELONGS_TO,
|
|
500
577
|
{},
|
|
501
578
|
)
|
|
502
|
-
|
|
503
579
|
logger.info(f"Indexed {result['files_parsed']} files with {result['tables_found']} tables")
|
|
504
580
|
|
|
505
581
|
# Record metrics
|
|
@@ -831,7 +907,10 @@ def get_change_scope(table_qualified: str) -> ChangeScopeResult:
|
|
|
831
907
|
def_rows = db.run_read(GET_TABLE_DEFINING_FILES_QUERY, {"table_qualified": target})
|
|
832
908
|
defining_files = _dedup_preserve_order([r["file_path"] for r in def_rows])
|
|
833
909
|
|
|
834
|
-
up_rows = db.run_read(
|
|
910
|
+
up_rows = db.run_read(
|
|
911
|
+
GET_TABLE_DIRECT_UPSTREAMS_QUERY,
|
|
912
|
+
{"table_qualified": target, "table_qualified2": target},
|
|
913
|
+
)
|
|
835
914
|
upstream_raw = _dedup_preserve_order(
|
|
836
915
|
[r["upstream_table"] for r in up_rows if r["upstream_table"]]
|
|
837
916
|
)
|
|
@@ -1464,14 +1543,15 @@ def db_info() -> DbInfoResult:
|
|
|
1464
1543
|
|
|
1465
1544
|
node_counts: dict[str, int] = {}
|
|
1466
1545
|
for label in NodeLabel:
|
|
1467
|
-
result = db.run_read(f
|
|
1546
|
+
result = db.run_read(f'SELECT count(*) AS count FROM "{label}"', {})
|
|
1468
1547
|
node_counts[str(label)] = result[0]["count"] if result else 0
|
|
1469
1548
|
|
|
1470
|
-
edges_result = db.run_read(
|
|
1549
|
+
edges_result = db.run_read('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
|
|
1471
1550
|
column_lineage_edges = edges_result[0]["count"] if edges_result else 0
|
|
1472
1551
|
|
|
1473
1552
|
mode_rows = db.run_read(
|
|
1474
|
-
|
|
1553
|
+
'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery" '
|
|
1554
|
+
"GROUP BY parsing_mode ORDER BY cnt DESC",
|
|
1475
1555
|
{},
|
|
1476
1556
|
)
|
|
1477
1557
|
parse_quality: dict[str, int] = {}
|
|
@@ -1503,7 +1583,7 @@ def db_info() -> DbInfoResult:
|
|
|
1503
1583
|
_freshness_kwargs: dict = {}
|
|
1504
1584
|
try:
|
|
1505
1585
|
_indexed_sha = db.get_indexed_sha()
|
|
1506
|
-
_repo_rows = db.run_read(
|
|
1586
|
+
_repo_rows = db.run_read('SELECT path FROM "Repo" LIMIT 1', {})
|
|
1507
1587
|
if _repo_rows and _indexed_sha is not None and _repo_rows[0].get("path"):
|
|
1508
1588
|
_root = Path(_repo_rows[0]["path"])
|
|
1509
1589
|
_f = compute_freshness(_root, _indexed_sha)
|
|
@@ -1533,11 +1613,11 @@ def db_info() -> DbInfoResult:
|
|
|
1533
1613
|
|
|
1534
1614
|
|
|
1535
1615
|
@mcp.tool()
|
|
1536
|
-
@_timed_tool("
|
|
1537
|
-
def
|
|
1538
|
-
"""Execute a read-only
|
|
1616
|
+
@_timed_tool("execute_sql")
|
|
1617
|
+
def execute_sql(query: str) -> list[dict]:
|
|
1618
|
+
"""Execute a read-only SQL query against the graph (DuckDB).
|
|
1539
1619
|
|
|
1540
|
-
This tool allows direct
|
|
1620
|
+
This tool allows direct SQL queries for advanced users. It enforces
|
|
1541
1621
|
read-only mode by stripping quoted literals and checking for write
|
|
1542
1622
|
operation keywords. A LIMIT clause is automatically appended if missing.
|
|
1543
1623
|
|
|
@@ -1548,31 +1628,29 @@ def execute_cypher(query: str) -> list[dict]:
|
|
|
1548
1628
|
that contains such keywords.
|
|
1549
1629
|
|
|
1550
1630
|
Args:
|
|
1551
|
-
query:
|
|
1631
|
+
query: DuckDB SQL query string (read-only SELECT only)
|
|
1552
1632
|
|
|
1553
1633
|
Returns:
|
|
1554
1634
|
List of result dictionaries from the query
|
|
1555
1635
|
|
|
1556
1636
|
Raises:
|
|
1557
|
-
ValueError: If the query contains write operations (
|
|
1558
|
-
DELETE,
|
|
1637
|
+
ValueError: If the query contains write operations (INSERT, UPDATE,
|
|
1638
|
+
DELETE, CREATE, DROP, TRUNCATE, MERGE)
|
|
1559
1639
|
"""
|
|
1560
1640
|
db = _get_backend()
|
|
1561
1641
|
|
|
1562
1642
|
# Strip quoted string literals before blocklist check
|
|
1563
|
-
# This prevents mutation commands hiding inside strings from triggering the blocker
|
|
1564
|
-
# Handle escaped quotes: '' in single quotes, "" in double quotes
|
|
1565
1643
|
stripped = re.sub(r"'(?:''|[^'])*'", "", query)
|
|
1566
1644
|
stripped = re.sub(r'"(?:""|[^"])*"', "", stripped)
|
|
1567
1645
|
|
|
1568
1646
|
# Check for write operations (case-insensitive)
|
|
1569
1647
|
if re.search(
|
|
1570
|
-
r"\b(
|
|
1648
|
+
r"\b(INSERT|UPDATE|DELETE|CREATE|MERGE|DROP|TRUNCATE)\b",
|
|
1571
1649
|
stripped,
|
|
1572
1650
|
re.IGNORECASE,
|
|
1573
1651
|
):
|
|
1574
1652
|
raise ValueError(
|
|
1575
|
-
"Write operations are not permitted via
|
|
1653
|
+
"Write operations are not permitted via execute_sql. "
|
|
1576
1654
|
"Use the CLI or dedicated tools instead."
|
|
1577
1655
|
)
|
|
1578
1656
|
|
|
@@ -1580,13 +1658,13 @@ def execute_cypher(query: str) -> list[dict]:
|
|
|
1580
1658
|
q = query.rstrip()
|
|
1581
1659
|
if q.endswith(";"):
|
|
1582
1660
|
q = q[:-1].rstrip()
|
|
1583
|
-
if "limit" not in stripped.lower():
|
|
1661
|
+
if "limit" not in stripped.lower():
|
|
1584
1662
|
q = q + " LIMIT 500"
|
|
1585
1663
|
|
|
1586
1664
|
try:
|
|
1587
1665
|
return db.run_read(q, {})
|
|
1588
1666
|
except Exception as e:
|
|
1589
|
-
logger.error(f"
|
|
1667
|
+
logger.error(f"SQL execution failed: {e}")
|
|
1590
1668
|
raise
|
|
1591
1669
|
|
|
1592
1670
|
|
|
@@ -1677,7 +1755,7 @@ def analyze_unused() -> UnusedTablesResult:
|
|
|
1677
1755
|
|
|
1678
1756
|
# Single aggregation — no Python per-row graph traversal.
|
|
1679
1757
|
unused_rows = db.run_read(ANALYZE_UNUSED_TABLES_QUERY, {})
|
|
1680
|
-
total_rows = db.run_read(
|
|
1758
|
+
total_rows = db.run_read('SELECT count(*) AS n FROM "SqlTable"', {})
|
|
1681
1759
|
total_tables_scanned = total_rows[0]["n"] if total_rows else 0
|
|
1682
1760
|
|
|
1683
1761
|
prefixes = get_presentation_prefixes(root)
|