sql-code-graph 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/cli/commands/db.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Database management commands."""
2
2
 
3
- import os
4
3
  import shutil
5
4
  from pathlib import Path
6
5
 
@@ -20,18 +19,8 @@ console = Console()
20
19
 
21
20
 
22
21
  @app.command("init")
23
- def db_init(
24
- buffer_pool_size: int = typer.Option(
25
- 0,
26
- "--buffer-pool-size",
27
- help="KuzuDB buffer pool size in MB (0 = default). "
28
- "Set to 256-512 on memory-constrained machines.",
29
- ),
30
- ) -> None:
22
+ def db_init() -> None:
31
23
  """Initialise the graph database (idempotent)."""
32
- if buffer_pool_size > 0:
33
- os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
34
-
35
24
  db_path = get_db_path()
36
25
  db_path.parent.mkdir(parents=True, exist_ok=True)
37
26
  with get_backend() as backend:
@@ -49,38 +38,36 @@ def db_reset( # noqa: B008
49
38
 
50
39
  from sqlcg.server.control import sock_path
51
40
 
52
- # Step 3.4 (OD-3 / W2): refuse cleanly when a server is live — both the
53
- # full reset and the --repo partial reset open the RW backend directly and
54
- # would fight the server's lock. Guard runs BEFORE either destructive branch.
41
+ # Refuse cleanly when a server is live.
55
42
  sp = sock_path()
56
43
  if sp.exists():
57
44
  try:
58
45
  with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
59
46
  s.settimeout(1)
60
47
  s.connect(str(sp))
61
- # Connection succeeded — a server is live.
62
48
  console.print(
63
49
  "[red]A server is running on this database; stop it first "
64
50
  "('sqlcg mcp stop') before resetting the database.[/red]"
65
51
  )
66
52
  raise typer.Exit(1)
67
53
  except (FileNotFoundError, ConnectionRefusedError, OSError):
68
- # No live server — fall through to destructive action.
69
54
  pass
70
55
 
71
56
  if repo:
72
- # Delete all nodes for this repo (use run_write for mutation)
57
+ # Delete all nodes for this repo: delete File nodes (cascades to all
58
+ # related nodes via delete_nodes_for_file) and the Repo node itself.
73
59
  with get_backend() as backend:
74
- backend.run_write(
75
- "MATCH (r:Repo {path: $p}) DETACH DELETE r",
76
- {"p": repo},
60
+ # Get all files for this repo
61
+ file_rows = backend.run_read(
62
+ 'SELECT path FROM "File" WHERE repo_path = ?',
63
+ {"repo_path": repo},
77
64
  )
65
+ for fr in file_rows:
66
+ backend.delete_nodes_for_file(fr["path"])
67
+ backend.run_write('DELETE FROM "Repo" WHERE path = ?', {"p": repo})
78
68
  console.print(f"[yellow]Reset repo[/yellow] {repo}")
79
69
  else:
80
- # Full reset — delete the DB. Kuzu may store it as a single file (current,
81
- # e.g. 0.11.x) or a directory (older versions); also drop the .wal sidecar.
82
- # shutil.rmtree silently no-ops on a regular file (NotADirectoryError +
83
- # ignore_errors), so dispatch on the actual filesystem type.
70
+ # Full reset — delete the DuckDB file (single file, not a directory).
84
71
  db_path = get_db_path()
85
72
  removed = False
86
73
  for target in (db_path, db_path.with_name(db_path.name + ".wal")):
@@ -99,56 +86,46 @@ def db_reset( # noqa: B008
99
86
  @app.command("info")
100
87
  def db_info() -> None:
101
88
  """Show database stats."""
102
- # db info is a read-only command. All Cypher reads route through the live
103
- # server (run_read_routed) to avoid "Database is locked" while the MCP server
104
- # holds the write lock. get_schema_version / get_indexed_sha are inlined as
105
- # run_read_routed calls using their known Cypher so they too route through the
106
- # socket when a server is live; this avoids a direct-open that would hit the lock.
89
+ # db info routes through the live server (run_read_routed) to avoid holding
90
+ # the DuckDB file lock when the MCP server is running.
107
91
 
108
92
  # Schema version
109
- schema_rows = run_read_routed("MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {})
93
+ schema_rows = run_read_routed('SELECT version FROM "SchemaVersion" LIMIT 1', {})
110
94
  version = (schema_rows[0]["version"] if schema_rows else None) or "unknown"
111
95
  console.print(f"Schema version: {version}")
112
96
 
113
- # Freshness block — only shown when the DB has been indexed from a git repo
97
+ # Freshness block
114
98
  try:
115
- sha_rows = run_read_routed(
116
- "MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
117
- )
99
+ sha_rows = run_read_routed('SELECT indexed_sha AS sha FROM "SchemaVersion" LIMIT 1', {})
118
100
  indexed_sha = sha_rows[0]["sha"] if sha_rows else None
119
- repo_rows = run_read_routed("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
101
+ repo_rows = run_read_routed('SELECT path FROM "Repo" LIMIT 1', {})
120
102
  if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
121
103
  repo_root = Path(repo_rows[0]["path"])
122
104
  f = compute_freshness(repo_root, indexed_sha)
123
105
  console.print(render_freshness_line(f))
124
- except NotImplementedError:
125
- # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
126
- pass
127
106
  except Exception as e:
128
- # Any unexpected error in the freshness block must not crash db info
129
107
  logger.debug(f"Freshness check skipped: {e}")
130
108
 
131
- # Show node counts for all labels
109
+ # Node counts
132
110
  for label in NodeLabel:
133
111
  try:
134
- result = run_read_routed(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
112
+ result = run_read_routed(f'SELECT count(*) AS count FROM "{label}"', {})
135
113
  count = result[0]["count"] if result else 0
136
114
  console.print(f" {label}: {count}")
137
115
  except Exception as e:
138
- # Log unexpected exceptions instead of silently skipping
139
116
  logger.error(f"Error getting count for {label}: {e}")
140
117
  console.print(f" [red]{label}: error[/red]")
141
118
 
142
- # Health check section
143
- repo_count_result = run_read_routed("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
119
+ # Health check
120
+ repo_count_result = run_read_routed('SELECT count(*) AS count FROM "Repo"', {})
144
121
  repo_count = repo_count_result[0]["count"] if repo_count_result else 0
145
122
 
146
123
  if repo_count == 0:
147
- console.print( # noqa: E501
124
+ console.print(
148
125
  "[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
149
126
  )
150
127
  else:
151
- query_count_result = run_read_routed("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
128
+ query_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlQuery"', {})
152
129
  query_count = query_count_result[0]["count"] if query_count_result else 0
153
130
 
154
131
  if query_count == 0:
@@ -157,7 +134,7 @@ def db_info() -> None:
157
134
  "the graph.[/yellow]"
158
135
  )
159
136
  else:
160
- col_count_result = run_read_routed("MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {})
137
+ col_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlColumn"', {})
161
138
  col_count = col_count_result[0]["count"] if col_count_result else 0
162
139
 
163
140
  if col_count == 0:
@@ -167,12 +144,10 @@ def db_info() -> None:
167
144
  "will return empty results.[/yellow]"
168
145
  )
169
146
 
170
- # Print COLUMN_LINEAGE edges count
171
- edges_result = run_read_routed("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
147
+ edges_result = run_read_routed('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
172
148
  edges_count = edges_result[0]["count"] if edges_result else 0
173
149
  console.print(f" COLUMN_LINEAGE edges: {edges_count}")
174
150
 
175
- # Print star resolution metrics (T-07)
176
151
  from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
177
152
 
178
153
  star_source_result = run_read_routed(COUNT_STAR_SOURCES_QUERY, {})
@@ -183,11 +158,11 @@ def db_info() -> None:
183
158
  star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
184
159
  console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
185
160
 
186
- # Print parsing mode distribution
187
- mode_query = (
188
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
161
+ mode_rows = run_read_routed(
162
+ 'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
163
+ " GROUP BY parsing_mode ORDER BY cnt DESC",
164
+ {},
189
165
  )
190
- mode_rows = run_read_routed(mode_query, {})
191
166
  if mode_rows and "mode" in mode_rows[0]:
192
167
  console.print("\n Parsing mode distribution:")
193
168
  for row in mode_rows:
@@ -197,7 +172,7 @@ def db_info() -> None:
197
172
  @app.command("list-repos")
198
173
  def list_repos() -> None:
199
174
  """List all indexed repositories."""
200
- result = run_read_routed("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
175
+ result = run_read_routed('SELECT path, name FROM "Repo"', {})
201
176
 
202
177
  if not result:
203
178
  console.print("[yellow]No repositories indexed[/yellow]")
@@ -4,7 +4,6 @@ import typer
4
4
  from rich.console import Console
5
5
  from rich.table import Table
6
6
 
7
- from sqlcg.core.schema import NodeLabel
8
7
  from sqlcg.server.read_client import run_read_routed
9
8
 
10
9
  app = typer.Typer(help="Search the graph")
@@ -19,14 +18,13 @@ def find_table( # noqa: B008
19
18
  """Find a table by name."""
20
19
  name = name.lower() # graph keys are lowercased at index time (C2 normalization)
21
20
  results = run_read_routed(
22
- f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
23
- "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
21
+ "SELECT qualified, kind FROM \"SqlTable\" WHERE qualified LIKE '%' || ? || '%' LIMIT 50",
24
22
  {"name": name},
25
23
  )
26
24
  if not raw:
27
25
  from sqlcg.server.noise_filter import NoiseFilter
28
26
 
29
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
27
+ nf = NoiseFilter.from_config()
30
28
  ids = [r["qualified"] for r in results]
31
29
  kept, _ = nf.filter_nodes(ids)
32
30
  kept_set = set(kept)
@@ -42,14 +40,13 @@ def find_column( # noqa: B008
42
40
  """Find a column by table.column reference."""
43
41
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
44
42
  results = run_read_routed(
45
- f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
43
+ "SELECT id FROM \"SqlColumn\" WHERE id LIKE '%' || ? || '%' LIMIT 50",
46
44
  {"ref": ref},
47
45
  )
48
46
  if not raw:
49
47
  from sqlcg.server.noise_filter import NoiseFilter
50
48
 
51
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
52
- # Filter on the schema.table portion of each column id (schema.table.column)
49
+ nf = NoiseFilter.from_config()
53
50
  results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
54
51
  _print_table(results, ["id"])
55
52
 
@@ -60,8 +57,7 @@ def find_pattern( # noqa: B008
60
57
  ) -> None:
61
58
  """Find queries containing a SQL pattern."""
62
59
  results = run_read_routed(
63
- f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
64
- "RETURN q.id AS id, q.kind AS kind LIMIT 50",
60
+ "SELECT id, kind FROM \"SqlQuery\" WHERE sql LIKE '%' || ? || '%' LIMIT 50",
65
61
  {"pattern": pattern},
66
62
  )
67
63
  _print_table(results, ["id", "kind"])
@@ -112,13 +112,11 @@ def gain_cmd(
112
112
  """
113
113
  )
114
114
 
115
- # Section E: execute_cypher ratio
116
- cypher_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_cypher'"
117
- execute_cypher_count_result = metrics.execute_query(cypher_query)
118
- execute_cypher_count = (
119
- execute_cypher_count_result[0][0] if execute_cypher_count_result else 0
120
- )
121
- execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
115
+ # Section E: execute_sql ratio
116
+ sql_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_sql'"
117
+ execute_sql_count_result = metrics.execute_query(sql_query)
118
+ execute_sql_count = execute_sql_count_result[0][0] if execute_sql_count_result else 0
119
+ execute_sql_ratio = execute_sql_count / total_calls if total_calls > 0 else 0
122
120
 
123
121
  # Section F: parse quality from graph.
124
122
  # run_read_routed raises typer.Exit (Exception-derived, NOT SystemExit) on
@@ -127,8 +125,8 @@ def gain_cmd(
127
125
  parse_quality: dict[str, int] | None = None
128
126
  try:
129
127
  mode_rows = run_read_routed(
130
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
131
- " COUNT(q) AS cnt ORDER BY cnt DESC",
128
+ 'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
129
+ " GROUP BY parsing_mode ORDER BY cnt DESC",
132
130
  {},
133
131
  )
134
132
  if mode_rows and "mode" in mode_rows[0]:
@@ -144,7 +142,7 @@ def gain_cmd(
144
142
  "feedback_tp": tp_count,
145
143
  "feedback_total": fb_total,
146
144
  "top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
147
- "execute_cypher_ratio": round(execute_cypher_ratio, 2),
145
+ "execute_sql_ratio": round(execute_sql_ratio, 2),
148
146
  }
149
147
  if parse_quality is not None:
150
148
  payload["parse_quality"] = parse_quality
@@ -191,14 +189,14 @@ def gain_cmd(
191
189
  console.print(f" {i}. {name}: {count}")
192
190
  console.print()
193
191
 
194
- # Section E: execute_cypher ratio
195
- console.print("[bold cyan]E. Raw Cypher Usage[/bold cyan]")
196
- ratio_pct = execute_cypher_ratio * 100
197
- if execute_cypher_ratio > 0.3:
198
- msg = f" [yellow]execute_cypher: {ratio_pct:.1f}% (high raw-Cypher usage)[/yellow]"
192
+ # Section E: execute_sql ratio
193
+ console.print("[bold cyan]E. Raw SQL Usage[/bold cyan]")
194
+ ratio_pct = execute_sql_ratio * 100
195
+ if execute_sql_ratio > 0.3:
196
+ msg = f" [yellow]execute_sql: {ratio_pct:.1f}% (high raw-SQL usage)[/yellow]"
199
197
  console.print(msg)
200
198
  else:
201
- console.print(f" execute_cypher: {ratio_pct:.1f}%")
199
+ console.print(f" execute_sql: {ratio_pct:.1f}%")
202
200
  console.print()
203
201
 
204
202
  # Section F: parse quality from graph
@@ -1,7 +1,6 @@
1
1
  """Index command for scanning and indexing SQL files."""
2
2
 
3
3
  import json
4
- import os
5
4
  import socket as _socket
6
5
  from pathlib import Path
7
6
 
@@ -16,7 +15,7 @@ from rich.progress import (
16
15
  TimeRemainingColumn,
17
16
  )
18
17
 
19
- from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
18
+ from sqlcg.core.config import DbConfig, config_file_present, get_backend, get_db_path, get_dialect
20
19
  from sqlcg.indexer.indexer import Indexer
21
20
 
22
21
  console = Console()
@@ -37,17 +36,11 @@ def index_cmd( # noqa: B008
37
36
  timeout_per_file: int = typer.Option( # noqa: B008
38
37
  10, "--timeout-per-file", help="Timeout per file in seconds"
39
38
  ),
40
- buffer_pool_size: int = typer.Option( # noqa: B008
41
- 0,
42
- "--buffer-pool-size",
43
- help="KuzuDB buffer pool size in MB (0 = default). "
44
- "Set to 256-512 on memory-constrained machines.",
45
- ),
46
39
  batch_size: int = typer.Option( # noqa: B008
47
40
  50,
48
41
  "--batch-size",
49
42
  help=(
50
- "Files per KuzuDB transaction in the upsert pass. "
43
+ "Files per DuckDB transaction in the upsert pass. "
51
44
  "Default 50 balances commit-overhead reduction (vs. legacy per-file commits) "
52
45
  "against per-batch memory cost. Lower values are safer for memory-constrained "
53
46
  "machines; higher values give marginal speedup at the cost of larger working sets. "
@@ -148,16 +141,12 @@ def index_cmd( # noqa: B008
148
141
  sqlcg_log.addHandler(_warn_handler)
149
142
  _warn_log_path = None
150
143
  else:
151
- _warn_log_path = KuzuConfig.from_env().log_path
144
+ _warn_log_path = DbConfig.from_env().log_path
152
145
  _warn_log_path.parent.mkdir(parents=True, exist_ok=True)
153
146
  _warn_handler = logging.FileHandler(_warn_log_path)
154
147
  _warn_handler.setLevel(logging.WARNING)
155
148
  sqlcg_log.addHandler(_warn_handler)
156
149
 
157
- # Set buffer pool size via env var if specified
158
- if buffer_pool_size > 0:
159
- os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
160
-
161
150
  if not quiet and not config_file_present(path):
162
151
  console.print(
163
152
  f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
@@ -181,7 +170,7 @@ def index_cmd( # noqa: B008
181
170
  )
182
171
  except KeyboardInterrupt:
183
172
  # The backend context manager (inside _run_index) has already closed the
184
- # KuzuDB connection and released the lock by the time we get here.
173
+ # DuckDB connection and released the lock by the time we get here.
185
174
  console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
186
175
  raise typer.Exit(130) from None
187
176
  finally:
@@ -400,10 +389,10 @@ def _run_index(
400
389
  )
401
390
 
402
391
  # Connect files to repo
392
+ from sqlcg.core.queries import INDEX_REPO_FILES_QUERY
403
393
  from sqlcg.core.schema import RelType
404
394
 
405
- files_query = "MATCH (f:File) WHERE f.path STARTS WITH $repo_prefix RETURN f.path AS path"
406
- file_rows = backend.run_read(files_query, {"repo_prefix": abs_path})
395
+ file_rows = backend.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": abs_path})
407
396
  for row in file_rows:
408
397
  backend.upsert_edge(
409
398
  NodeLabel.FILE,
@@ -21,7 +21,7 @@ console = Console()
21
21
  # Client-side socket timeout for the --notify control-socket path.
22
22
  # A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
23
23
  # 300 s covers that with headroom while keeping the wait bounded on a wedged server.
24
- # This is a CLI transport bound, NOT a KuzuConfig/indexer constant.
24
+ # This is a CLI transport bound, NOT a DbConfig/indexer constant.
25
25
  _NOTIFY_SOCKET_TIMEOUT_S = 300
26
26
 
27
27
 
@@ -26,7 +26,7 @@ def uninstall_cmd( # noqa: B008
26
26
  """Uninstall sqlcg from Claude Code and optionally clean up resources.
27
27
 
28
28
  Step 1: Remove MCP registration from ~/.claude/settings.json
29
- Step 2: Optionally delete the KùzuDB graph database
29
+ Step 2: Optionally delete the DuckDB graph database
30
30
  Step 3: Remove git hook sentinel block from .git/hooks/post-checkout
31
31
  Step 4: Remove sqlcg skill directory from ~/.claude/skills/sqlcg/ and
32
32
  <repo>/.claude/skills/sqlcg/
@@ -34,7 +34,7 @@ def uninstall_cmd( # noqa: B008
34
34
  # Step 1: Remove MCP entry from settings.json
35
35
  _step1_remove_mcp_entry()
36
36
 
37
- # Step 2: Offer to delete the KùzuDB (unless --keep-db flag is set)
37
+ # Step 2: Offer to delete the database (unless --keep-db flag is set)
38
38
  if not keep_db:
39
39
  _step2_delete_database(force)
40
40
  else:
@@ -84,7 +84,7 @@ def _step1_remove_mcp_entry() -> None:
84
84
 
85
85
 
86
86
  def _step2_delete_database(force: bool) -> None:
87
- """Offer to delete the KùzuDB graph database."""
87
+ """Offer to delete the DuckDB graph database."""
88
88
  db_path = _get_db_path()
89
89
 
90
90
  if not db_path:
@@ -93,13 +93,6 @@ def _step2_delete_database(force: bool) -> None:
93
93
 
94
94
  db_path_obj = Path(db_path)
95
95
 
96
- # Check if it's a kuzu backend (not Neo4j)
97
- # If db_path is a directory or ends with standard kuzu patterns, it's likely kuzu
98
- # For now, we'll assume anything in .sqlcg/kuzu is kuzu
99
- if not _is_kuzu_backend(db_path):
100
- console.print("[dim]Database is not KùzuDB — skipping deletion[/dim]")
101
- return
102
-
103
96
  if not db_path_obj.exists():
104
97
  console.print(f"[dim]Database not found at {db_path}[/dim]")
105
98
  return
@@ -117,9 +110,11 @@ def _step2_delete_database(force: bool) -> None:
117
110
  console.print("[dim]Keeping database[/dim]")
118
111
  return
119
112
 
120
- # Delete the database directory
113
+ # DuckDB is a single file (+ optional .wal sibling); delete both.
121
114
  try:
122
- shutil.rmtree(db_path_obj, ignore_errors=True)
115
+ for target in (db_path_obj, db_path_obj.with_name(db_path_obj.name + ".wal")):
116
+ if target.exists():
117
+ target.unlink()
123
118
  console.print(f"[green]Deleted graph database at {db_path}[/green]")
124
119
  except Exception as e:
125
120
  console.print(f"[yellow]Warning:[/yellow] Failed to delete database: {e}")
@@ -222,18 +217,12 @@ def _step3_remove_git_hook(repo_path: Path) -> None:
222
217
 
223
218
  def _get_db_path() -> str | None:
224
219
  """Get the configured database path from environment or default."""
225
- from sqlcg.core.config import KuzuConfig
220
+ from sqlcg.core.config import DbConfig
226
221
 
227
- db_path = str(KuzuConfig.from_env().db_path)
222
+ db_path = str(DbConfig.from_env().db_path)
228
223
  return db_path if Path(db_path).exists() else None
229
224
 
230
225
 
231
- def _is_kuzu_backend(db_path: str) -> bool:
232
- """Check if the database is a KùzuDB backend (not Neo4j)."""
233
- backend = os.getenv("SQLCG_BACKEND", "kuzu").lower()
234
- return backend in ("kuzu", "") # Default to kuzu if unset
235
-
236
-
237
226
  # Candidate skill directory locations to remove (global first, then project-relative)
238
227
  # Each entry is a callable(repo_path) -> Path resolving to the sqlcg skill dir.
239
228
  _SKILL_DIR_TARGETS = [
sqlcg/core/__init__.py CHANGED
@@ -2,7 +2,5 @@
2
2
 
3
3
  from sqlcg.core import schema
4
4
  from sqlcg.core.graph_db import GraphBackend
5
- from sqlcg.core.kuzu_backend import KuzuBackend
6
- from sqlcg.core.neo4j_backend import Neo4jBackend
7
5
 
8
- __all__ = ["GraphBackend", "KuzuBackend", "Neo4jBackend", "schema"]
6
+ __all__ = ["GraphBackend", "schema"]
sqlcg/core/config.py CHANGED
@@ -11,64 +11,37 @@ if TYPE_CHECKING:
11
11
  from sqlcg.core.graph_db import GraphBackend
12
12
 
13
13
 
14
- class KuzuConfig(BaseModel):
15
- """Configuration for KùzuDB backend."""
14
+ class DbConfig(BaseModel):
15
+ """Configuration for the DuckDB backend."""
16
16
 
17
17
  db_path: Path = Field(default_factory=lambda: Path.home() / ".sqlcg" / "graph.db")
18
- buffer_pool_size_mb: int = Field(
19
- default=0,
20
- description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
21
- )
22
18
  log_path: Path = Field(
23
19
  default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
24
20
  description="Path for parse-warning log file written during indexing",
25
21
  )
26
22
 
27
23
  @classmethod
28
- def from_env(cls) -> "KuzuConfig":
29
- """Load KùzuDB config from environment variables.
24
+ def from_env(cls) -> "DbConfig":
25
+ """Load database config from environment variables.
30
26
 
31
27
  Returns:
32
- KuzuConfig instance with environment-overridden values if present.
28
+ DbConfig instance with environment-overridden values if present.
33
29
  """
34
30
  env_path = os.getenv("SQLCG_DB_PATH")
35
- env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
36
31
  env_log = os.getenv("SQLCG_LOG_PATH")
37
32
  return cls(
38
33
  db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
39
- buffer_pool_size_mb=int(env_buf) if env_buf else 0,
40
34
  log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
41
35
  )
42
36
 
43
37
 
44
- class Neo4jConfig(BaseModel):
45
- """Configuration for Neo4j backend."""
46
-
47
- uri: str = Field(default="bolt://localhost:7687")
48
- user: str = Field(default="neo4j")
49
- password: str = Field(default="password")
50
-
51
- @classmethod
52
- def from_env(cls) -> "Neo4jConfig":
53
- """Load Neo4j config from environment variables.
54
-
55
- Returns:
56
- Neo4jConfig instance with environment-overridden values if present.
57
- """
58
- return cls(
59
- uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
60
- user=os.getenv("NEO4J_USER", "neo4j"),
61
- password=os.getenv("NEO4J_PASSWORD", "password"),
62
- )
63
-
64
-
65
38
  def get_db_path() -> Path:
66
39
  """Get the database path from environment or use default.
67
40
 
68
41
  Returns:
69
- Path to the KùzuDB database file
42
+ Path to the DuckDB database file
70
43
  """
71
- return KuzuConfig.from_env().db_path
44
+ return DbConfig.from_env().db_path
72
45
 
73
46
 
74
47
  def config_file_present(path: Path) -> bool:
@@ -347,58 +320,29 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
347
320
 
348
321
 
349
322
  def get_backend(read_only: bool = False) -> "GraphBackend":
350
- """Get a graph backend instance respecting the SQLCG_BACKEND env var.
323
+ """Get a DuckDBBackend instance.
324
+
325
+ The ``read_only`` parameter is accepted for API compatibility but is
326
+ ignored — DuckDB uses a single R/W handle for the process lifetime.
327
+ Concurrent read safety is provided by DuckDB's MVCC (readers see a
328
+ consistent snapshot during an in-flight write transaction).
329
+
330
+ Cross-process access: whichever process opens the DuckDB file first holds
331
+ an exclusive lock; other processes cannot open it at all (even read-only).
332
+ CLI read commands therefore route through the live MCP server via
333
+ ``read_client.run_read_routed`` (v1.2.0) when a server is live, and open
334
+ the file directly only when no server is running.
351
335
 
352
336
  Args:
353
- read_only: Open the database in read-only mode. For KuzuBackend this
354
- enables multiple concurrent read-only opens (reader/reader
355
- concurrency), but does NOT allow reads while a read-write writer
356
- holds the exclusive process lock — that requires routing through the
357
- live MCP server via ``read_client.run_read_routed`` (v1.2.0).
358
- Ignored for Neo4jBackend (Neo4j has no single-writer process lock;
359
- the flag is a no-op and the normal connection is opened).
360
- All writer call sites (index, reindex, db init/reset, server
361
- init_backend) use the default ``False``.
337
+ read_only: Ignored for DuckDB. Accepted for API compatibility.
362
338
 
363
339
  Returns:
364
- A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
340
+ A DuckDBBackend instance.
365
341
 
366
342
  Raises:
367
- ValueError: If backend type is not recognized
368
-
369
- Note:
370
- CLI read commands (find, analyze, db info, gain) route through a live
371
- MCP server via ``read_client.run_read_routed`` (v1.2.0) when a server
372
- is live, falling back to ``get_backend(read_only=True)`` when no server
373
- is present. The fallback path still contends for the process lock under
374
- an active writer (Windows / no-server fallback only).
343
+ duckdb.IOException: If the file is locked by another process.
375
344
  """
376
- backend_type = os.getenv("SQLCG_BACKEND", "kuzu")
377
-
378
- if backend_type == "kuzu":
379
- from sqlcg.core.kuzu_backend import KuzuBackend
345
+ from sqlcg.core.duckdb_backend import DuckDBBackend
380
346
 
381
- kuzu_cfg = KuzuConfig.from_env()
382
- try:
383
- return KuzuBackend(
384
- str(kuzu_cfg.db_path),
385
- buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
386
- read_only=read_only,
387
- )
388
- except RuntimeError as exc:
389
- if read_only and "READ ONLY" in str(exc):
390
- # KùzuDB refuses to open a non-existent or empty DB in read-only
391
- # mode ("Cannot create an empty database under READ ONLY mode").
392
- # Surface the same empty-DB guidance the user sees from `db info`.
393
- raise RuntimeError(
394
- "Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
395
- ) from exc
396
- raise
397
- elif backend_type == "neo4j":
398
- from sqlcg.core.neo4j_backend import Neo4jBackend
399
-
400
- neo4j_cfg = Neo4jConfig.from_env()
401
- # read_only is ignored for Neo4j — no single-writer process lock.
402
- return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
403
- else:
404
- raise ValueError(f"Unknown backend type: {backend_type}")
347
+ cfg = DbConfig.from_env()
348
+ return DuckDBBackend(str(cfg.db_path))