sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff shows the differences in content between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
sqlcg/cli/commands/db.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Database management commands."""
2
2
 
3
- import os
4
3
  import shutil
5
4
  from pathlib import Path
6
5
 
@@ -20,18 +19,8 @@ console = Console()
20
19
 
21
20
 
22
21
  @app.command("init")
23
- def db_init(
24
- buffer_pool_size: int = typer.Option(
25
- 0,
26
- "--buffer-pool-size",
27
- help="KuzuDB buffer pool size in MB (0 = default). "
28
- "Set to 256-512 on memory-constrained machines.",
29
- ),
30
- ) -> None:
22
+ def db_init() -> None:
31
23
  """Initialise the graph database (idempotent)."""
32
- if buffer_pool_size > 0:
33
- os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
34
-
35
24
  db_path = get_db_path()
36
25
  db_path.parent.mkdir(parents=True, exist_ok=True)
37
26
  with get_backend() as backend:
@@ -45,19 +34,40 @@ def db_reset( # noqa: B008
45
34
  repo: str | None = typer.Option(None, "--repo", help="Reset only this repo path"), # noqa: B008
46
35
  ) -> None:
47
36
  """Wipe the database or a single repo's subgraph."""
37
+ import socket as _socket
38
+
39
+ from sqlcg.server.control import sock_path
40
+
41
+ # Refuse cleanly when a server is live.
42
+ sp = sock_path()
43
+ if sp.exists():
44
+ try:
45
+ with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
46
+ s.settimeout(1)
47
+ s.connect(str(sp))
48
+ console.print(
49
+ "[red]A server is running on this database; stop it first "
50
+ "('sqlcg mcp stop') before resetting the database.[/red]"
51
+ )
52
+ raise typer.Exit(1)
53
+ except (FileNotFoundError, ConnectionRefusedError, OSError):
54
+ pass
55
+
48
56
  if repo:
49
- # Delete all nodes for this repo (use run_write for mutation)
57
+ # Delete all nodes for this repo: delete File nodes (cascades to all
58
+ # related nodes via delete_nodes_for_file) and the Repo node itself.
50
59
  with get_backend() as backend:
51
- backend.run_write(
52
- "MATCH (r:Repo {path: $p}) DETACH DELETE r",
53
- {"p": repo},
60
+ # Get all files for this repo
61
+ file_rows = backend.run_read(
62
+ 'SELECT path FROM "File" WHERE repo_path = ?',
63
+ {"repo_path": repo},
54
64
  )
65
+ for fr in file_rows:
66
+ backend.delete_nodes_for_file(fr["path"])
67
+ backend.run_write('DELETE FROM "Repo" WHERE path = ?', {"p": repo})
55
68
  console.print(f"[yellow]Reset repo[/yellow] {repo}")
56
69
  else:
57
- # Full reset — delete the DB. Kuzu may store it as a single file (current,
58
- # e.g. 0.11.x) or a directory (older versions); also drop the .wal sidecar.
59
- # shutil.rmtree silently no-ops on a regular file (NotADirectoryError +
60
- # ignore_errors), so dispatch on the actual filesystem type.
70
+ # Full reset — delete the DuckDB file (single file, not a directory).
61
71
  db_path = get_db_path()
62
72
  removed = False
63
73
  for target in (db_path, db_path.with_name(db_path.name + ".wal")):
@@ -76,56 +86,46 @@ def db_reset( # noqa: B008
76
86
  @app.command("info")
77
87
  def db_info() -> None:
78
88
  """Show database stats."""
79
- # db info is a read-only command. All Cypher reads route through the live
80
- # server (run_read_routed) to avoid "Database is locked" while the MCP server
81
- # holds the write lock. get_schema_version / get_indexed_sha are inlined as
82
- # run_read_routed calls using their known Cypher so they too route through the
83
- # socket when a server is live; this avoids a direct-open that would hit the lock.
89
+ # db info routes through the live server (run_read_routed) to avoid holding
90
+ # the DuckDB file lock when the MCP server is running.
84
91
 
85
92
  # Schema version
86
- schema_rows = run_read_routed("MATCH (v:SchemaVersion) RETURN v.version AS version LIMIT 1", {})
93
+ schema_rows = run_read_routed('SELECT version FROM "SchemaVersion" LIMIT 1', {})
87
94
  version = (schema_rows[0]["version"] if schema_rows else None) or "unknown"
88
95
  console.print(f"Schema version: {version}")
89
96
 
90
- # Freshness block — only shown when the DB has been indexed from a git repo
97
+ # Freshness block
91
98
  try:
92
- sha_rows = run_read_routed(
93
- "MATCH (v:SchemaVersion) RETURN v.indexed_sha AS sha LIMIT 1", {}
94
- )
99
+ sha_rows = run_read_routed('SELECT indexed_sha AS sha FROM "SchemaVersion" LIMIT 1', {})
95
100
  indexed_sha = sha_rows[0]["sha"] if sha_rows else None
96
- repo_rows = run_read_routed("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
101
+ repo_rows = run_read_routed('SELECT path FROM "Repo" LIMIT 1', {})
97
102
  if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
98
103
  repo_root = Path(repo_rows[0]["path"])
99
104
  f = compute_freshness(repo_root, indexed_sha)
100
105
  console.print(render_freshness_line(f))
101
- except NotImplementedError:
102
- # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
103
- pass
104
106
  except Exception as e:
105
- # Any unexpected error in the freshness block must not crash db info
106
107
  logger.debug(f"Freshness check skipped: {e}")
107
108
 
108
- # Show node counts for all labels
109
+ # Node counts
109
110
  for label in NodeLabel:
110
111
  try:
111
- result = run_read_routed(f"MATCH (n:{label}) RETURN COUNT(*) AS count", {})
112
+ result = run_read_routed(f'SELECT count(*) AS count FROM "{label}"', {})
112
113
  count = result[0]["count"] if result else 0
113
114
  console.print(f" {label}: {count}")
114
115
  except Exception as e:
115
- # Log unexpected exceptions instead of silently skipping
116
116
  logger.error(f"Error getting count for {label}: {e}")
117
117
  console.print(f" [red]{label}: error[/red]")
118
118
 
119
- # Health check section
120
- repo_count_result = run_read_routed("MATCH (n:Repo) RETURN COUNT(n) AS count", {})
119
+ # Health check
120
+ repo_count_result = run_read_routed('SELECT count(*) AS count FROM "Repo"', {})
121
121
  repo_count = repo_count_result[0]["count"] if repo_count_result else 0
122
122
 
123
123
  if repo_count == 0:
124
- console.print( # noqa: E501
124
+ console.print(
125
125
  "[red]Database is empty. Run 'sqlcg db init' and 'sqlcg index <path>' first.[/red]"
126
126
  )
127
127
  else:
128
- query_count_result = run_read_routed("MATCH (n:SqlQuery) RETURN COUNT(n) AS count", {})
128
+ query_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlQuery"', {})
129
129
  query_count = query_count_result[0]["count"] if query_count_result else 0
130
130
 
131
131
  if query_count == 0:
@@ -134,7 +134,7 @@ def db_info() -> None:
134
134
  "the graph.[/yellow]"
135
135
  )
136
136
  else:
137
- col_count_result = run_read_routed("MATCH (n:SqlColumn) RETURN COUNT(n) AS count", {})
137
+ col_count_result = run_read_routed('SELECT count(*) AS count FROM "SqlColumn"', {})
138
138
  col_count = col_count_result[0]["count"] if col_count_result else 0
139
139
 
140
140
  if col_count == 0:
@@ -144,12 +144,10 @@ def db_info() -> None:
144
144
  "will return empty results.[/yellow]"
145
145
  )
146
146
 
147
- # Print COLUMN_LINEAGE edges count
148
- edges_result = run_read_routed("MATCH ()-[r:COLUMN_LINEAGE]->() RETURN COUNT(r) AS count", {})
147
+ edges_result = run_read_routed('SELECT count(*) AS count FROM "COLUMN_LINEAGE"', {})
149
148
  edges_count = edges_result[0]["count"] if edges_result else 0
150
149
  console.print(f" COLUMN_LINEAGE edges: {edges_count}")
151
150
 
152
- # Print star resolution metrics (T-07)
153
151
  from sqlcg.core.queries import COUNT_STAR_EXPANSIONS_QUERY, COUNT_STAR_SOURCES_QUERY
154
152
 
155
153
  star_source_result = run_read_routed(COUNT_STAR_SOURCES_QUERY, {})
@@ -160,11 +158,11 @@ def db_info() -> None:
160
158
  star_expansion_count = star_expansion_result[0]["n"] if star_expansion_result else 0
161
159
  console.print(f" STAR_EXPANSION lineage edges: {star_expansion_count}")
162
160
 
163
- # Print parsing mode distribution
164
- mode_query = (
165
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode, COUNT(q) AS cnt ORDER BY cnt DESC"
161
+ mode_rows = run_read_routed(
162
+ 'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
163
+ " GROUP BY parsing_mode ORDER BY cnt DESC",
164
+ {},
166
165
  )
167
- mode_rows = run_read_routed(mode_query, {})
168
166
  if mode_rows and "mode" in mode_rows[0]:
169
167
  console.print("\n Parsing mode distribution:")
170
168
  for row in mode_rows:
@@ -174,7 +172,7 @@ def db_info() -> None:
174
172
  @app.command("list-repos")
175
173
  def list_repos() -> None:
176
174
  """List all indexed repositories."""
177
- result = run_read_routed("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
175
+ result = run_read_routed('SELECT path, name FROM "Repo"', {})
178
176
 
179
177
  if not result:
180
178
  console.print("[yellow]No repositories indexed[/yellow]")
@@ -4,7 +4,6 @@ import typer
4
4
  from rich.console import Console
5
5
  from rich.table import Table
6
6
 
7
- from sqlcg.core.schema import NodeLabel
8
7
  from sqlcg.server.read_client import run_read_routed
9
8
 
10
9
  app = typer.Typer(help="Search the graph")
@@ -19,14 +18,13 @@ def find_table( # noqa: B008
19
18
  """Find a table by name."""
20
19
  name = name.lower() # graph keys are lowercased at index time (C2 normalization)
21
20
  results = run_read_routed(
22
- f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
23
- "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
21
+ "SELECT qualified, kind FROM \"SqlTable\" WHERE qualified LIKE '%' || ? || '%' LIMIT 50",
24
22
  {"name": name},
25
23
  )
26
24
  if not raw:
27
25
  from sqlcg.server.noise_filter import NoiseFilter
28
26
 
29
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
27
+ nf = NoiseFilter.from_config()
30
28
  ids = [r["qualified"] for r in results]
31
29
  kept, _ = nf.filter_nodes(ids)
32
30
  kept_set = set(kept)
@@ -42,14 +40,13 @@ def find_column( # noqa: B008
42
40
  """Find a column by table.column reference."""
43
41
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
44
42
  results = run_read_routed(
45
- f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
43
+ "SELECT id FROM \"SqlColumn\" WHERE id LIKE '%' || ? || '%' LIMIT 50",
46
44
  {"ref": ref},
47
45
  )
48
46
  if not raw:
49
47
  from sqlcg.server.noise_filter import NoiseFilter
50
48
 
51
- nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
52
- # Filter on the schema.table portion of each column id (schema.table.column)
49
+ nf = NoiseFilter.from_config()
53
50
  results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
54
51
  _print_table(results, ["id"])
55
52
 
@@ -60,8 +57,7 @@ def find_pattern( # noqa: B008
60
57
  ) -> None:
61
58
  """Find queries containing a SQL pattern."""
62
59
  results = run_read_routed(
63
- f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
64
- "RETURN q.id AS id, q.kind AS kind LIMIT 50",
60
+ "SELECT id, kind FROM \"SqlQuery\" WHERE sql LIKE '%' || ? || '%' LIMIT 50",
65
61
  {"pattern": pattern},
66
62
  )
67
63
  _print_table(results, ["id", "kind"])
@@ -112,13 +112,11 @@ def gain_cmd(
112
112
  """
113
113
  )
114
114
 
115
- # Section E: execute_cypher ratio
116
- cypher_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_cypher'"
117
- execute_cypher_count_result = metrics.execute_query(cypher_query)
118
- execute_cypher_count = (
119
- execute_cypher_count_result[0][0] if execute_cypher_count_result else 0
120
- )
121
- execute_cypher_ratio = execute_cypher_count / total_calls if total_calls > 0 else 0
115
+ # Section E: execute_sql ratio
116
+ sql_query = "SELECT COUNT(*) as count FROM tool_calls WHERE tool_name = 'execute_sql'"
117
+ execute_sql_count_result = metrics.execute_query(sql_query)
118
+ execute_sql_count = execute_sql_count_result[0][0] if execute_sql_count_result else 0
119
+ execute_sql_ratio = execute_sql_count / total_calls if total_calls > 0 else 0
122
120
 
123
121
  # Section F: parse quality from graph.
124
122
  # run_read_routed raises typer.Exit (Exception-derived, NOT SystemExit) on
@@ -127,8 +125,8 @@ def gain_cmd(
127
125
  parse_quality: dict[str, int] | None = None
128
126
  try:
129
127
  mode_rows = run_read_routed(
130
- "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
131
- " COUNT(q) AS cnt ORDER BY cnt DESC",
128
+ 'SELECT parsing_mode AS mode, count(*) AS cnt FROM "SqlQuery"'
129
+ " GROUP BY parsing_mode ORDER BY cnt DESC",
132
130
  {},
133
131
  )
134
132
  if mode_rows and "mode" in mode_rows[0]:
@@ -144,7 +142,7 @@ def gain_cmd(
144
142
  "feedback_tp": tp_count,
145
143
  "feedback_total": fb_total,
146
144
  "top_tools": [{"name": row[0], "count": row[1]} for row in top_tools],
147
- "execute_cypher_ratio": round(execute_cypher_ratio, 2),
145
+ "execute_sql_ratio": round(execute_sql_ratio, 2),
148
146
  }
149
147
  if parse_quality is not None:
150
148
  payload["parse_quality"] = parse_quality
@@ -191,14 +189,14 @@ def gain_cmd(
191
189
  console.print(f" {i}. {name}: {count}")
192
190
  console.print()
193
191
 
194
- # Section E: execute_cypher ratio
195
- console.print("[bold cyan]E. Raw Cypher Usage[/bold cyan]")
196
- ratio_pct = execute_cypher_ratio * 100
197
- if execute_cypher_ratio > 0.3:
198
- msg = f" [yellow]execute_cypher: {ratio_pct:.1f}% (high raw-Cypher usage)[/yellow]"
192
+ # Section E: execute_sql ratio
193
+ console.print("[bold cyan]E. Raw SQL Usage[/bold cyan]")
194
+ ratio_pct = execute_sql_ratio * 100
195
+ if execute_sql_ratio > 0.3:
196
+ msg = f" [yellow]execute_sql: {ratio_pct:.1f}% (high raw-SQL usage)[/yellow]"
199
197
  console.print(msg)
200
198
  else:
201
- console.print(f" execute_cypher: {ratio_pct:.1f}%")
199
+ console.print(f" execute_sql: {ratio_pct:.1f}%")
202
200
  console.print()
203
201
 
204
202
  # Section F: parse quality from graph
sqlcg/cli/commands/git.py CHANGED
@@ -33,7 +33,7 @@ _HOOKS: list[_HookSpec] = [
33
33
  '[ "$3" = "1" ] || exit 0\n'
34
34
  '{sqlcg_bin} reindex --from "$1" --to "$2"'
35
35
  ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet --notify'
36
- ' || echo "sqlcg: graph not updated (server busy/locked)'
36
+ ' || echo "sqlcg: graph not updated (reindex failed)'
37
37
  " -- run 'sqlcg mcp status'\" >&2\n"
38
38
  ),
39
39
  ),
@@ -50,10 +50,10 @@ PREV=$(git rev-parse --verify --quiet ORIG_HEAD)
50
50
  TOP=$(git rev-parse --show-toplevel)
51
51
  if [ -n "$PREV" ]; then
52
52
  {sqlcg_bin} reindex --from "$PREV" --to HEAD "$TOP" --dialect auto --quiet --notify \\
53
- || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
53
+ || echo "sqlcg: graph not updated (reindex failed) -- run 'sqlcg mcp status'" >&2
54
54
  else
55
55
  {sqlcg_bin} reindex "$TOP" --dialect auto --quiet --notify \\
56
- || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
56
+ || echo "sqlcg: graph not updated (reindex failed) -- run 'sqlcg mcp status'" >&2
57
57
  fi
58
58
  """,
59
59
  ),
@@ -101,7 +101,14 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec, sqlcg_bin: str) -> No
101
101
  if hook_path.exists():
102
102
  existing_content = hook_path.read_text()
103
103
  if spec.sentinel in existing_content:
104
- # Already installed — idempotent, skip silently
104
+ if existing_content == script:
105
+ # Byte-identical current template — true idempotency, silent skip.
106
+ return
107
+ # Sentinel present but content differs: sqlcg-owned but stale hook.
108
+ # Overwrite with the current rendered template and report the upgrade.
109
+ hook_path.write_text(script)
110
+ hook_path.chmod(0o755)
111
+ console.print(f"[green]Upgraded git hook:[/green] .git/hooks/{spec.filename}")
105
112
  return
106
113
  else:
107
114
  # Foreign hook without sqlcg sentinel
@@ -1,6 +1,7 @@
1
1
  """Index command for scanning and indexing SQL files."""
2
2
 
3
- import os
3
+ import json
4
+ import socket as _socket
4
5
  from pathlib import Path
5
6
 
6
7
  import typer
@@ -14,11 +15,15 @@ from rich.progress import (
14
15
  TimeRemainingColumn,
15
16
  )
16
17
 
17
- from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
18
+ from sqlcg.core.config import DbConfig, config_file_present, get_backend, get_db_path, get_dialect
18
19
  from sqlcg.indexer.indexer import Indexer
19
20
 
20
21
  console = Console()
21
22
 
23
+ # Socket timeout for the index-via-server path.
24
+ # Generous budget: full index of a large repo can take several minutes.
25
+ _INDEX_SOCKET_TIMEOUT_S = 600
26
+
22
27
 
23
28
  def index_cmd( # noqa: B008
24
29
  path: Path = typer.Argument(..., help="Directory to index"), # noqa: B008
@@ -31,17 +36,11 @@ def index_cmd( # noqa: B008
31
36
  timeout_per_file: int = typer.Option( # noqa: B008
32
37
  10, "--timeout-per-file", help="Timeout per file in seconds"
33
38
  ),
34
- buffer_pool_size: int = typer.Option( # noqa: B008
35
- 0,
36
- "--buffer-pool-size",
37
- help="KuzuDB buffer pool size in MB (0 = default). "
38
- "Set to 256-512 on memory-constrained machines.",
39
- ),
40
39
  batch_size: int = typer.Option( # noqa: B008
41
40
  50,
42
41
  "--batch-size",
43
42
  help=(
44
- "Files per KuzuDB transaction in the upsert pass. "
43
+ "Files per DuckDB transaction in the upsert pass. "
45
44
  "Default 50 balances commit-overhead reduction (vs. legacy per-file commits) "
46
45
  "against per-batch memory cost. Lower values are safer for memory-constrained "
47
46
  "machines; higher values give marginal speedup at the cost of larger working sets. "
@@ -71,9 +70,24 @@ def index_cmd( # noqa: B008
71
70
  "Marks freshness as 'indexed with working-tree changes'."
72
71
  ),
73
72
  ),
73
+ detach: bool = typer.Option( # noqa: B008
74
+ False,
75
+ "--detach",
76
+ help=(
77
+ "When routing through a live server, return immediately after enqueueing "
78
+ "(fire-and-forget). Default is to wait for the index to complete."
79
+ ),
80
+ ),
74
81
  ) -> None:
75
82
  """Index SQL files in a directory.
76
83
 
84
+ When a server is live on this DB, the index is routed through the server's
85
+ control socket so the DB is never opened directly (avoids lock contention).
86
+ Use --detach to enqueue and return immediately (fire-and-forget).
87
+
88
+ With no server live, falls back to the direct-write path unchanged
89
+ (zero-config small-repo invariant).
90
+
77
91
  Schema aliases (staging schema → canonical schema) can be configured in
78
92
  .sqlcg.toml under sqlcg.schema_aliases, e.g. da_tmp = "da".
79
93
  """
@@ -85,6 +99,26 @@ def index_cmd( # noqa: B008
85
99
  logging.getLogger("sqlcg").setLevel(level)
86
100
  logging.getLogger("sqlglot").setLevel(level)
87
101
 
102
+ # Resolve path early so socket routing uses the absolute path.
103
+ path = path.resolve()
104
+
105
+ # Resolve dialect before routing so the WriterRequest always carries a concrete
106
+ # dialect (never the literal sentinel "auto"). Bug A: the route call was before
107
+ # this resolution, causing the server to receive "auto" and fail with
108
+ # "Unknown dialect 'auto'" on every server-routed index.
109
+ if dialect == "auto":
110
+ dialect = get_dialect(path)
111
+
112
+ # Step 3.2 — probe for a live server and route through the socket if present.
113
+ _routed = _try_route_index_via_server(
114
+ path=path,
115
+ dialect=dialect,
116
+ wait=not detach,
117
+ quiet=quiet,
118
+ )
119
+ if _routed:
120
+ return
121
+
88
122
  # Route parse warnings to stderr (--verbose) or to the configured log file.
89
123
  sqlcg_log = logging.getLogger("sqlcg")
90
124
 
@@ -107,20 +141,12 @@ def index_cmd( # noqa: B008
107
141
  sqlcg_log.addHandler(_warn_handler)
108
142
  _warn_log_path = None
109
143
  else:
110
- _warn_log_path = KuzuConfig.from_env().log_path
144
+ _warn_log_path = DbConfig.from_env().log_path
111
145
  _warn_log_path.parent.mkdir(parents=True, exist_ok=True)
112
146
  _warn_handler = logging.FileHandler(_warn_log_path)
113
147
  _warn_handler.setLevel(logging.WARNING)
114
148
  sqlcg_log.addHandler(_warn_handler)
115
149
 
116
- # Set buffer pool size via env var if specified
117
- if buffer_pool_size > 0:
118
- os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
119
-
120
- # Resolve dialect: 'auto' reads from .sqlcg.toml, otherwise use provided value
121
- if dialect == "auto":
122
- dialect = get_dialect(path)
123
-
124
150
  if not quiet and not config_file_present(path):
125
151
  console.print(
126
152
  f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
@@ -144,7 +170,7 @@ def index_cmd( # noqa: B008
144
170
  )
145
171
  except KeyboardInterrupt:
146
172
  # The backend context manager (inside _run_index) has already closed the
147
- # KuzuDB connection and released the lock by the time we get here.
173
+ # DuckDB connection and released the lock by the time we get here.
148
174
  console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
149
175
  raise typer.Exit(130) from None
150
176
  finally:
@@ -172,6 +198,132 @@ def index_cmd( # noqa: B008
172
198
  )
173
199
 
174
200
 
201
+ def _try_route_index_via_server(
202
+ *,
203
+ path: Path,
204
+ dialect: str | None,
205
+ wait: bool,
206
+ quiet: bool,
207
+ ) -> bool:
208
+ """Probe for a live server and route the index through the socket if found.
209
+
210
+ Returns True if the index was handled via the server (caller should return).
211
+ Returns False if no server is live (caller should fall through to direct path).
212
+ """
213
+ from sqlcg.server.control import sock_path
214
+
215
+ sp = sock_path()
216
+ if not sp.exists():
217
+ return False
218
+
219
+ payload = {
220
+ "op": "index",
221
+ "root": str(path),
222
+ "dialect": dialect,
223
+ "wait": wait,
224
+ "requested_by": "cli",
225
+ }
226
+ payload_bytes = json.dumps(payload).encode()
227
+ frame = f"{len(payload_bytes)}\n".encode() + payload_bytes
228
+
229
+ try:
230
+ with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
231
+ s.settimeout(_INDEX_SOCKET_TIMEOUT_S)
232
+ s.connect(str(sp))
233
+ s.sendall(frame)
234
+
235
+ if not wait:
236
+ # Fire-and-forget: read one framed acknowledgement frame.
237
+ f = s.makefile("rb")
238
+ length_line = f.readline()
239
+ if length_line:
240
+ try:
241
+ body_len = int(length_line.strip())
242
+ resp_bytes = f.read(body_len)
243
+ resp = json.loads(resp_bytes)
244
+ if "error" in resp:
245
+ err = resp["error"]
246
+ if "SQLCG_DB_PATH" in err or "write lock" in err:
247
+ console.print(f"[red]{err}[/red]")
248
+ else:
249
+ console.print(f"[red]Server error: {err}[/red]")
250
+ raise typer.Exit(1)
251
+ if not quiet:
252
+ pos = resp.get("position", "?")
253
+ console.print(f"[green]Queued via server[/green] (position {pos})")
254
+ except (ValueError, json.JSONDecodeError):
255
+ pass
256
+ return True
257
+
258
+ # wait=True: stream framed frames until done:true.
259
+ f = s.makefile("rb")
260
+ with Progress(
261
+ SpinnerColumn(),
262
+ TextColumn("[progress.description]{task.description}"),
263
+ BarColumn(),
264
+ MofNCompleteColumn(),
265
+ TimeRemainingColumn(),
266
+ console=console,
267
+ redirect_stderr=True,
268
+ ) as progress:
269
+ task = progress.add_task("Indexing via server", total=None)
270
+
271
+ while True:
272
+ length_line = f.readline()
273
+ if not length_line:
274
+ break
275
+ try:
276
+ body_len = int(length_line.strip())
277
+ except ValueError:
278
+ break
279
+ frame_bytes = f.read(body_len)
280
+ frame_resp = json.loads(frame_bytes)
281
+
282
+ if frame_resp.get("done"):
283
+ if not frame_resp.get("ok"):
284
+ err = frame_resp.get("error", "unknown error")
285
+ if "SQLCG_DB_PATH" in err or "write lock" in err:
286
+ console.print(f"[red]{err}[/red]")
287
+ else:
288
+ console.print(f"[red]Server index error: {err}[/red]")
289
+ raise typer.Exit(1)
290
+ srv_summary = frame_resp.get("summary", {})
291
+ if not quiet:
292
+ console.print(
293
+ f"[green]Indexed via server[/green] "
294
+ f"{srv_summary.get('files_parsed', '?')} files — "
295
+ f"{srv_summary.get('tables_found', '?')} tables, "
296
+ f"{srv_summary.get('lineage_edges_created', '?')} edges"
297
+ )
298
+ break
299
+ # Progress frame
300
+ files_done = frame_resp.get("files_done", 0)
301
+ files_total = frame_resp.get("files_total")
302
+ if files_total:
303
+ progress.update(task, completed=files_done, total=files_total)
304
+
305
+ return True
306
+
307
+ except TimeoutError:
308
+ import sys as _sys
309
+
310
+ print(
311
+ f"Server is still applying the index (timed out waiting after "
312
+ f"{_INDEX_SOCKET_TIMEOUT_S}s); the graph will update when it finishes "
313
+ "— check 'sqlcg mcp status'.",
314
+ file=_sys.stderr,
315
+ )
316
+ raise typer.Exit(0) from None
317
+ except (FileNotFoundError, ConnectionRefusedError, OSError):
318
+ # No live server — fall through to direct path.
319
+ return False
320
+ except typer.Exit:
321
+ raise
322
+ except Exception as exc:
323
+ console.print(f"[red]Socket routing failed: {exc}[/red]")
324
+ raise typer.Exit(1) from exc
325
+
326
+
175
327
  def _run_index(
176
328
  *,
177
329
  path: Path,
@@ -237,10 +389,10 @@ def _run_index(
237
389
  )
238
390
 
239
391
  # Connect files to repo
392
+ from sqlcg.core.queries import INDEX_REPO_FILES_QUERY
240
393
  from sqlcg.core.schema import RelType
241
394
 
242
- files_query = "MATCH (f:File) WHERE f.path STARTS WITH $repo_prefix RETURN f.path AS path"
243
- file_rows = backend.run_read(files_query, {"repo_prefix": abs_path})
395
+ file_rows = backend.run_read(INDEX_REPO_FILES_QUERY, {"repo_prefix": abs_path})
244
396
  for row in file_rows:
245
397
  backend.upsert_edge(
246
398
  NodeLabel.FILE,