sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/cli/commands/git.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Git integration commands for sqlcg."""
2
2
 
3
+ import shutil
4
+ import sys
3
5
  from pathlib import Path
4
6
  from typing import NamedTuple
5
7
 
@@ -14,36 +16,79 @@ app = typer.Typer(name="git", help="Git integration commands")
14
16
  class _HookSpec(NamedTuple):
15
17
  filename: str
16
18
  sentinel: str
17
- script: str
19
+ script_template: str
18
20
 
19
21
 
22
+ # Hook script templates — use {sqlcg_bin} as the placeholder for the resolved binary.
23
+ # The sentinel comments (e.g. "# sqlcg post-checkout hook") must stay byte-for-byte
24
+ # unchanged so R9 idempotency is preserved: _install_single_hook matches them verbatim.
20
25
  _HOOKS: list[_HookSpec] = [
21
26
  _HookSpec(
22
27
  filename="post-checkout",
23
28
  sentinel="# sqlcg post-checkout hook",
24
- script=(
29
+ script_template=(
25
30
  "#!/bin/sh\n"
26
31
  "# sqlcg post-checkout hook — incremental resync after branch switch\n"
27
32
  "# $3 == 1 means branch checkout (not file checkout); skip file checkouts\n"
28
33
  '[ "$3" = "1" ] || exit 0\n'
29
- 'sqlcg reindex --from "$1" --to "$2"'
30
- ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true\n'
34
+ '{sqlcg_bin} reindex --from "$1" --to "$2"'
35
+ ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet --notify'
36
+ ' || echo "sqlcg: graph not updated (server busy/locked)'
37
+ " -- run 'sqlcg mcp status'\" >&2\n"
31
38
  ),
32
39
  ),
33
40
  _HookSpec(
34
41
  filename="post-merge",
35
42
  sentinel="# sqlcg post-merge hook",
36
- script="""\
43
+ script_template="""\
37
44
  #!/bin/sh
38
45
  # sqlcg post-merge hook — incremental resync after pull/merge
39
- # post-merge receives only $1 (squash flag), no old/new SHA; use stored-SHA delta
40
- sqlcg reindex "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true
46
+ # git sets ORIG_HEAD to the pre-merge HEAD; pass it as --from so --notify can route
47
+ # through a running server (same path as post-checkout). If ORIG_HEAD is unset (e.g.
48
+ # first-ever merge / gc'd), fall back to the standalone stored-SHA delta (direct write).
49
+ PREV=$(git rev-parse --verify --quiet ORIG_HEAD)
50
+ TOP=$(git rev-parse --show-toplevel)
51
+ if [ -n "$PREV" ]; then
52
+ {sqlcg_bin} reindex --from "$PREV" --to HEAD "$TOP" --dialect auto --quiet --notify \\
53
+ || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
54
+ else
55
+ {sqlcg_bin} reindex "$TOP" --dialect auto --quiet --notify \\
56
+ || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
57
+ fi
41
58
  """,
42
59
  ),
43
60
  ]
44
61
 
45
62
 
46
- def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
63
+ def _resolve_sqlcg_bin() -> str:
64
+ """Resolve the absolute path of the installing sqlcg binary.
65
+
66
+ Resolution order:
67
+ 1. shutil.which("sqlcg") — the binary on the installer's $PATH.
68
+ 2. sys.argv[0] resolved via Path(...).resolve() if its file name is exactly "sqlcg" and it is an executable file.
69
+ 3. Bare "sqlcg" fallback (current behaviour) — prints a warning so the user knows.
70
+
71
+ Returns the resolved path string (absolute when resolvable, bare "sqlcg" otherwise).
72
+ """
73
+ # 1. Try $PATH first — the binary the user means
74
+ which_result = shutil.which("sqlcg")
75
+ if which_result:
76
+ return which_result
77
+
78
+ # 2. Try sys.argv[0] for python -m / editable-install invocations
79
+ argv0 = Path(sys.argv[0]).resolve()
80
+ if argv0.name == "sqlcg" and argv0.is_file() and argv0.stat().st_mode & 0o111:
81
+ return str(argv0)
82
+
83
+ # 3. Bare fallback — still functional but relies on $PATH at hook-run time
84
+ console.print(
85
+ "[yellow]Warning: could not resolve the sqlcg binary path; the generated hooks "
86
+ "will use bare 'sqlcg' and rely on $PATH at hook-run time.[/yellow]"
87
+ )
88
+ return "sqlcg"
89
+
90
+
91
+ def _install_single_hook(hooks_dir: Path, spec: _HookSpec, sqlcg_bin: str) -> None:
47
92
  """Install one git hook idempotently.
48
93
 
49
94
  If the hook file already contains the sentinel, it is already installed — skip silently.
@@ -51,6 +96,7 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
51
96
  Otherwise, write the hook file and set 0o755.
52
97
  """
53
98
  hook_path = hooks_dir / spec.filename
99
+ script = spec.script_template.format(sqlcg_bin=sqlcg_bin)
54
100
 
55
101
  if hook_path.exists():
56
102
  existing_content = hook_path.read_text()
@@ -68,10 +114,10 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
68
114
  f".git/hooks/{spec.filename}:[/yellow]"
69
115
  )
70
116
  console.print("")
71
- console.print("[cyan]" + spec.script.rstrip() + "[/cyan]")
117
+ console.print("[cyan]" + script.rstrip() + "[/cyan]")
72
118
  return
73
119
 
74
- hook_path.write_text(spec.script)
120
+ hook_path.write_text(script)
75
121
  hook_path.chmod(0o755)
76
122
  console.print(f"[green]Installed git hook:[/green] .git/hooks/{spec.filename}")
77
123
 
@@ -87,6 +133,8 @@ def install_hooks(
87
133
  Writes a post-checkout hook that triggers incremental resync after branch switches
88
134
  and a post-merge hook that triggers resync after pulls/merges.
89
135
  Idempotent: running multiple times produces one hook entry per hook.
136
+ The hooks embed the absolute path of the installing sqlcg binary so version skew
137
+ between the installed binary and the hook command is avoided.
90
138
  """
91
139
  if repo is None:
92
140
  repo = Path.cwd()
@@ -100,5 +148,7 @@ def install_hooks(
100
148
 
101
149
  hooks_dir.mkdir(parents=True, exist_ok=True)
102
150
 
151
+ sqlcg_bin = _resolve_sqlcg_bin()
152
+
103
153
  for spec in _HOOKS:
104
- _install_single_hook(hooks_dir, spec)
154
+ _install_single_hook(hooks_dir, spec, sqlcg_bin)
@@ -14,7 +14,7 @@ from rich.progress import (
14
14
  TimeRemainingColumn,
15
15
  )
16
16
 
17
- from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
17
+ from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
18
18
  from sqlcg.indexer.indexer import Indexer
19
19
 
20
20
  console = Console()
@@ -29,7 +29,7 @@ def index_cmd( # noqa: B008
29
29
  None, "--dbt-manifest", help="Path to dbt manifest"
30
30
  ),
31
31
  timeout_per_file: int = typer.Option( # noqa: B008
32
- 5, "--timeout-per-file", help="Timeout per file in seconds"
32
+ 10, "--timeout-per-file", help="Timeout per file in seconds"
33
33
  ),
34
34
  buffer_pool_size: int = typer.Option( # noqa: B008
35
35
  0,
@@ -63,6 +63,14 @@ def index_cmd( # noqa: B008
63
63
  profile: bool = typer.Option( # noqa: B008
64
64
  False, "--profile/--no-profile", help="Emit per-stage timing after indexing"
65
65
  ),
66
+ include_working_tree: bool = typer.Option( # noqa: B008
67
+ False,
68
+ "--include-working-tree",
69
+ help=(
70
+ "Index the working tree including uncommitted changes. "
71
+ "Marks freshness as 'indexed with working-tree changes'."
72
+ ),
73
+ ),
66
74
  ) -> None:
67
75
  """Index SQL files in a directory.
68
76
 
@@ -113,6 +121,13 @@ def index_cmd( # noqa: B008
113
121
  if dialect == "auto":
114
122
  dialect = get_dialect(path)
115
123
 
124
+ if not quiet and not config_file_present(path):
125
+ console.print(
126
+ f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
127
+ "using defaults (snowflake dialect, no aliases/prefixes). "
128
+ "Create .sqlcg.toml in the index directory to customise.[/yellow]"
129
+ )
130
+
116
131
  db_path = get_db_path()
117
132
  db_path.parent.mkdir(parents=True, exist_ok=True)
118
133
 
@@ -137,6 +152,19 @@ def index_cmd( # noqa: B008
137
152
  sqlcg_log.removeHandler(_counter)
138
153
  _warn_handler.close()
139
154
 
155
+ # --include-working-tree: if the working tree is dirty, overwrite the stored SHA
156
+ # with a "<head>+dirty" sentinel so 'db info' can distinguish clean-HEAD index
157
+ # from working-tree-inclusive index. The backend was closed inside _run_index,
158
+ # so we open a fresh context here for the single sentinel write.
159
+ if include_working_tree:
160
+ from sqlcg.core.freshness import _git
161
+
162
+ dirty_out = _git(path, "status", "--porcelain")
163
+ if dirty_out: # non-empty string → working tree is dirty
164
+ head = _git(path, "rev-parse", "HEAD") or "unknown"
165
+ with get_backend() as _b2:
166
+ _b2.set_indexed_sha(f"{head}+dirty")
167
+
140
168
  if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
141
169
  console.print(
142
170
  f"[yellow]Parse warnings written to {_warn_log_path} "
sqlcg/cli/commands/mcp.py CHANGED
@@ -71,3 +71,106 @@ def mcp_best_practices() -> None:
71
71
  from sqlcg.server.skill import render_body
72
72
 
73
73
  typer.echo(render_body())
74
+
75
+
76
+ @app.command("status")
77
+ def mcp_status() -> None:
78
+ """Print server status JSON (connects to control socket).
79
+
80
+ Returns JSON with fields: running, pid, db_path, indexed_sha, head_sha,
81
+ stale_by_commits, connected_clients, uptime when a server is live.
82
+
83
+ When no server is found: {"running": false}.
84
+ When the PID file exists with a live process but the socket is unavailable:
85
+ {"running": true, "degraded": "socket unavailable", ...}.
86
+
87
+ R3 (stale socket): if the socket file exists but the server is not
88
+ responding (ConnectionRefusedError / FileNotFoundError), falls through
89
+ to the PID-file probe — never hangs or errors on a dead socket.
90
+ """
91
+ import socket as _socket
92
+
93
+ from sqlcg.server.control import is_pid_alive, read_pid, sock_path
94
+
95
+ sp = sock_path()
96
+ try:
97
+ with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
98
+ s.settimeout(2)
99
+ s.connect(str(sp))
100
+ s.sendall(json.dumps({"op": "status"}).encode() + b"\n")
101
+ data = s.recv(4096)
102
+ console.print_json(data.decode())
103
+ except (FileNotFoundError, ConnectionRefusedError, OSError):
104
+ # Socket unavailable — probe via PID file (R3: stale-socket fall-through)
105
+ rec = read_pid()
106
+ if rec and is_pid_alive(rec["pid"]):
107
+ console.print_json(
108
+ json.dumps(
109
+ {
110
+ "running": True,
111
+ "degraded": "socket unavailable",
112
+ "pid": rec["pid"],
113
+ "db_path": rec["db_path"],
114
+ }
115
+ )
116
+ )
117
+ else:
118
+ console.print_json(json.dumps({"running": False}))
119
+
120
+
121
+ @app.command("stop")
122
+ def mcp_stop() -> None:
123
+ """Stop the running MCP server gracefully.
124
+
125
+ Sends a ``stop`` op via the control socket; waits up to 5 s for the
126
+ socket file to disappear (confirming clean exit). Falls back to SIGTERM
127
+ on the PID-file PID if the socket is unavailable.
128
+
129
+ R3 (stale socket): ``ConnectionRefusedError`` / ``FileNotFoundError`` are
130
+ caught — never hangs on a dead socket.
131
+ """
132
+ import socket as _socket
133
+ import time
134
+
135
+ from sqlcg.server.control import is_pid_alive, read_pid, sock_path
136
+
137
+ sp = sock_path()
138
+ try:
139
+ with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
140
+ s.settimeout(2)
141
+ s.connect(str(sp))
142
+ s.sendall(json.dumps({"op": "stop"}).encode() + b"\n")
143
+ s.recv(128)
144
+ # Wait up to 5 s for the socket file to disappear (confirms clean exit)
145
+ for _ in range(10):
146
+ if not sp.exists():
147
+ break
148
+ time.sleep(0.5)
149
+ console.print("[green]Server stopped.[/green]")
150
+ except (FileNotFoundError, ConnectionRefusedError, OSError):
151
+ # Socket unavailable — fall back to SIGTERM via PID file
152
+ import signal
153
+
154
+ rec = read_pid()
155
+ if rec and is_pid_alive(rec["pid"]):
156
+ os.kill(rec["pid"], signal.SIGTERM)
157
+ console.print(f"[yellow]Socket unavailable — sent SIGTERM to PID {rec['pid']}[/yellow]")
158
+ else:
159
+ console.print("[yellow]No server found to stop.[/yellow]")
160
+
161
+
162
+ @app.command("restart")
163
+ def mcp_restart() -> None:
164
+ """Stop the server. The client (editor) must respawn.
165
+
166
+ v1.1 cannot re-parent an editor-spawned stdio process. This command
167
+ stops the current server and prints guidance for the user to restart
168
+ the MCP server via their editor's MCP configuration.
169
+
170
+ True auto-restart (re-parenting stdio) is deferred to v1.2.
171
+ """
172
+ mcp_stop()
173
+ console.print(
174
+ "[yellow]Server stopped. Please restart via your editor's MCP configuration.[/yellow]"
175
+ )
176
+ console.print("[dim]True auto-restart (re-parenting stdio) is deferred to v1.2.[/dim]")
@@ -18,6 +18,12 @@ from rich.console import Console
18
18
 
19
19
  console = Console()
20
20
 
21
+ # Client-side socket timeout for the --notify control-socket path.
22
+ # A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
23
+ # 300 s covers that with headroom while keeping the wait bounded on a wedged server.
24
+ # This is a CLI transport bound, NOT a KuzuConfig/indexer constant.
25
+ _NOTIFY_SOCKET_TIMEOUT_S = 300
26
+
21
27
 
22
28
  def reindex_cmd( # noqa: B008
23
29
  path: Path = typer.Argument(..., help="Repository root directory to resync"), # noqa: B008
@@ -39,10 +45,18 @@ def reindex_cmd( # noqa: B008
39
45
  help="Files per KuzuDB transaction (same default as index command)",
40
46
  ),
41
47
  timeout_per_file: int = typer.Option( # noqa: B008
42
- 5,
48
+ 10,
43
49
  "--timeout-per-file",
44
50
  help="Per-file parse timeout in seconds",
45
51
  ),
52
+ notify: bool = typer.Option( # noqa: B008
53
+ False,
54
+ "--notify",
55
+ help=(
56
+ "If a server is live on this DB, route the reindex through the server "
57
+ "(avoids lock contention). Falls back to direct write if no server is found."
58
+ ),
59
+ ),
46
60
  ) -> None:
47
61
  """Incrementally resync the graph after a git branch change or pull.
48
62
 
@@ -56,17 +70,99 @@ def reindex_cmd( # noqa: B008
56
70
  Exits with an error if the database schema version does not match the current
57
71
  build — run 'sqlcg db reset && sqlcg db init && sqlcg index <path>' to re-init.
58
72
  """
59
- from sqlcg.core.config import get_backend, get_db_path, get_dialect
73
+ import json
74
+ import socket as _socket
75
+
76
+ from sqlcg.core.config import config_file_present, get_backend, get_db_path, get_dialect
60
77
  from sqlcg.core.schema import SCHEMA_VERSION
61
78
  from sqlcg.indexer.indexer import Indexer
79
+ from sqlcg.server.control import sock_path
62
80
 
63
81
  # Resolve to absolute path so ignore-spec and git delta receive an absolute root
64
82
  path = path.resolve()
65
83
 
84
+ # --notify: if a server is live, route reindex through the socket (R3 fallback)
85
+ if notify:
86
+ sp = sock_path()
87
+ try:
88
+ with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
89
+ s.settimeout(_NOTIFY_SOCKET_TIMEOUT_S)
90
+ s.connect(str(sp))
91
+ # Resolve SHAs before sending — standalone mode (no --from) is not routed via the socket
92
+ effective_from = from_sha
93
+ if effective_from is None:
94
+ # Standalone mode: we cannot read stored SHA here without opening the
95
+ # DB (which would conflict with the running server). If no --from is
96
+ # given with --notify, we raise OSError so the handler below falls
97
+ # back to direct write; the caller should pass --from explicitly.
98
+ raise OSError( # noqa: TRY301
99
+ "--notify without --from requires direct DB access; falling through"
100
+ )
101
+ # Resolve symbolic refs (HEAD, branch names) to concrete 40-char SHAs
102
+ # before sending — prevents literal "HEAD" from being stored in the graph.
103
+ effective_from = _resolve_ref(path, effective_from)
104
+ effective_to = _resolve_ref(path, to_sha) if to_sha else _get_head(path)
105
+ payload = {
106
+ "op": "reindex",
107
+ "root": str(path),
108
+ "from": effective_from,
109
+ "to": effective_to,
110
+ "dialect": dialect,
111
+ }
112
+ s.sendall(json.dumps(payload).encode() + b"\n")
113
+ data = s.recv(65536)
114
+ result = json.loads(data)
115
+ if "error" in result:
116
+ console.print(f"[red]Server reindex error: {result['error']}[/red]")
117
+ raise typer.Exit(1)
118
+ if not quiet:
119
+ srv_summary = result.get("summary", {})
120
+ console.print(
121
+ f"[green]Resynced via server[/green] "
122
+ f"+{srv_summary.get('added', 0)} added, "
123
+ f"~{srv_summary.get('modified', 0)} modified, "
124
+ f"-{srv_summary.get('deleted', 0)} deleted"
125
+ )
126
+ raise typer.Exit(0)
127
+ except TimeoutError:
128
+ # Bug 1 fix: server is alive and working (accepted the connection, holds the
129
+ # lock, will finish and persist). Do NOT fall through to the direct-write
130
+ # path — that would hit the held lock and produce a false "Database is locked"
131
+ # error. Exit 0 so the git hook stays non-fatal; the server will complete.
132
+ # (socket.timeout is an alias of TimeoutError, a subclass of OSError — this
133
+ # clause must be listed before the broad OSError clause below.)
134
+ import sys
135
+
136
+ print(
137
+ f"Server is still applying the reindex (timed out waiting after "
138
+ f"{_NOTIFY_SOCKET_TIMEOUT_S}s); the graph will update when it finishes "
139
+ f"— check 'sqlcg mcp status'.",
140
+ file=sys.stderr,
141
+ )
142
+ raise typer.Exit(0) from None
143
+ except (FileNotFoundError, ConnectionRefusedError, OSError):
144
+ # R3: no live server (stale socket, socket absent, fallback condition) —
145
+ # fall through to the existing direct-write path unchanged.
146
+ # NOTE: socket.timeout / TimeoutError is an OSError subclass, so the
147
+ # dedicated timeout clause above must be listed first (already is).
148
+ pass
149
+ except typer.Exit:
150
+ raise
151
+ except Exception as exc:
152
+ console.print(f"[red]--notify routing failed: {exc}[/red]")
153
+ raise typer.Exit(1) from exc
154
+
66
155
  # Resolve dialect
67
156
  if dialect == "auto":
68
157
  dialect = get_dialect(path)
69
158
 
159
+ if not quiet and not config_file_present(path):
160
+ console.print(
161
+ f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
162
+ "using defaults (snowflake dialect, no aliases/prefixes). "
163
+ "Create .sqlcg.toml in the index directory to customise.[/yellow]"
164
+ )
165
+
70
166
  db_path = get_db_path()
71
167
  db_path.parent.mkdir(parents=True, exist_ok=True)
72
168
 
@@ -88,15 +184,17 @@ def reindex_cmd( # noqa: B008
88
184
 
89
185
  # ---- Determine mode -------------------------------------------------------
90
186
  if from_sha is not None:
91
- # Explicit-SHA mode
92
- effective_to = to_sha or _get_head(path)
187
+ # Explicit-SHA mode — resolve symbolic refs to concrete SHAs before storing
188
+ effective_from = _resolve_ref(path, from_sha)
189
+ effective_to = _resolve_ref(path, to_sha) if to_sha else _get_head(path)
93
190
  if not quiet:
94
191
  console.print(
95
- f"Resyncing [cyan]{path}[/cyan] [dim]{from_sha[:8]}..{effective_to[:8]}[/dim]"
192
+ f"Resyncing [cyan]{path}[/cyan] "
193
+ f"[dim]{effective_from[:8]}..{effective_to[:8]}[/dim]"
96
194
  )
97
195
  summary = indexer.resync_changed(
98
196
  path,
99
- from_sha,
197
+ effective_from,
100
198
  effective_to,
101
199
  backend,
102
200
  dialect,
@@ -150,24 +248,36 @@ def reindex_cmd( # noqa: B008
150
248
  )
151
249
 
152
250
 
153
- def _get_head(root: Path) -> str:
154
- """Return the current HEAD SHA for the git repo at root.
251
+ def _resolve_ref(root: Path, ref: str) -> str:
252
+ """Resolve a git ref (HEAD, branch, tag, or concrete SHA) to a 40-char SHA.
155
253
 
156
- Raises typer.Exit(1) if git is unavailable or root is not a git repo.
254
+ A concrete SHA resolves to itself (idempotent), so callers may pass either a
255
+ symbolic ref or a SHA without branching.
256
+
257
+ Raises typer.Exit(1) if git is unavailable or the ref cannot be resolved.
157
258
  """
158
259
  try:
159
260
  result = subprocess.run(
160
- ["git", "rev-parse", "HEAD"],
261
+ ["git", "rev-parse", ref],
161
262
  cwd=str(root),
162
263
  capture_output=True,
163
264
  text=True,
164
265
  )
165
266
  if result.returncode != 0:
166
267
  console.print(
167
- f"[red]Could not determine HEAD SHA in {root}: {result.stderr.strip()}[/red]"
268
+ f"[red]Could not resolve ref '{ref}' in {root}: {result.stderr.strip()}[/red]"
168
269
  )
169
270
  raise typer.Exit(1)
170
271
  return result.stdout.strip()
171
272
  except FileNotFoundError:
172
- console.print("[red]git is not available — cannot determine HEAD SHA[/red]")
273
+ console.print("[red]git is not available — cannot resolve ref[/red]")
173
274
  raise typer.Exit(1) from None
275
+
276
+
277
+ def _get_head(root: Path) -> str:
278
+ """Return the current HEAD SHA for the git repo at root.
279
+
280
+ Delegates to _resolve_ref so there is one git-rev-parse code path.
281
+ Raises typer.Exit(1) if git is unavailable or root is not a git repo.
282
+ """
283
+ return _resolve_ref(root, "HEAD")
sqlcg/core/config.py CHANGED
@@ -71,6 +71,21 @@ def get_db_path() -> Path:
71
71
  return KuzuConfig.from_env().db_path
72
72
 
73
73
 
74
+ def config_file_present(path: Path) -> bool:
75
+ """Return True when a .sqlcg.toml file exists at the given directory.
76
+
77
+ Single source of truth for the config filename so callers never hard-code
78
+ ".sqlcg.toml" independently.
79
+
80
+ Args:
81
+ path: Directory to check for .sqlcg.toml
82
+
83
+ Returns:
84
+ True if path/.sqlcg.toml exists, False otherwise.
85
+ """
86
+ return (Path(path) / ".sqlcg.toml").exists()
87
+
88
+
74
89
  def get_dialect(path: Path) -> str:
75
90
  """Get the SQL dialect from .sqlcg.toml or fall back to snowflake.
76
91
 
@@ -266,9 +281,90 @@ def get_presentation_prefixes(path: Path) -> list[str]:
266
281
  return []
267
282
 
268
283
 
269
- def get_backend() -> "GraphBackend":
284
+ class ExternalConsumerSpec(BaseModel):
285
+ """Specification for a single external downstream consumer declared in .sqlcg.toml."""
286
+
287
+ name: str
288
+ consumer_type: str
289
+ consumes: list[str]
290
+
291
+
292
+ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
293
+ """Get external downstream consumer declarations from .sqlcg.toml.
294
+
295
+ Reads [[sqlcg.external_consumers]] array-of-tables from .sqlcg.toml. Each
296
+ table must have ``name`` and ``consumes`` (non-empty list). Rows without a
297
+ ``name`` or with an empty ``consumes`` list are silently skipped. The
298
+ ``kind`` field is stored as ``consumer_type`` (lowercased). **Defaults to an
299
+ empty list** when the section is absent — when unset, the ingestion pass is a
300
+ no-op (correct generic behaviour for any user). No hardcoded fallback::
301
+
302
+ [[sqlcg.external_consumers]]
303
+ name = "Tableau: Sales Dashboard"
304
+ kind = "tableau"
305
+ consumes = ["ia_sales.fct_orders"]
306
+
307
+ Args:
308
+ path: Root directory to search for .sqlcg.toml
309
+
310
+ Returns:
311
+ List of ExternalConsumerSpec objects. Defaults to an empty list.
312
+ """
313
+ config_file = Path(path) / ".sqlcg.toml"
314
+ if config_file.exists():
315
+ try:
316
+ with open(config_file, "rb") as f:
317
+ config = tomllib.load(f)
318
+ raw = config.get("sqlcg", {}).get("external_consumers", [])
319
+ if not isinstance(raw, list):
320
+ return []
321
+ specs: list[ExternalConsumerSpec] = []
322
+ for entry in raw:
323
+ if not isinstance(entry, dict):
324
+ continue
325
+ name = entry.get("name", "")
326
+ if not name or not isinstance(name, str):
327
+ continue
328
+ consumes_raw = entry.get("consumes", [])
329
+ if not isinstance(consumes_raw, list) or not consumes_raw:
330
+ continue
331
+ consumes = [c.lower() for c in consumes_raw if isinstance(c, str)]
332
+ if not consumes:
333
+ continue
334
+ kind = entry.get("kind", "")
335
+ consumer_type = kind.lower() if isinstance(kind, str) else ""
336
+ specs.append(
337
+ ExternalConsumerSpec(
338
+ name=name,
339
+ consumer_type=consumer_type,
340
+ consumes=consumes,
341
+ )
342
+ )
343
+ return specs
344
+ except Exception:
345
+ pass
346
+ return []
347
+
348
+
349
+ def get_backend(read_only: bool = False) -> "GraphBackend":
270
350
  """Get a graph backend instance respecting the SQLCG_BACKEND env var.
271
351
 
352
+ Args:
353
+ read_only: Open in read-only mode. When ``True``, the KùzuDB open
354
+ does not take an exclusive write lock, enabling *multiple concurrent
355
+ read-only opens* (reader/reader concurrency). CLI read commands
356
+ pass ``True`` so they do not hold the exclusive write lock and
357
+ therefore do not block other concurrent readers or a pending reindex.
358
+ Note: this does NOT allow reads while a read-write writer already
359
+ holds the exclusive lock — KùzuDB's exclusive write lock is
360
+ process-level; a ``read_only=True`` open still fails with
361
+ "Database is locked" when a writer is active. Reads during an
362
+ active writer remain a known limitation (future work: route reads
363
+ through the live MCP server).
364
+ Neo4j has no single-writer lock; this flag is a no-op there.
365
+ All writer call sites (index, reindex, db init/reset, server
366
+ init_backend) use the default ``False``.
367
+
272
368
  Returns:
273
369
  A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
274
370
 
@@ -281,14 +377,26 @@ def get_backend() -> "GraphBackend":
281
377
  from sqlcg.core.kuzu_backend import KuzuBackend
282
378
 
283
379
  kuzu_cfg = KuzuConfig.from_env()
284
- return KuzuBackend(
285
- str(kuzu_cfg.db_path),
286
- buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
287
- )
380
+ try:
381
+ return KuzuBackend(
382
+ str(kuzu_cfg.db_path),
383
+ buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
384
+ read_only=read_only,
385
+ )
386
+ except RuntimeError as exc:
387
+ if read_only and "READ ONLY" in str(exc):
388
+ # KùzuDB refuses to open a non-existent or empty DB in read-only
389
+ # mode ("Cannot create an empty database under READ ONLY mode").
390
+ # Surface the same empty-DB guidance the user sees from `db info`.
391
+ raise RuntimeError(
392
+ "Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
393
+ ) from exc
394
+ raise
288
395
  elif backend_type == "neo4j":
289
396
  from sqlcg.core.neo4j_backend import Neo4jBackend
290
397
 
291
398
  neo4j_cfg = Neo4jConfig.from_env()
399
+ # Neo4j has no single-writer lock; read_only is a no-op here.
292
400
  return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
293
401
  else:
294
402
  raise ValueError(f"Unknown backend type: {backend_type}")