sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/METADATA +13 -2
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/RECORD +31 -29
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +140 -22
- sqlcg/cli/commands/db.py +19 -2
- sqlcg/cli/commands/find.py +10 -3
- sqlcg/cli/commands/gain.py +1 -1
- sqlcg/cli/commands/git.py +61 -11
- sqlcg/cli/commands/index.py +30 -2
- sqlcg/cli/commands/mcp.py +103 -0
- sqlcg/cli/commands/reindex.py +122 -12
- sqlcg/core/config.py +113 -5
- sqlcg/core/freshness.py +134 -0
- sqlcg/core/graph_db.py +2 -0
- sqlcg/core/kuzu_backend.py +4 -1
- sqlcg/core/queries.cypher +18 -6
- sqlcg/core/queries.py +3 -1
- sqlcg/core/schema.cypher +13 -1
- sqlcg/core/schema.py +5 -1
- sqlcg/indexer/indexer.py +394 -160
- sqlcg/indexer/walker.py +3 -0
- sqlcg/parsers/ansi_parser.py +56 -0
- sqlcg/parsers/base.py +13 -4
- sqlcg/parsers/snowflake_parser.py +46 -6
- sqlcg/server/control.py +144 -0
- sqlcg/server/models.py +68 -0
- sqlcg/server/server.py +283 -1
- sqlcg/server/skill.py +20 -4
- sqlcg/server/tools.py +203 -13
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/entry_points.txt +0 -0
sqlcg/cli/commands/git.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""Git integration commands for sqlcg."""
|
|
2
2
|
|
|
3
|
+
import shutil
|
|
4
|
+
import sys
|
|
3
5
|
from pathlib import Path
|
|
4
6
|
from typing import NamedTuple
|
|
5
7
|
|
|
@@ -14,36 +16,79 @@ app = typer.Typer(name="git", help="Git integration commands")
|
|
|
14
16
|
class _HookSpec(NamedTuple):
|
|
15
17
|
filename: str
|
|
16
18
|
sentinel: str
|
|
17
|
-
|
|
19
|
+
script_template: str
|
|
18
20
|
|
|
19
21
|
|
|
22
|
+
# Hook script templates — use {sqlcg_bin} as the placeholder for the resolved binary.
|
|
23
|
+
# The sentinel comments (e.g. "# sqlcg post-checkout hook") must stay byte-for-byte
|
|
24
|
+
# unchanged so R9 idempotency is preserved: _install_single_hook matches them verbatim.
|
|
20
25
|
_HOOKS: list[_HookSpec] = [
|
|
21
26
|
_HookSpec(
|
|
22
27
|
filename="post-checkout",
|
|
23
28
|
sentinel="# sqlcg post-checkout hook",
|
|
24
|
-
|
|
29
|
+
script_template=(
|
|
25
30
|
"#!/bin/sh\n"
|
|
26
31
|
"# sqlcg post-checkout hook — incremental resync after branch switch\n"
|
|
27
32
|
"# $3 == 1 means branch checkout (not file checkout); skip file checkouts\n"
|
|
28
33
|
'[ "$3" = "1" ] || exit 0\n'
|
|
29
|
-
'
|
|
30
|
-
' "$(git rev-parse --show-toplevel)" --dialect auto --quiet
|
|
34
|
+
'{sqlcg_bin} reindex --from "$1" --to "$2"'
|
|
35
|
+
' "$(git rev-parse --show-toplevel)" --dialect auto --quiet --notify'
|
|
36
|
+
' || echo "sqlcg: graph not updated (server busy/locked)'
|
|
37
|
+
" -- run 'sqlcg mcp status'\" >&2\n"
|
|
31
38
|
),
|
|
32
39
|
),
|
|
33
40
|
_HookSpec(
|
|
34
41
|
filename="post-merge",
|
|
35
42
|
sentinel="# sqlcg post-merge hook",
|
|
36
|
-
|
|
43
|
+
script_template="""\
|
|
37
44
|
#!/bin/sh
|
|
38
45
|
# sqlcg post-merge hook — incremental resync after pull/merge
|
|
39
|
-
#
|
|
40
|
-
|
|
46
|
+
# git sets ORIG_HEAD to the pre-merge HEAD; pass it as --from so --notify can route
|
|
47
|
+
# through a running server (same path as post-checkout). If ORIG_HEAD is unset (e.g.
|
|
48
|
+
# first-ever merge / gc'd), fall back to the standalone stored-SHA delta (direct write).
|
|
49
|
+
PREV=$(git rev-parse --verify --quiet ORIG_HEAD)
|
|
50
|
+
TOP=$(git rev-parse --show-toplevel)
|
|
51
|
+
if [ -n "$PREV" ]; then
|
|
52
|
+
{sqlcg_bin} reindex --from "$PREV" --to HEAD "$TOP" --dialect auto --quiet --notify \\
|
|
53
|
+
|| echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
|
|
54
|
+
else
|
|
55
|
+
{sqlcg_bin} reindex "$TOP" --dialect auto --quiet --notify \\
|
|
56
|
+
|| echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
|
|
57
|
+
fi
|
|
41
58
|
""",
|
|
42
59
|
),
|
|
43
60
|
]
|
|
44
61
|
|
|
45
62
|
|
|
46
|
-
def
|
|
63
|
+
def _resolve_sqlcg_bin() -> str:
|
|
64
|
+
"""Resolve the absolute path of the installing sqlcg binary.
|
|
65
|
+
|
|
66
|
+
Resolution order:
|
|
67
|
+
1. shutil.which("sqlcg") — the binary on the installer's $PATH.
|
|
68
|
+
2. sys.argv[0] resolved via Path(...).resolve() if its file name is exactly "sqlcg" and it is an executable regular file.
|
|
69
|
+
3. Bare "sqlcg" fallback (current behaviour) — prints a warning so the user knows.
|
|
70
|
+
|
|
71
|
+
Returns the resolved path string (absolute when resolvable, bare "sqlcg" otherwise).
|
|
72
|
+
"""
|
|
73
|
+
# 1. Try $PATH first — the binary the user means
|
|
74
|
+
which_result = shutil.which("sqlcg")
|
|
75
|
+
if which_result:
|
|
76
|
+
return which_result
|
|
77
|
+
|
|
78
|
+
# 2. Try sys.argv[0] for python -m / editable-install invocations
|
|
79
|
+
argv0 = Path(sys.argv[0]).resolve()
|
|
80
|
+
if argv0.name == "sqlcg" and argv0.is_file() and argv0.stat().st_mode & 0o111:
|
|
81
|
+
return str(argv0)
|
|
82
|
+
|
|
83
|
+
# 3. Bare fallback — still functional but relies on $PATH at hook-run time
|
|
84
|
+
console.print(
|
|
85
|
+
"[yellow]Warning: could not resolve the sqlcg binary path; the generated hooks "
|
|
86
|
+
"will use bare 'sqlcg' and rely on $PATH at hook-run time.[/yellow]"
|
|
87
|
+
)
|
|
88
|
+
return "sqlcg"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _install_single_hook(hooks_dir: Path, spec: _HookSpec, sqlcg_bin: str) -> None:
|
|
47
92
|
"""Install one git hook idempotently.
|
|
48
93
|
|
|
49
94
|
If the hook file already contains the sentinel, it is already installed — skip silently.
|
|
@@ -51,6 +96,7 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
|
|
|
51
96
|
Otherwise, write the hook file and set 0o755.
|
|
52
97
|
"""
|
|
53
98
|
hook_path = hooks_dir / spec.filename
|
|
99
|
+
script = spec.script_template.format(sqlcg_bin=sqlcg_bin)
|
|
54
100
|
|
|
55
101
|
if hook_path.exists():
|
|
56
102
|
existing_content = hook_path.read_text()
|
|
@@ -68,10 +114,10 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
|
|
|
68
114
|
f".git/hooks/{spec.filename}:[/yellow]"
|
|
69
115
|
)
|
|
70
116
|
console.print("")
|
|
71
|
-
console.print("[cyan]" +
|
|
117
|
+
console.print("[cyan]" + script.rstrip() + "[/cyan]")
|
|
72
118
|
return
|
|
73
119
|
|
|
74
|
-
hook_path.write_text(
|
|
120
|
+
hook_path.write_text(script)
|
|
75
121
|
hook_path.chmod(0o755)
|
|
76
122
|
console.print(f"[green]Installed git hook:[/green] .git/hooks/{spec.filename}")
|
|
77
123
|
|
|
@@ -87,6 +133,8 @@ def install_hooks(
|
|
|
87
133
|
Writes a post-checkout hook that triggers incremental resync after branch switches
|
|
88
134
|
and a post-merge hook that triggers resync after pulls/merges.
|
|
89
135
|
Idempotent: running multiple times produces one hook entry per hook.
|
|
136
|
+
The hooks embed the absolute path of the installing sqlcg binary so version skew
|
|
137
|
+
between the installed binary and the hook command is avoided.
|
|
90
138
|
"""
|
|
91
139
|
if repo is None:
|
|
92
140
|
repo = Path.cwd()
|
|
@@ -100,5 +148,7 @@ def install_hooks(
|
|
|
100
148
|
|
|
101
149
|
hooks_dir.mkdir(parents=True, exist_ok=True)
|
|
102
150
|
|
|
151
|
+
sqlcg_bin = _resolve_sqlcg_bin()
|
|
152
|
+
|
|
103
153
|
for spec in _HOOKS:
|
|
104
|
-
_install_single_hook(hooks_dir, spec)
|
|
154
|
+
_install_single_hook(hooks_dir, spec, sqlcg_bin)
|
sqlcg/cli/commands/index.py
CHANGED
|
@@ -14,7 +14,7 @@ from rich.progress import (
|
|
|
14
14
|
TimeRemainingColumn,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
|
-
from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
|
|
17
|
+
from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
|
|
18
18
|
from sqlcg.indexer.indexer import Indexer
|
|
19
19
|
|
|
20
20
|
console = Console()
|
|
@@ -29,7 +29,7 @@ def index_cmd( # noqa: B008
|
|
|
29
29
|
None, "--dbt-manifest", help="Path to dbt manifest"
|
|
30
30
|
),
|
|
31
31
|
timeout_per_file: int = typer.Option( # noqa: B008
|
|
32
|
-
|
|
32
|
+
10, "--timeout-per-file", help="Timeout per file in seconds"
|
|
33
33
|
),
|
|
34
34
|
buffer_pool_size: int = typer.Option( # noqa: B008
|
|
35
35
|
0,
|
|
@@ -63,6 +63,14 @@ def index_cmd( # noqa: B008
|
|
|
63
63
|
profile: bool = typer.Option( # noqa: B008
|
|
64
64
|
False, "--profile/--no-profile", help="Emit per-stage timing after indexing"
|
|
65
65
|
),
|
|
66
|
+
include_working_tree: bool = typer.Option( # noqa: B008
|
|
67
|
+
False,
|
|
68
|
+
"--include-working-tree",
|
|
69
|
+
help=(
|
|
70
|
+
"Index the working tree including uncommitted changes. "
|
|
71
|
+
"Marks freshness as 'indexed with working-tree changes'."
|
|
72
|
+
),
|
|
73
|
+
),
|
|
66
74
|
) -> None:
|
|
67
75
|
"""Index SQL files in a directory.
|
|
68
76
|
|
|
@@ -113,6 +121,13 @@ def index_cmd( # noqa: B008
|
|
|
113
121
|
if dialect == "auto":
|
|
114
122
|
dialect = get_dialect(path)
|
|
115
123
|
|
|
124
|
+
if not quiet and not config_file_present(path):
|
|
125
|
+
console.print(
|
|
126
|
+
f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
|
|
127
|
+
"using defaults (snowflake dialect, no aliases/prefixes). "
|
|
128
|
+
"Create .sqlcg.toml in the index directory to customise.[/yellow]"
|
|
129
|
+
)
|
|
130
|
+
|
|
116
131
|
db_path = get_db_path()
|
|
117
132
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
118
133
|
|
|
@@ -137,6 +152,19 @@ def index_cmd( # noqa: B008
|
|
|
137
152
|
sqlcg_log.removeHandler(_counter)
|
|
138
153
|
_warn_handler.close()
|
|
139
154
|
|
|
155
|
+
# --include-working-tree: if the working tree is dirty, overwrite the stored SHA
|
|
156
|
+
# with a "<head>+dirty" sentinel so 'db info' can distinguish clean-HEAD index
|
|
157
|
+
# from working-tree-inclusive index. The backend was closed inside _run_index,
|
|
158
|
+
# so we open a fresh context here for the single sentinel write.
|
|
159
|
+
if include_working_tree:
|
|
160
|
+
from sqlcg.core.freshness import _git
|
|
161
|
+
|
|
162
|
+
dirty_out = _git(path, "status", "--porcelain")
|
|
163
|
+
if dirty_out: # non-empty string → working tree is dirty
|
|
164
|
+
head = _git(path, "rev-parse", "HEAD") or "unknown"
|
|
165
|
+
with get_backend() as _b2:
|
|
166
|
+
_b2.set_indexed_sha(f"{head}+dirty")
|
|
167
|
+
|
|
140
168
|
if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
|
|
141
169
|
console.print(
|
|
142
170
|
f"[yellow]Parse warnings written to {_warn_log_path} "
|
sqlcg/cli/commands/mcp.py
CHANGED
|
@@ -71,3 +71,106 @@ def mcp_best_practices() -> None:
|
|
|
71
71
|
from sqlcg.server.skill import render_body
|
|
72
72
|
|
|
73
73
|
typer.echo(render_body())
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@app.command("status")
|
|
77
|
+
def mcp_status() -> None:
|
|
78
|
+
"""Print server status JSON (connects to control socket).
|
|
79
|
+
|
|
80
|
+
Returns JSON with fields: running, pid, db_path, indexed_sha, head_sha,
|
|
81
|
+
stale_by_commits, connected_clients, uptime when a server is live.
|
|
82
|
+
|
|
83
|
+
When no server is found: {"running": false}.
|
|
84
|
+
When the PID file exists with a live process but the socket is unavailable:
|
|
85
|
+
{"running": true, "degraded": "socket unavailable", ...}.
|
|
86
|
+
|
|
87
|
+
R3 (stale socket): if the socket file exists but the server is not
|
|
88
|
+
responding (ConnectionRefusedError / FileNotFoundError), falls through
|
|
89
|
+
to the PID-file probe — never hangs or errors on a dead socket.
|
|
90
|
+
"""
|
|
91
|
+
import socket as _socket
|
|
92
|
+
|
|
93
|
+
from sqlcg.server.control import is_pid_alive, read_pid, sock_path
|
|
94
|
+
|
|
95
|
+
sp = sock_path()
|
|
96
|
+
try:
|
|
97
|
+
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
98
|
+
s.settimeout(2)
|
|
99
|
+
s.connect(str(sp))
|
|
100
|
+
s.sendall(json.dumps({"op": "status"}).encode() + b"\n")
|
|
101
|
+
data = s.recv(4096)
|
|
102
|
+
console.print_json(data.decode())
|
|
103
|
+
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
104
|
+
# Socket unavailable — probe via PID file (R3: stale-socket fall-through)
|
|
105
|
+
rec = read_pid()
|
|
106
|
+
if rec and is_pid_alive(rec["pid"]):
|
|
107
|
+
console.print_json(
|
|
108
|
+
json.dumps(
|
|
109
|
+
{
|
|
110
|
+
"running": True,
|
|
111
|
+
"degraded": "socket unavailable",
|
|
112
|
+
"pid": rec["pid"],
|
|
113
|
+
"db_path": rec["db_path"],
|
|
114
|
+
}
|
|
115
|
+
)
|
|
116
|
+
)
|
|
117
|
+
else:
|
|
118
|
+
console.print_json(json.dumps({"running": False}))
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.command("stop")
|
|
122
|
+
def mcp_stop() -> None:
|
|
123
|
+
"""Stop the running MCP server gracefully.
|
|
124
|
+
|
|
125
|
+
Sends a ``stop`` op via the control socket; waits up to 5 s for the
|
|
126
|
+
socket file to disappear (confirming clean exit). Falls back to SIGTERM
|
|
127
|
+
on the PID-file PID if the socket is unavailable.
|
|
128
|
+
|
|
129
|
+
R3 (stale socket): ``ConnectionRefusedError`` / ``FileNotFoundError`` are
|
|
130
|
+
caught — never hangs on a dead socket.
|
|
131
|
+
"""
|
|
132
|
+
import socket as _socket
|
|
133
|
+
import time
|
|
134
|
+
|
|
135
|
+
from sqlcg.server.control import is_pid_alive, read_pid, sock_path
|
|
136
|
+
|
|
137
|
+
sp = sock_path()
|
|
138
|
+
try:
|
|
139
|
+
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
140
|
+
s.settimeout(2)
|
|
141
|
+
s.connect(str(sp))
|
|
142
|
+
s.sendall(json.dumps({"op": "stop"}).encode() + b"\n")
|
|
143
|
+
s.recv(128)
|
|
144
|
+
# Wait up to 5 s for the socket file to disappear (confirms clean exit)
|
|
145
|
+
for _ in range(10):
|
|
146
|
+
if not sp.exists():
|
|
147
|
+
break
|
|
148
|
+
time.sleep(0.5)
|
|
149
|
+
console.print("[green]Server stopped.[/green]")
|
|
150
|
+
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
151
|
+
# Socket unavailable — fall back to SIGTERM via PID file
|
|
152
|
+
import signal
|
|
153
|
+
|
|
154
|
+
rec = read_pid()
|
|
155
|
+
if rec and is_pid_alive(rec["pid"]):
|
|
156
|
+
os.kill(rec["pid"], signal.SIGTERM)
|
|
157
|
+
console.print(f"[yellow]Socket unavailable — sent SIGTERM to PID {rec['pid']}[/yellow]")
|
|
158
|
+
else:
|
|
159
|
+
console.print("[yellow]No server found to stop.[/yellow]")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@app.command("restart")
|
|
163
|
+
def mcp_restart() -> None:
|
|
164
|
+
"""Stop the server. The client (editor) must respawn.
|
|
165
|
+
|
|
166
|
+
v1.1 cannot re-parent an editor-spawned stdio process. This command
|
|
167
|
+
stops the current server and prints guidance for the user to restart
|
|
168
|
+
the MCP server via their editor's MCP configuration.
|
|
169
|
+
|
|
170
|
+
True auto-restart (re-parenting stdio) is deferred to v1.2.
|
|
171
|
+
"""
|
|
172
|
+
mcp_stop()
|
|
173
|
+
console.print(
|
|
174
|
+
"[yellow]Server stopped. Please restart via your editor's MCP configuration.[/yellow]"
|
|
175
|
+
)
|
|
176
|
+
console.print("[dim]True auto-restart (re-parenting stdio) is deferred to v1.2.[/dim]")
|
sqlcg/cli/commands/reindex.py
CHANGED
|
@@ -18,6 +18,12 @@ from rich.console import Console
|
|
|
18
18
|
|
|
19
19
|
console = Console()
|
|
20
20
|
|
|
21
|
+
# Client-side socket timeout for the --notify control-socket path.
|
|
22
|
+
# A real DWH server-side resync_changed measured ~89 s (41 changed files + closure);
|
|
23
|
+
# 300 s covers that with headroom while keeping the wait bounded on a wedged server.
|
|
24
|
+
# This is a CLI transport bound, NOT a KuzuConfig/indexer constant.
|
|
25
|
+
_NOTIFY_SOCKET_TIMEOUT_S = 300
|
|
26
|
+
|
|
21
27
|
|
|
22
28
|
def reindex_cmd( # noqa: B008
|
|
23
29
|
path: Path = typer.Argument(..., help="Repository root directory to resync"), # noqa: B008
|
|
@@ -39,10 +45,18 @@ def reindex_cmd( # noqa: B008
|
|
|
39
45
|
help="Files per KuzuDB transaction (same default as index command)",
|
|
40
46
|
),
|
|
41
47
|
timeout_per_file: int = typer.Option( # noqa: B008
|
|
42
|
-
|
|
48
|
+
10,
|
|
43
49
|
"--timeout-per-file",
|
|
44
50
|
help="Per-file parse timeout in seconds",
|
|
45
51
|
),
|
|
52
|
+
notify: bool = typer.Option( # noqa: B008
|
|
53
|
+
False,
|
|
54
|
+
"--notify",
|
|
55
|
+
help=(
|
|
56
|
+
"If a server is live on this DB, route the reindex through the server "
|
|
57
|
+
"(avoids lock contention). Falls back to direct write if no server is found."
|
|
58
|
+
),
|
|
59
|
+
),
|
|
46
60
|
) -> None:
|
|
47
61
|
"""Incrementally resync the graph after a git branch change or pull.
|
|
48
62
|
|
|
@@ -56,17 +70,99 @@ def reindex_cmd( # noqa: B008
|
|
|
56
70
|
Exits with an error if the database schema version does not match the current
|
|
57
71
|
build — run 'sqlcg db reset && sqlcg db init && sqlcg index <path>' to re-init.
|
|
58
72
|
"""
|
|
59
|
-
|
|
73
|
+
import json
|
|
74
|
+
import socket as _socket
|
|
75
|
+
|
|
76
|
+
from sqlcg.core.config import config_file_present, get_backend, get_db_path, get_dialect
|
|
60
77
|
from sqlcg.core.schema import SCHEMA_VERSION
|
|
61
78
|
from sqlcg.indexer.indexer import Indexer
|
|
79
|
+
from sqlcg.server.control import sock_path
|
|
62
80
|
|
|
63
81
|
# Resolve to absolute path so ignore-spec and git delta receive an absolute root
|
|
64
82
|
path = path.resolve()
|
|
65
83
|
|
|
84
|
+
# --notify: if a server is live, route reindex through the socket (R3 fallback)
|
|
85
|
+
if notify:
|
|
86
|
+
sp = sock_path()
|
|
87
|
+
try:
|
|
88
|
+
with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
|
|
89
|
+
s.settimeout(_NOTIFY_SOCKET_TIMEOUT_S)
|
|
90
|
+
s.connect(str(sp))
|
|
91
|
+
# Resolve SHAs before sending — standalone mode (no --from) is never routed via the socket
|
|
92
|
+
effective_from = from_sha
|
|
93
|
+
if effective_from is None:
|
|
94
|
+
# Standalone mode: we cannot read stored SHA here without opening the
|
|
95
|
+
# DB (which would conflict with the running server). If no --from is
|
|
96
|
+
# given with --notify, nothing is sent to the server; we raise OSError to fall
|
|
97
|
+
# back to direct write; the caller should pass --from explicitly.
|
|
98
|
+
raise OSError( # noqa: TRY301
|
|
99
|
+
"--notify without --from requires direct DB access; falling through"
|
|
100
|
+
)
|
|
101
|
+
# Resolve symbolic refs (HEAD, branch names) to concrete 40-char SHAs
|
|
102
|
+
# before sending — prevents literal "HEAD" from being stored in the graph.
|
|
103
|
+
effective_from = _resolve_ref(path, effective_from)
|
|
104
|
+
effective_to = _resolve_ref(path, to_sha) if to_sha else _get_head(path)
|
|
105
|
+
payload = {
|
|
106
|
+
"op": "reindex",
|
|
107
|
+
"root": str(path),
|
|
108
|
+
"from": effective_from,
|
|
109
|
+
"to": effective_to,
|
|
110
|
+
"dialect": dialect,
|
|
111
|
+
}
|
|
112
|
+
s.sendall(json.dumps(payload).encode() + b"\n")
|
|
113
|
+
data = s.recv(65536)
|
|
114
|
+
result = json.loads(data)
|
|
115
|
+
if "error" in result:
|
|
116
|
+
console.print(f"[red]Server reindex error: {result['error']}[/red]")
|
|
117
|
+
raise typer.Exit(1)
|
|
118
|
+
if not quiet:
|
|
119
|
+
srv_summary = result.get("summary", {})
|
|
120
|
+
console.print(
|
|
121
|
+
f"[green]Resynced via server[/green] "
|
|
122
|
+
f"+{srv_summary.get('added', 0)} added, "
|
|
123
|
+
f"~{srv_summary.get('modified', 0)} modified, "
|
|
124
|
+
f"-{srv_summary.get('deleted', 0)} deleted"
|
|
125
|
+
)
|
|
126
|
+
raise typer.Exit(0)
|
|
127
|
+
except TimeoutError:
|
|
128
|
+
# Bug 1 fix: server is alive and working (accepted the connection, holds the
|
|
129
|
+
# lock, will finish and persist). Do NOT fall through to the direct-write
|
|
130
|
+
# path — that would hit the held lock and produce a false "Database is locked"
|
|
131
|
+
# error. Exit 0 so the git hook stays non-fatal; the server will complete.
|
|
132
|
+
# (socket.timeout is an alias of TimeoutError, a subclass of OSError — this
|
|
133
|
+
# clause must be listed before the broad OSError clause below.)
|
|
134
|
+
import sys
|
|
135
|
+
|
|
136
|
+
print(
|
|
137
|
+
f"Server is still applying the reindex (timed out waiting after "
|
|
138
|
+
f"{_NOTIFY_SOCKET_TIMEOUT_S}s); the graph will update when it finishes "
|
|
139
|
+
f"— check 'sqlcg mcp status'.",
|
|
140
|
+
file=sys.stderr,
|
|
141
|
+
)
|
|
142
|
+
raise typer.Exit(0) from None
|
|
143
|
+
except (FileNotFoundError, ConnectionRefusedError, OSError):
|
|
144
|
+
# R3: no live server (stale socket, socket absent, fallback condition) —
|
|
145
|
+
# fall through to the existing direct-write path unchanged.
|
|
146
|
+
# NOTE: socket.timeout / TimeoutError is an OSError subclass, so the
|
|
147
|
+
# dedicated timeout clause above must be listed first (already is).
|
|
148
|
+
pass
|
|
149
|
+
except typer.Exit:
|
|
150
|
+
raise
|
|
151
|
+
except Exception as exc:
|
|
152
|
+
console.print(f"[red]--notify routing failed: {exc}[/red]")
|
|
153
|
+
raise typer.Exit(1) from exc
|
|
154
|
+
|
|
66
155
|
# Resolve dialect
|
|
67
156
|
if dialect == "auto":
|
|
68
157
|
dialect = get_dialect(path)
|
|
69
158
|
|
|
159
|
+
if not quiet and not config_file_present(path):
|
|
160
|
+
console.print(
|
|
161
|
+
f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
|
|
162
|
+
"using defaults (snowflake dialect, no aliases/prefixes). "
|
|
163
|
+
"Create .sqlcg.toml in the index directory to customise.[/yellow]"
|
|
164
|
+
)
|
|
165
|
+
|
|
70
166
|
db_path = get_db_path()
|
|
71
167
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
72
168
|
|
|
@@ -88,15 +184,17 @@ def reindex_cmd( # noqa: B008
|
|
|
88
184
|
|
|
89
185
|
# ---- Determine mode -------------------------------------------------------
|
|
90
186
|
if from_sha is not None:
|
|
91
|
-
# Explicit-SHA mode
|
|
92
|
-
|
|
187
|
+
# Explicit-SHA mode — resolve symbolic refs to concrete SHAs before storing
|
|
188
|
+
effective_from = _resolve_ref(path, from_sha)
|
|
189
|
+
effective_to = _resolve_ref(path, to_sha) if to_sha else _get_head(path)
|
|
93
190
|
if not quiet:
|
|
94
191
|
console.print(
|
|
95
|
-
f"Resyncing [cyan]{path}[/cyan]
|
|
192
|
+
f"Resyncing [cyan]{path}[/cyan] "
|
|
193
|
+
f"[dim]{effective_from[:8]}..{effective_to[:8]}[/dim]"
|
|
96
194
|
)
|
|
97
195
|
summary = indexer.resync_changed(
|
|
98
196
|
path,
|
|
99
|
-
|
|
197
|
+
effective_from,
|
|
100
198
|
effective_to,
|
|
101
199
|
backend,
|
|
102
200
|
dialect,
|
|
@@ -150,24 +248,36 @@ def reindex_cmd( # noqa: B008
|
|
|
150
248
|
)
|
|
151
249
|
|
|
152
250
|
|
|
153
|
-
def
|
|
154
|
-
"""
|
|
251
|
+
def _resolve_ref(root: Path, ref: str) -> str:
|
|
252
|
+
"""Resolve a git ref (HEAD, branch, tag, or concrete SHA) to a 40-char SHA.
|
|
155
253
|
|
|
156
|
-
|
|
254
|
+
A concrete SHA resolves to itself (idempotent), so callers may pass either a
|
|
255
|
+
symbolic ref or a SHA without branching.
|
|
256
|
+
|
|
257
|
+
Raises typer.Exit(1) if git is unavailable or the ref cannot be resolved.
|
|
157
258
|
"""
|
|
158
259
|
try:
|
|
159
260
|
result = subprocess.run(
|
|
160
|
-
["git", "rev-parse",
|
|
261
|
+
["git", "rev-parse", ref],
|
|
161
262
|
cwd=str(root),
|
|
162
263
|
capture_output=True,
|
|
163
264
|
text=True,
|
|
164
265
|
)
|
|
165
266
|
if result.returncode != 0:
|
|
166
267
|
console.print(
|
|
167
|
-
f"[red]Could not
|
|
268
|
+
f"[red]Could not resolve ref '{ref}' in {root}: {result.stderr.strip()}[/red]"
|
|
168
269
|
)
|
|
169
270
|
raise typer.Exit(1)
|
|
170
271
|
return result.stdout.strip()
|
|
171
272
|
except FileNotFoundError:
|
|
172
|
-
console.print("[red]git is not available — cannot
|
|
273
|
+
console.print("[red]git is not available — cannot resolve ref[/red]")
|
|
173
274
|
raise typer.Exit(1) from None
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _get_head(root: Path) -> str:
|
|
278
|
+
"""Return the current HEAD SHA for the git repo at root.
|
|
279
|
+
|
|
280
|
+
Delegates to _resolve_ref so there is one git-rev-parse code path.
|
|
281
|
+
Raises typer.Exit(1) if git is unavailable or root is not a git repo.
|
|
282
|
+
"""
|
|
283
|
+
return _resolve_ref(root, "HEAD")
|
sqlcg/core/config.py
CHANGED
|
@@ -71,6 +71,21 @@ def get_db_path() -> Path:
|
|
|
71
71
|
return KuzuConfig.from_env().db_path
|
|
72
72
|
|
|
73
73
|
|
|
74
|
+
def config_file_present(path: Path) -> bool:
|
|
75
|
+
"""Return True when a .sqlcg.toml file exists at the given directory.
|
|
76
|
+
|
|
77
|
+
Single source of truth for the config filename so callers never hard-code
|
|
78
|
+
".sqlcg.toml" independently.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
path: Directory to check for .sqlcg.toml
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
True if path/.sqlcg.toml exists, False otherwise.
|
|
85
|
+
"""
|
|
86
|
+
return (Path(path) / ".sqlcg.toml").exists()
|
|
87
|
+
|
|
88
|
+
|
|
74
89
|
def get_dialect(path: Path) -> str:
|
|
75
90
|
"""Get the SQL dialect from .sqlcg.toml or fall back to snowflake.
|
|
76
91
|
|
|
@@ -266,9 +281,90 @@ def get_presentation_prefixes(path: Path) -> list[str]:
|
|
|
266
281
|
return []
|
|
267
282
|
|
|
268
283
|
|
|
269
|
-
|
|
284
|
+
class ExternalConsumerSpec(BaseModel):
|
|
285
|
+
"""Specification for a single external downstream consumer declared in .sqlcg.toml."""
|
|
286
|
+
|
|
287
|
+
name: str
|
|
288
|
+
consumer_type: str
|
|
289
|
+
consumes: list[str]
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
|
|
293
|
+
"""Get external downstream consumer declarations from .sqlcg.toml.
|
|
294
|
+
|
|
295
|
+
Reads [[sqlcg.external_consumers]] array-of-tables from .sqlcg.toml. Each
|
|
296
|
+
table must have ``name`` and ``consumes`` (non-empty list). Rows without a
|
|
297
|
+
``name`` or with an empty ``consumes`` list are silently skipped. The
|
|
298
|
+
``kind`` field is stored as ``consumer_type`` (lowercased). **Defaults to an
|
|
299
|
+
empty list** when the section is absent — when unset, the ingestion pass is a
|
|
300
|
+
no-op (correct generic behaviour for any user). No hardcoded fallback. Example::
|
|
301
|
+
|
|
302
|
+
[[sqlcg.external_consumers]]
|
|
303
|
+
name = "Tableau: Sales Dashboard"
|
|
304
|
+
kind = "tableau"
|
|
305
|
+
consumes = ["ia_sales.fct_orders"]
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
path: Root directory to search for .sqlcg.toml
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
List of ExternalConsumerSpec objects. Empty when the file or section is absent, or when the file cannot be read, parsed or validated.
|
|
312
|
+
"""
|
|
313
|
+
config_file = Path(path) / ".sqlcg.toml"
|
|
314
|
+
if config_file.exists():
|
|
315
|
+
try:
|
|
316
|
+
with open(config_file, "rb") as f:
|
|
317
|
+
config = tomllib.load(f)
|
|
318
|
+
raw = config.get("sqlcg", {}).get("external_consumers", [])
|
|
319
|
+
if not isinstance(raw, list):
|
|
320
|
+
return []
|
|
321
|
+
specs: list[ExternalConsumerSpec] = []
|
|
322
|
+
for entry in raw:
|
|
323
|
+
if not isinstance(entry, dict):
|
|
324
|
+
continue
|
|
325
|
+
name = entry.get("name", "")
|
|
326
|
+
if not name or not isinstance(name, str):
|
|
327
|
+
continue
|
|
328
|
+
consumes_raw = entry.get("consumes", [])
|
|
329
|
+
if not isinstance(consumes_raw, list) or not consumes_raw:
|
|
330
|
+
continue
|
|
331
|
+
consumes = [c.lower() for c in consumes_raw if isinstance(c, str)]
|
|
332
|
+
if not consumes:
|
|
333
|
+
continue
|
|
334
|
+
kind = entry.get("kind", "")
|
|
335
|
+
consumer_type = kind.lower() if isinstance(kind, str) else ""
|
|
336
|
+
specs.append(
|
|
337
|
+
ExternalConsumerSpec(
|
|
338
|
+
name=name,
|
|
339
|
+
consumer_type=consumer_type,
|
|
340
|
+
consumes=consumes,
|
|
341
|
+
)
|
|
342
|
+
)
|
|
343
|
+
return specs
|
|
344
|
+
except Exception:
|
|
345
|
+
pass
|
|
346
|
+
return []
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def get_backend(read_only: bool = False) -> "GraphBackend":
|
|
270
350
|
"""Get a graph backend instance respecting the SQLCG_BACKEND env var.
|
|
271
351
|
|
|
352
|
+
Args:
|
|
353
|
+
read_only: Open in read-only mode. When ``True``, the KùzuDB open
|
|
354
|
+
does not take an exclusive write lock, enabling *multiple concurrent
|
|
355
|
+
read-only opens* (reader/reader concurrency). CLI read commands
|
|
356
|
+
pass ``True`` so they do not hold the exclusive write lock and
|
|
357
|
+
therefore do not block other concurrent readers or a pending reindex.
|
|
358
|
+
Note: this does NOT allow reads while a read-write writer already
|
|
359
|
+
holds the exclusive lock — KùzuDB's exclusive write lock is
|
|
360
|
+
process-level; a ``read_only=True`` open still fails with
|
|
361
|
+
"Database is locked" when a writer is active. Reads during an
|
|
362
|
+
active writer remain a known limitation (future work: route reads
|
|
363
|
+
through the live MCP server).
|
|
364
|
+
Neo4j has no single-writer lock; this flag is a no-op there.
|
|
365
|
+
All writer call sites (index, reindex, db init/reset, server
|
|
366
|
+
init_backend) use the default ``False``.
|
|
367
|
+
|
|
272
368
|
Returns:
|
|
273
369
|
A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
|
|
274
370
|
|
|
@@ -281,14 +377,26 @@ def get_backend() -> "GraphBackend":
|
|
|
281
377
|
from sqlcg.core.kuzu_backend import KuzuBackend
|
|
282
378
|
|
|
283
379
|
kuzu_cfg = KuzuConfig.from_env()
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
380
|
+
try:
|
|
381
|
+
return KuzuBackend(
|
|
382
|
+
str(kuzu_cfg.db_path),
|
|
383
|
+
buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
|
|
384
|
+
read_only=read_only,
|
|
385
|
+
)
|
|
386
|
+
except RuntimeError as exc:
|
|
387
|
+
if read_only and "READ ONLY" in str(exc):
|
|
388
|
+
# KùzuDB refuses to open a non-existent or empty DB in read-only
|
|
389
|
+
# mode ("Cannot create an empty database under READ ONLY mode").
|
|
390
|
+
# Surface the same empty-DB guidance the user sees from `db info`.
|
|
391
|
+
raise RuntimeError(
|
|
392
|
+
"Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
|
|
393
|
+
) from exc
|
|
394
|
+
raise
|
|
288
395
|
elif backend_type == "neo4j":
|
|
289
396
|
from sqlcg.core.neo4j_backend import Neo4jBackend
|
|
290
397
|
|
|
291
398
|
neo4j_cfg = Neo4jConfig.from_env()
|
|
399
|
+
# Neo4j has no single-writer lock; read_only is a no-op here.
|
|
292
400
|
return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
|
|
293
401
|
else:
|
|
294
402
|
raise ValueError(f"Unknown backend type: {backend_type}")
|