sql-code-graph 1.1.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.2.2.dist-info}/METADATA +11 -1
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.2.2.dist-info}/RECORD +19 -18
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.2.2.dist-info}/WHEEL +1 -1
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +156 -134
- sqlcg/cli/commands/db.py +92 -86
- sqlcg/cli/commands/find.py +30 -33
- sqlcg/cli/commands/gain.py +13 -11
- sqlcg/core/config.py +35 -5
- sqlcg/core/kuzu_backend.py +4 -1
- sqlcg/core/queries.cypher +0 -6
- sqlcg/core/queries.py +0 -1
- sqlcg/indexer/indexer.py +109 -11
- sqlcg/lineage/aggregator.py +17 -45
- sqlcg/parsers/ansi_parser.py +2 -2
- sqlcg/parsers/base.py +7 -1
- sqlcg/server/read_client.py +192 -0
- sqlcg/server/server.py +97 -18
- {sql_code_graph-1.1.0.dist-info → sql_code_graph-1.2.2.dist-info}/entry_points.txt +0 -0
sqlcg/lineage/aggregator.py
CHANGED
|
@@ -22,17 +22,33 @@ class CrossFileAggregator:
|
|
|
22
22
|
# Maps lowercased table name (bare name) -> exp.Select body for CTAS statements.
|
|
23
23
|
# Populated during register_pass1 and used to seed sources_map in pass 2.
|
|
24
24
|
self.cross_file_sources: dict[str, Any] = {}
|
|
25
|
+
# #44 canonical-name index: bare name (lowercased) -> sole DDL-defined full_id.
|
|
26
|
+
# Built from DDL-defined tables only (defined_tables, not CTAS bodies).
|
|
27
|
+
# Used by _build_file_rows to rewrite an unqualified INSERT-target to the
|
|
28
|
+
# canonical full_id so INSERT-target nodes share identity with the DDL node.
|
|
29
|
+
self.canonical_by_bare: dict[str, str] = {}
|
|
30
|
+
# Bare names defined by >1 schema — do NOT rewrite (ambiguous).
|
|
31
|
+
self._ambiguous_bare: set[str] = set()
|
|
25
32
|
|
|
26
33
|
def register_pass1(self, parsed: ParsedFile) -> None:
|
|
27
34
|
"""Register a pass-1 result and build view/table source map.
|
|
28
35
|
|
|
29
|
-
Also harvests CTAS bodies from statements for cross-file temp-table resolution
|
|
36
|
+
Also harvests CTAS bodies from statements for cross-file temp-table resolution,
|
|
37
|
+
and builds the bare-name → canonical-full_id index (#44) from DDL tables.
|
|
30
38
|
|
|
31
39
|
Args:
|
|
32
40
|
parsed: ParsedFile from pass 1
|
|
33
41
|
"""
|
|
34
42
|
for table in parsed.defined_tables:
|
|
35
43
|
self.sources[table.full_id] = parsed
|
|
44
|
+
# #44: build canonical_by_bare index from DDL-defined tables
|
|
45
|
+
bare = (table.name or "").lower()
|
|
46
|
+
if bare:
|
|
47
|
+
if bare in self.canonical_by_bare and self.canonical_by_bare[bare] != table.full_id:
|
|
48
|
+
# Same bare name defined in multiple schemas → ambiguous, never rewrite
|
|
49
|
+
self._ambiguous_bare.add(bare)
|
|
50
|
+
else:
|
|
51
|
+
self.canonical_by_bare[bare] = table.full_id
|
|
36
52
|
|
|
37
53
|
# Harvest CTAS bodies from statements for cross-file resolution.
|
|
38
54
|
# Key convention matches AnsiParser.parse_file line 109: lowercased bare name.
|
|
@@ -68,47 +84,3 @@ class CrossFileAggregator:
|
|
|
68
84
|
if bare and bare in self.cross_file_sources and bare not in same_file_bare_names:
|
|
69
85
|
return True
|
|
70
86
|
return False
|
|
71
|
-
|
|
72
|
-
def resolve_pass2(self, parser, parsed: ParsedFile) -> ParsedFile:
|
|
73
|
-
"""Re-parse with cross-file schema context.
|
|
74
|
-
|
|
75
|
-
Args:
|
|
76
|
-
parser: SqlParser instance
|
|
77
|
-
parsed: ParsedFile from pass 1
|
|
78
|
-
|
|
79
|
-
Returns:
|
|
80
|
-
ParsedFile from pass 2 with resolved cross-file references,
|
|
81
|
-
or the pass-1 result if skip predicate determines no re-parse is needed
|
|
82
|
-
or if the file cannot be re-read.
|
|
83
|
-
|
|
84
|
-
Raises:
|
|
85
|
-
No exceptions are raised; file read errors are logged as WARNING
|
|
86
|
-
and the pass-1 result is returned unchanged.
|
|
87
|
-
|
|
88
|
-
Note:
|
|
89
|
-
This method returns the exact same ParsedFile object (via `return parsed`)
|
|
90
|
-
on the skip path. This identity semantics are used by callers to track
|
|
91
|
-
which files were skipped (resolved is parsed). Do not introduce a .copy()
|
|
92
|
-
on the skip path — that would break the identity check.
|
|
93
|
-
"""
|
|
94
|
-
# Register view sources for schema resolution
|
|
95
|
-
parser._schema.add_view_sources(self.sources)
|
|
96
|
-
|
|
97
|
-
if not self._needs_pass2(parsed):
|
|
98
|
-
# File has no cross-file dependencies — pass-1 result is already final.
|
|
99
|
-
return parsed
|
|
100
|
-
|
|
101
|
-
try:
|
|
102
|
-
sql = parsed.path.read_text(encoding="utf-8")
|
|
103
|
-
except (FileNotFoundError, OSError) as exc:
|
|
104
|
-
logger.warning(
|
|
105
|
-
"resolve_pass2: cannot re-read %s (%s) — returning pass-1 result",
|
|
106
|
-
parsed.path,
|
|
107
|
-
exc,
|
|
108
|
-
)
|
|
109
|
-
return parsed
|
|
110
|
-
|
|
111
|
-
# Filter cross-file CTAS bodies to what this file actually references —
|
|
112
|
-
# keeps exp.expand bounded by referenced_tables, not by corpus size.
|
|
113
|
-
ref_names = {(t.name or "").lower() for t in parsed.referenced_tables if t.name}
|
|
114
|
-
return parser.parse_file(parsed.path, sql, dependency_filter=ref_names)
|
sqlcg/parsers/ansi_parser.py
CHANGED
|
@@ -85,8 +85,8 @@ class AnsiParser(SqlParser):
|
|
|
85
85
|
the cross-file sources seeded into `sources_map` are filtered to only those
|
|
86
86
|
whose name is in the set. Pass-1 callers (and direct test callers) pass
|
|
87
87
|
`None` to disable filtering; pass-2 callers
|
|
88
|
-
(`
|
|
89
|
-
`ParsedFile.referenced_tables`.
|
|
88
|
+
(the `index_repo` pass-2 dispatch in `indexer.py`) compute this from
|
|
89
|
+
the pass-1 `ParsedFile.referenced_tables`.
|
|
90
90
|
_precomputed_start_lines: optional list of 1-based start lines, one per
|
|
91
91
|
statement. When provided (e.g. by SnowflakeParser which computes the map
|
|
92
92
|
from the preprocessed SQL after ``_preprocess_snowflake_sql`` — which
|
sqlcg/parsers/base.py
CHANGED
|
@@ -967,10 +967,16 @@ class SqlParser(ABC):
|
|
|
967
967
|
if not isinstance(cte_body, (exp.Select, exp.Union)):
|
|
968
968
|
continue
|
|
969
969
|
|
|
970
|
-
# For Union bodies, use the left
|
|
970
|
+
# For Union bodies, use the deepest left-branch Select's projections.
|
|
971
971
|
# Union.expressions is always empty; projections are on Union.this.
|
|
972
|
+
# For N=2: cte_body.this is a Select — the while loop is a no-op.
|
|
973
|
+
# For N≥3: cte_body.this is a nested Union (A UNION ALL B UNION ALL C
|
|
974
|
+
# parses as Union(Union(A,B),C)), so we walk down to the deepest
|
|
975
|
+
# left-branch Select (whose star qualify() already expanded in place).
|
|
972
976
|
if isinstance(cte_body, exp.Union):
|
|
973
977
|
projection_source = cte_body.this
|
|
978
|
+
while isinstance(projection_source, exp.Union):
|
|
979
|
+
projection_source = projection_source.this
|
|
974
980
|
else:
|
|
975
981
|
projection_source = cte_body
|
|
976
982
|
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Client helper for routing CLI read commands through the live MCP server.
|
|
2
|
+
|
|
3
|
+
When a server is live on the target DB, CLI read commands route their
|
|
4
|
+
``run_read(cypher, params)`` calls over the Unix control socket instead of
|
|
5
|
+
opening the DB directly. This avoids "Database is locked" errors when the
|
|
6
|
+
server holds KuzuDB's process-level write lock.
|
|
7
|
+
|
|
8
|
+
With no server running (``query_via_server`` returns ``None``), the fallback
|
|
9
|
+
opens the DB with ``get_backend(read_only=True)`` — zero-config small-repo
|
|
10
|
+
invariant preserved.
|
|
11
|
+
|
|
12
|
+
Framing protocol (v1.2.0):
|
|
13
|
+
Request: ``<decimal-byte-length>\\n<json-body>``
|
|
14
|
+
Response: ``<decimal-byte-length>\\n<json-body>``
|
|
15
|
+
Only the ``query`` op uses this framing; legacy ops (status/stop/reindex)
|
|
16
|
+
keep their unframed ``{...}\\n`` protocol.
|
|
17
|
+
|
|
18
|
+
Client receive strategy: after sending the framed request, read the length
|
|
19
|
+
line with ``f.readline()`` (blocking, will not return a partial line) then
|
|
20
|
+
read exactly that many bytes with ``f.read(n)``. This is the recv-exactly
|
|
21
|
+
pattern required by BLOCKER 2 — a single ``s.recv(65536)`` would silently
|
|
22
|
+
truncate large result sets. Do NOT copy reindex.py's single-recv pattern
|
|
23
|
+
here.
|
|
24
|
+
|
|
25
|
+
Server-busy behaviour (v1.1.0 F1 parity):
|
|
26
|
+
If the server is alive but the lock is held (timeout waiting for the
|
|
27
|
+
response), raise ``typer.Exit`` — a plain ``Exception`` subclass, NOT
|
|
28
|
+
``SystemExit`` / ``BaseException``. This ensures gain.py's
|
|
29
|
+
``except Exception: pass`` handler catches it and degrades gracefully
|
|
30
|
+
(skips the parse-quality section) instead of crashing. Other read
|
|
31
|
+
commands let the ``typer.Exit`` propagate to a clean non-zero CLI exit.
|
|
32
|
+
Do NOT fall back to a direct open on timeout — the server is alive and
|
|
33
|
+
holds the lock, so falling back would reproduce the "Database is locked"
|
|
34
|
+
error (mirrors the F1 fix in reindex.py:127–142).
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import json
|
|
40
|
+
import socket as _socket
|
|
41
|
+
import sys
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
|
|
44
|
+
import typer
|
|
45
|
+
|
|
46
|
+
# Client-side socket timeout for the query control-socket path.
|
|
47
|
+
# Sized to cover the longest in-flight reindex (~89 s DWH resync_changed)
|
|
48
|
+
# with headroom. This is a CLI transport constant, NOT a KuzuConfig value —
|
|
49
|
+
# same convention as _NOTIFY_SOCKET_TIMEOUT_S in reindex.py.
|
|
50
|
+
_QUERY_SOCKET_TIMEOUT_S = 300
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def query_via_server(
    cypher: str,
    params: dict,
    db_path: Path | None = None,
    timeout_s: float = _QUERY_SOCKET_TIMEOUT_S,
) -> list[dict] | None:
    """Send a read query to a live server over the Unix control socket.

    The request and the response both use length-prefixed framing:
    ``<decimal-byte-length>\\n<json-body>``. The response is read through a
    buffered file object (``readline`` for the length line, ``read(n)`` for
    the body) rather than a single ``recv``, so large result sets are not
    truncated.

    Args:
        cypher: Cypher query string (the server is expected to reject
            non-read-only queries).
        params: Query parameter dict; must be JSON-serialisable.
        db_path: Explicit database path, forwarded to ``sock_path`` to locate
            the control socket.
        timeout_s: Socket timeout in seconds, applied to connect, send and
            receive.

    Returns:
        The ``rows`` list from the server response (``[]`` when the key is
        absent), or ``None`` when no usable server answer was obtained:
        running on Windows, socket file missing, connection refused or
        closed early, or a response that is not a well-formed frame holding
        a JSON object. ``None`` tells the caller to fall back to a direct
        database open.

    Raises:
        typer.Exit: The socket timed out (server alive but busy), or the
            server replied with an ``{"error": ...}`` object. A message is
            printed to stderr first. No fallback is attempted on timeout
            because the server still holds the database lock.
    """
    from sqlcg.server.control import sock_path

    if sys.platform == "win32":
        # No Unix domain socket on Windows — caller falls back to direct open.
        return None

    sp = sock_path(db_path)
    if not sp.exists():
        return None

    req_bytes = json.dumps({"op": "query", "cypher": cypher, "params": params}).encode()
    frame = f"{len(req_bytes)}\n".encode() + req_bytes

    try:
        with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
            s.settimeout(timeout_s)
            s.connect(str(sp))
            s.sendall(frame)

            # Close the buffered reader deterministically along with the socket.
            with s.makefile("rb") as f:
                length_line = f.readline()
                if not length_line:
                    return None  # server closed the connection without replying
                try:
                    body_len = int(length_line.strip())
                except ValueError:
                    # Server sent an unframed response — protocol mismatch.
                    return None
                if body_len < 0:
                    # read(-1) would mean "read until EOF"; a negative length
                    # is never a valid frame.
                    return None
                body = f.read(body_len)

    except (TimeoutError, _socket.timeout):
        # ``socket.timeout`` is only an alias of ``TimeoutError`` from Python
        # 3.10 on; naming both keeps older interpreters from treating a busy
        # server as "no server" in the OSError handler below.
        # Do NOT fall back to a direct open here — the server is alive and
        # holds the lock, so that would produce "Database is locked".
        from rich.console import Console

        Console(stderr=True).print(
            f"[red]Server is busy (reindex in progress); timed out after "
            f"{timeout_s:.0f}s. The graph will update when it finishes — "
            "check 'sqlcg mcp status'.[/red]"
        )
        raise typer.Exit(1) from None
    except OSError:
        # Covers FileNotFoundError / ConnectionRefusedError and friends:
        # socket absent or refused — no live server; caller falls back.
        return None

    if len(body) != body_len:
        return None  # connection dropped mid-body; frame is incomplete

    try:
        resp = json.loads(body)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        return None  # malformed response; treat as no-server

    if not isinstance(resp, dict):
        return None  # valid JSON but not a response object

    if "error" in resp:
        from rich.console import Console

        Console(stderr=True).print(f"[red]Server query error: {resp['error']}[/red]")
        raise typer.Exit(1)

    return resp.get("rows", [])
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def run_read_routed(
    cypher: str,
    params: dict,
    db_path: Path | None = None,
) -> list[dict]:
    """Run a read query via the live server when there is one, else directly.

    Single entry point for CLI read commands, so the fallback rules live in
    one place:

    - ``query_via_server`` returns a list → those rows are returned as-is.
    - ``query_via_server`` returns ``None`` → the database is opened through
      ``get_backend(read_only=True)`` and the query runs there.
    - ``query_via_server`` raises ``typer.Exit`` → the exception propagates;
      no direct open is attempted.

    Args:
        cypher: Cypher query string.
        params: Query parameter dict.
        db_path: Explicit database path, passed to ``query_via_server``.

    Returns:
        Rows from the server, or from the direct read-only backend.

    Raises:
        typer.Exit: Propagated unchanged from ``query_via_server``.
    """
    server_rows = query_via_server(cypher, params, db_path=db_path)

    if server_rows is None:
        # Nothing answered on the control socket: open the DB ourselves.
        # The open must stay read-only so it cannot contend for a write lock.
        # NOTE(review): db_path is not forwarded to get_backend here, so the
        # fallback presumably targets the default DB — confirm this is intended.
        from sqlcg.core.config import get_backend

        with get_backend(read_only=True) as backend:
            return backend.run_read(cypher, params)

    return server_rows
|
sqlcg/server/server.py
CHANGED
|
@@ -77,24 +77,39 @@ async def _control_socket_task(
|
|
|
77
77
|
db_path: "Path",
|
|
78
78
|
backend_ref: "Callable[[], GraphBackend | None]",
|
|
79
79
|
stop_event: "anyio.Event",
|
|
80
|
-
|
|
80
|
+
backend_lock: "anyio.Lock",
|
|
81
81
|
start_time: float,
|
|
82
82
|
) -> None:
|
|
83
83
|
"""Accept control connections on ``<db>.sock`` and dispatch ops.
|
|
84
84
|
|
|
85
|
-
Supported ops
|
|
85
|
+
Supported ops:
|
|
86
86
|
|
|
87
87
|
- ``{"op": "status"}`` → running state, pid, db_path, freshness, uptime.
|
|
88
|
+
Unframed (legacy single-recv protocol).
|
|
88
89
|
- ``{"op": "stop"}`` → sends ``{"ok": true}`` then signals stop via
|
|
89
|
-
*stop_event*.
|
|
90
|
-
and closes stdin to trigger EOF in the MCP stdio loop.
|
|
90
|
+
*stop_event*. Unframed.
|
|
91
91
|
- ``{"op": "reindex", "root", "from", "to", "dialect"}`` → runs
|
|
92
92
|
``Indexer.resync_changed`` off the event-loop thread via
|
|
93
|
-
``anyio.to_thread.run_sync``, serialised behind *
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
93
|
+
``anyio.to_thread.run_sync``, serialised behind *backend_lock* (R1, R2).
|
|
94
|
+
Unframed.
|
|
95
|
+
- ``{"op": "query", "cypher": ..., "params": ...}`` → executes a
|
|
96
|
+
read-only Cypher query on the single backend connection, serialised
|
|
97
|
+
behind *backend_lock*. **Length-prefixed framing** (v1.2.0):
|
|
98
|
+
``<decimal-byte-length>\\n<json-body>`` on both request and response.
|
|
99
|
+
|
|
100
|
+
Framing protocol (v1.2.0, ``query`` op only):
|
|
101
|
+
Request: ``b"<len>\\n" + json_body`` — server detects by sniffing the
|
|
102
|
+
first line; a bare decimal integer → framed. Unframed requests always
|
|
103
|
+
start with ``{`` (never a digit), so the sniff is unambiguous.
|
|
104
|
+
Response: same ``<len>\\n<body>`` format for framed requests; unframed
|
|
105
|
+
response for unframed requests. Old clients that use the unframed
|
|
106
|
+
``s.recv(65536)`` + ``json.loads`` pattern will get a loud
|
|
107
|
+
``json.JSONDecodeError`` if they accidentally receive a framed response —
|
|
108
|
+
NOT silent truncation. Only the new ``read_client`` sends framed
|
|
109
|
+
requests, so this does not affect existing callers.
|
|
110
|
+
|
|
111
|
+
R2 (single connection): all backend operations go through ``backend_lock``
|
|
112
|
+
so concurrent calls never touch the single Kuzu connection simultaneously.
|
|
98
113
|
|
|
99
114
|
R8 teardown ordering: the caller must cancel this task BEFORE calling
|
|
100
115
|
``shutdown_backend()``. This is guaranteed by the ``anyio.CancelScope``
|
|
@@ -108,9 +123,25 @@ async def _control_socket_task(
|
|
|
108
123
|
import anyio
|
|
109
124
|
import anyio.abc as _anyio_abc
|
|
110
125
|
import anyio.to_thread as _to_thread
|
|
126
|
+
from anyio.streams.buffered import BufferedByteReceiveStream
|
|
111
127
|
|
|
112
128
|
from sqlcg.core.config import get_db_path as _get_db_path
|
|
113
129
|
|
|
130
|
+
# Read-only keyword allow-list for the ``query`` op. Only these leading
# keywords are permitted — anything that starts with another keyword is
# rejected before execution. This is a guard against accidental mutation,
# not a security boundary: only the FIRST keyword is inspected, so a query
# such as ``MATCH (n) DELETE n`` still passes this check.
import re

_QUERY_ALLOWED_KEYWORDS = frozenset({"MATCH", "RETURN", "WITH", "CALL", "UNWIND", "OPTIONAL"})

# Skips any run of leading whitespace and comments, then captures the first
# word. Recognised comments: ``// ...`` and ``/* ... */`` (Cypher syntax) plus
# ``-- ...`` (kept because the previous pattern accepted it). Line comments
# must end at a newline or end-of-input so backtracking can never pull a
# "keyword" out of the middle of a comment.
_LEADING_KEYWORD_RE = re.compile(
    r"(?:\s|//[^\n]*(?:\n|\Z)|--[^\n]*(?:\n|\Z)|/\*.*?\*/)*(\w+)",
    re.DOTALL,
)


def _is_read_only_cypher(cypher: str) -> bool:
    """Return True iff the first keyword of *cypher* is in the allow-list.

    Leading whitespace and comments are ignored. The keyword comparison is
    case-insensitive. Non-string input (e.g. a JSON ``null`` decoded to
    ``None``) and input with no leading word return False instead of raising.
    """
    if not isinstance(cypher, str):
        return False
    m = _LEADING_KEYWORD_RE.match(cypher)
    return m is not None and m.group(1).upper() in _QUERY_ALLOWED_KEYWORDS
|
|
144
|
+
|
|
114
145
|
sp = sock_path(db_path)
|
|
115
146
|
|
|
116
147
|
listener = await anyio.create_unix_listener(str(sp))
|
|
@@ -119,8 +150,29 @@ async def _control_socket_task(
|
|
|
119
150
|
async def _handle_connection(stream: _anyio_abc.SocketStream) -> None:
|
|
120
151
|
async with stream:
|
|
121
152
|
try:
|
|
122
|
-
|
|
123
|
-
|
|
153
|
+
# Sniff for framed vs unframed request.
|
|
154
|
+
# Framed (query op, v1.2.0): ``<decimal-len>\n<json-body>``
|
|
155
|
+
# Unframed (legacy status/stop/reindex): JSON object starting with ``{``
|
|
156
|
+
# The sniff is unambiguous: unframed JSON always starts with ``{``,
|
|
157
|
+
# never a bare decimal digit.
|
|
158
|
+
buf = BufferedByteReceiveStream(stream)
|
|
159
|
+
first_line = await buf.receive_until(b"\n", max_bytes=64)
|
|
160
|
+
|
|
161
|
+
try:
|
|
162
|
+
body_len = int(first_line.strip())
|
|
163
|
+
framed = True
|
|
164
|
+
except ValueError:
|
|
165
|
+
framed = False
|
|
166
|
+
|
|
167
|
+
if framed:
|
|
168
|
+
# Framed request: read exactly body_len bytes then parse.
|
|
169
|
+
raw_body = await buf.receive_exactly(body_len)
|
|
170
|
+
req = json.loads(raw_body)
|
|
171
|
+
else:
|
|
172
|
+
# Unframed request (legacy ops): first_line IS the JSON
|
|
173
|
+
# (terminated by \n as sent by the client).
|
|
174
|
+
req = json.loads(first_line)
|
|
175
|
+
|
|
124
176
|
op = req.get("op")
|
|
125
177
|
|
|
126
178
|
if op == "status":
|
|
@@ -195,15 +247,42 @@ async def _control_socket_task(
|
|
|
195
247
|
dialect,
|
|
196
248
|
)
|
|
197
249
|
|
|
198
|
-
async with
|
|
250
|
+
async with backend_lock:
|
|
199
251
|
# R1: run off event-loop thread; R2: lock serialises
|
|
200
252
|
summary = await _to_thread.run_sync(_do_reindex)
|
|
201
253
|
resp = {"ok": True, "summary": summary}
|
|
202
254
|
|
|
255
|
+
elif op == "query":
|
|
256
|
+
# Framed op (v1.2.0): read-only Cypher query over the socket.
|
|
257
|
+
# Must only be called with a framed request (sniff above sets framed=True).
|
|
258
|
+
cypher = req.get("cypher", "")
|
|
259
|
+
params = req.get("params") or {}
|
|
260
|
+
if not _is_read_only_cypher(cypher):
|
|
261
|
+
resp = {"error": "query op is read-only"}
|
|
262
|
+
else:
|
|
263
|
+
db = backend_ref()
|
|
264
|
+
if db is None:
|
|
265
|
+
resp = {"error": "backend not available"}
|
|
266
|
+
else:
|
|
267
|
+
|
|
268
|
+
def _do_query() -> list:
|
|
269
|
+
return db.run_read(cypher, params)
|
|
270
|
+
|
|
271
|
+
async with backend_lock:
|
|
272
|
+
# R1: run off event-loop thread; R2: lock serialises
|
|
273
|
+
# reads and writes on the single Kuzu connection.
|
|
274
|
+
rows = await _to_thread.run_sync(_do_query)
|
|
275
|
+
resp = {"ok": True, "rows": rows}
|
|
276
|
+
|
|
203
277
|
else:
|
|
204
278
|
resp = {"error": f"unknown op: {op!r}"}
|
|
205
279
|
|
|
206
|
-
|
|
280
|
+
# Send response: framed for framed requests, unframed for legacy ops.
|
|
281
|
+
resp_bytes = json.dumps(resp).encode()
|
|
282
|
+
if framed:
|
|
283
|
+
await stream.send(f"{len(resp_bytes)}\n".encode() + resp_bytes)
|
|
284
|
+
else:
|
|
285
|
+
await stream.send(resp_bytes + b"\n")
|
|
207
286
|
|
|
208
287
|
except Exception as exc:
|
|
209
288
|
try:
|
|
@@ -289,16 +368,16 @@ async def _run_with_control(db_path: "Path", start_time: float) -> None:
|
|
|
289
368
|
with ``abandon_on_cancel=False``). We cannot interrupt it without
|
|
290
369
|
killing the process; ``_stop_watcher`` does cleanup first.
|
|
291
370
|
|
|
292
|
-
``
|
|
293
|
-
``_control_socket_task`` so concurrent
|
|
294
|
-
behind a single lock (R2).
|
|
371
|
+
``backend_lock`` is created once here and passed into
|
|
372
|
+
``_control_socket_task`` so concurrent control ops (reindex, query) are
|
|
373
|
+
serialised behind a single lock on the Kuzu connection (R2).
|
|
295
374
|
"""
|
|
296
375
|
import anyio
|
|
297
376
|
|
|
298
377
|
import sqlcg.server.tools as _tools
|
|
299
378
|
|
|
300
379
|
stop_event = anyio.Event()
|
|
301
|
-
|
|
380
|
+
backend_lock = anyio.Lock() # R2: serialise all backend ops (Kuzu not thread-safe)
|
|
302
381
|
|
|
303
382
|
async with anyio.create_task_group() as tg:
|
|
304
383
|
if sys.platform != "win32":
|
|
@@ -308,7 +387,7 @@ async def _run_with_control(db_path: "Path", start_time: float) -> None:
|
|
|
308
387
|
db_path,
|
|
309
388
|
lambda: _tools._backend,
|
|
310
389
|
stop_event,
|
|
311
|
-
|
|
390
|
+
backend_lock,
|
|
312
391
|
start_time,
|
|
313
392
|
)
|
|
314
393
|
# Watch stop_event; shuts down and calls os._exit(0).
|
|
File without changes
|