sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/METADATA +2 -4
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/RECORD +31 -30
- sqlcg/__init__.py +1 -1
- sqlcg/cli/commands/analyze.py +138 -127
- sqlcg/cli/commands/db.py +49 -51
- sqlcg/cli/commands/find.py +5 -9
- sqlcg/cli/commands/gain.py +14 -16
- sqlcg/cli/commands/git.py +11 -4
- sqlcg/cli/commands/index.py +173 -21
- sqlcg/cli/commands/mcp.py +70 -3
- sqlcg/cli/commands/reindex.py +147 -77
- sqlcg/cli/commands/uninstall.py +9 -20
- sqlcg/core/__init__.py +1 -3
- sqlcg/core/config.py +25 -81
- sqlcg/core/duckdb_backend.py +764 -0
- sqlcg/core/freshness.py +1 -1
- sqlcg/core/graph_db.py +20 -4
- sqlcg/core/queries.py +26 -7
- sqlcg/core/queries.sql +249 -0
- sqlcg/core/schema.py +1 -1
- sqlcg/indexer/indexer.py +27 -36
- sqlcg/metrics/store.py +49 -1
- sqlcg/server/control.py +1 -1
- sqlcg/server/noise_filter.py +1 -1
- sqlcg/server/read_client.py +2 -2
- sqlcg/server/server.py +184 -86
- sqlcg/server/skill.py +2 -2
- sqlcg/server/tools.py +119 -41
- sqlcg/server/writer.py +459 -0
- sqlcg/core/kuzu_backend.py +0 -445
- sqlcg/core/neo4j_backend.py +0 -233
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/WHEEL +0 -0
- {sql_code_graph-1.2.2.dist-info → sql_code_graph-1.4.0.dist-info}/entry_points.txt +0 -0
sqlcg/core/freshness.py
CHANGED
|
@@ -74,7 +74,7 @@ def compute_freshness(root: Path, indexed_sha: str | None) -> Freshness:
|
|
|
74
74
|
Args:
|
|
75
75
|
root: Filesystem path used as the ``cwd`` for git commands. This is
|
|
76
76
|
typically the ``r.path`` value read from the ``Repo`` graph node.
|
|
77
|
-
indexed_sha: The SHA recorded by ``
|
|
77
|
+
indexed_sha: The SHA recorded by ``DuckDBBackend.get_indexed_sha()``.
|
|
78
78
|
May be ``None`` when the graph was never indexed, or a sentinel
|
|
79
79
|
like ``"<head>+dirty"`` written by ``index --include-working-tree``.
|
|
80
80
|
"""
|
sqlcg/core/graph_db.py
CHANGED
|
@@ -77,7 +77,7 @@ class GraphBackend(ABC):
|
|
|
77
77
|
|
|
78
78
|
Each row dict must contain the primary-key field for `label` (see _pk_field)
|
|
79
79
|
plus any other properties to SET. All rows must share the same property-key
|
|
80
|
-
set; backends MAY raise if rows are heterogeneous (
|
|
80
|
+
set; backends MAY raise if rows are heterogeneous (DuckDBBackend does).
|
|
81
81
|
|
|
82
82
|
Idempotent MERGE semantics, identical to upsert_node per row.
|
|
83
83
|
|
|
@@ -118,7 +118,7 @@ class GraphBackend(ABC):
|
|
|
118
118
|
"""Execute a read-only query and return results.
|
|
119
119
|
|
|
120
120
|
Args:
|
|
121
|
-
query: Query string (
|
|
121
|
+
query: Query string (SQL)
|
|
122
122
|
params: Parameters to bind in the query
|
|
123
123
|
|
|
124
124
|
Returns:
|
|
@@ -130,7 +130,7 @@ class GraphBackend(ABC):
|
|
|
130
130
|
"""Execute a write query (mutation).
|
|
131
131
|
|
|
132
132
|
Args:
|
|
133
|
-
query: Query string (
|
|
133
|
+
query: Query string (SQL)
|
|
134
134
|
params: Parameters to bind in the query
|
|
135
135
|
"""
|
|
136
136
|
|
|
@@ -213,7 +213,7 @@ class GraphBackend(ABC):
|
|
|
213
213
|
def _validate_props(properties: dict[str, Any]) -> None:
|
|
214
214
|
"""Validate that all property keys are safe identifiers.
|
|
215
215
|
|
|
216
|
-
Guards against
|
|
216
|
+
Guards against SQL injection via property key interpolation.
|
|
217
217
|
|
|
218
218
|
Args:
|
|
219
219
|
properties: Dictionary of properties to validate
|
|
@@ -244,3 +244,19 @@ class GraphBackend(ABC):
|
|
|
244
244
|
yield self
|
|
245
245
|
except Exception:
|
|
246
246
|
raise
|
|
247
|
+
|
|
248
|
+
def clear_all_tables(self) -> None:
|
|
249
|
+
"""Delete all node and edge rows, preserving the schema structure.
|
|
250
|
+
|
|
251
|
+
Used by the server drain body for the full-rebuild-in-transaction
|
|
252
|
+
reindex path. Concrete backends must override this.
|
|
253
|
+
"""
|
|
254
|
+
raise NotImplementedError(f"{type(self).__name__} does not support clear_all_tables")
|
|
255
|
+
|
|
256
|
+
def expand_star_sources(self) -> int:
|
|
257
|
+
"""Expand SELECT * lineage into per-column STAR_EXPANSION edges.
|
|
258
|
+
|
|
259
|
+
Runs once per index after ingestion. Concrete backends must override
|
|
260
|
+
this; returns the total STAR_EXPANSION edge count.
|
|
261
|
+
"""
|
|
262
|
+
raise NotImplementedError(f"{type(self).__name__} does not support expand_star_sources")
|
sqlcg/core/queries.py
CHANGED
|
@@ -1,20 +1,35 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""SQL query loader. All query strings live in queries.sql."""
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
_SQL_FILE = Path(__file__).parent / "queries.sql"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def _load() -> dict[str, str]:
|
|
10
|
-
"""Load named
|
|
10
|
+
"""Load named SQL blocks from queries.sql.
|
|
11
11
|
|
|
12
12
|
Format: blocks are separated by lines matching "-- BLOCK_NAME" at the start.
|
|
13
|
-
Each block name becomes a key in the returned dict.
|
|
13
|
+
Each block name becomes a key in the returned dict. Comment lines that start
|
|
14
|
+
with "-- " followed by lowercase words (e.g. "-- params: ...") are included in
|
|
15
|
+
the block text so the regex split is only on UPPER_SNAKE_CASE block headers.
|
|
14
16
|
"""
|
|
15
|
-
text =
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
text = _SQL_FILE.read_text(encoding="utf-8")
|
|
18
|
+
# Split on lines like "-- BLOCK_NAME" (all-caps with underscores, start of line)
|
|
19
|
+
blocks = re.split(r"^--\s+([A-Z][A-Z0-9_]+)\s*$", text, flags=re.MULTILINE)
|
|
20
|
+
result: dict[str, str] = {}
|
|
21
|
+
for i in range(1, len(blocks), 2):
|
|
22
|
+
name = blocks[i]
|
|
23
|
+
body = blocks[i + 1].strip()
|
|
24
|
+
# Drop every SQL comment line (e.g. "-- params: ...") from the block body
|
|
25
|
+
body_lines = []
|
|
26
|
+
for line in body.splitlines():
|
|
27
|
+
stripped = line.strip()
|
|
28
|
+
if stripped.startswith("--"):
|
|
29
|
+
continue
|
|
30
|
+
body_lines.append(line)
|
|
31
|
+
result[name] = "\n".join(body_lines).strip()
|
|
32
|
+
return result
|
|
18
33
|
|
|
19
34
|
|
|
20
35
|
_Q = _load()
|
|
@@ -30,7 +45,11 @@ GET_DOWNSTREAM_DEPENDENCIES_QUERY = _Q["GET_DOWNSTREAM_DEPENDENCIES"]
|
|
|
30
45
|
GET_UPSTREAM_DEPENDENCIES_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES"]
|
|
31
46
|
SEARCH_SQL_PATTERN_QUERY = _Q["SEARCH_SQL_PATTERN"]
|
|
32
47
|
LIST_DIALECTS_AND_REPOS_QUERY = _Q["LIST_DIALECTS_AND_REPOS"]
|
|
48
|
+
# EXPAND_STAR_SOURCES is implemented as three DML steps in DuckDBBackend.expand_star_sources()
|
|
49
|
+
# rather than a single query (DuckDB cannot do MERGE in the Cypher sense).
|
|
33
50
|
EXPAND_STAR_SOURCES_QUERY = _Q["EXPAND_STAR_SOURCES"]
|
|
51
|
+
EXPAND_STAR_SOURCES_HAS_COLUMN_QUERY = _Q["EXPAND_STAR_SOURCES_HAS_COLUMN"]
|
|
52
|
+
EXPAND_STAR_SOURCES_LINEAGE_QUERY = _Q["EXPAND_STAR_SOURCES_LINEAGE"]
|
|
34
53
|
COUNT_STAR_SOURCES_QUERY = _Q["COUNT_STAR_SOURCES"]
|
|
35
54
|
COUNT_STAR_EXPANSIONS_QUERY = _Q["COUNT_STAR_EXPANSIONS"]
|
|
36
55
|
FIND_DEFINITION_QUERY = _Q["FIND_DEFINITION"]
|
sqlcg/core/queries.sql
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
-- DuckDB SQL query library.
|
|
2
|
+
-- Format identical to queries.cypher: blocks separated by "-- BLOCK_NAME" lines.
|
|
3
|
+
-- All queries use ? positional parameters (list order matches the named params below).
|
|
4
|
+
-- Named params in comments are for documentation — callers pass values as a list.
|
|
5
|
+
|
|
6
|
+
-- DELETE_COLUMNS_FOR_FILE
|
|
7
|
+
-- params: [path]
|
|
8
|
+
DELETE FROM "SqlColumn" WHERE id IN (
|
|
9
|
+
SELECT hc.dst_key FROM "HAS_COLUMN" hc
|
|
10
|
+
WHERE hc.src_key IN (
|
|
11
|
+
SELECT di.src_key FROM "DEFINED_IN" di WHERE di.dst_key = ?
|
|
12
|
+
)
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
-- DELETE_QUERIES_FOR_FILE
|
|
16
|
+
-- params: [path]
|
|
17
|
+
DELETE FROM "SqlQuery" WHERE file_path = ?
|
|
18
|
+
|
|
19
|
+
-- DELETE_TABLES_FOR_FILE
|
|
20
|
+
-- params: [path]
|
|
21
|
+
DELETE FROM "SqlTable" WHERE qualified IN (
|
|
22
|
+
SELECT di.src_key FROM "DEFINED_IN" di WHERE di.dst_key = ?
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
-- DELETE_FILE
|
|
26
|
+
-- params: [path]
|
|
27
|
+
DELETE FROM "File" WHERE path = ?
|
|
28
|
+
|
|
29
|
+
-- INDEX_REPO_FILES
|
|
30
|
+
-- params: [repo_prefix]
|
|
31
|
+
SELECT path FROM "File" WHERE path LIKE ? || '%'
|
|
32
|
+
|
|
33
|
+
-- TRACE_COLUMN_LINEAGE
|
|
34
|
+
-- params: [id]
|
|
35
|
+
SELECT
|
|
36
|
+
src.id AS id,
|
|
37
|
+
src.col_name AS col_name,
|
|
38
|
+
src.table_qualified AS table_qualified,
|
|
39
|
+
cl.transform AS transform,
|
|
40
|
+
cl.confidence AS confidence,
|
|
41
|
+
q.file_path AS file,
|
|
42
|
+
q.start_line AS line,
|
|
43
|
+
q.sql AS expression,
|
|
44
|
+
t.kind AS table_kind
|
|
45
|
+
FROM "COLUMN_LINEAGE" cl
|
|
46
|
+
JOIN "SqlColumn" src ON src.id = cl.src_key
|
|
47
|
+
LEFT JOIN "SqlQuery" q ON q.id = cl.query_id
|
|
48
|
+
LEFT JOIN "SqlTable" t ON t.qualified = src.table_qualified
|
|
49
|
+
WHERE cl.dst_key = ?
|
|
50
|
+
|
|
51
|
+
-- FIND_TABLE_USAGES
|
|
52
|
+
-- params: [name]
|
|
53
|
+
SELECT f.path AS file, q.sql AS sql, q.kind AS kind
|
|
54
|
+
FROM "SqlTable" t
|
|
55
|
+
JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
|
|
56
|
+
JOIN "SqlQuery" q ON q.id = sf.src_key
|
|
57
|
+
JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
|
|
58
|
+
JOIN "File" f ON f.path = qdi.dst_key
|
|
59
|
+
WHERE t.name = ?
|
|
60
|
+
|
|
61
|
+
-- GET_DOWNSTREAM_DEPENDENCIES
|
|
62
|
+
-- params: [id]
|
|
63
|
+
SELECT dst.id AS id, dst.col_name AS col_name, dst.table_qualified AS table_qualified
|
|
64
|
+
FROM "COLUMN_LINEAGE" cl
|
|
65
|
+
JOIN "SqlColumn" dst ON dst.id = cl.dst_key
|
|
66
|
+
WHERE cl.src_key = ?
|
|
67
|
+
|
|
68
|
+
-- GET_UPSTREAM_DEPENDENCIES
|
|
69
|
+
-- params: [id]
|
|
70
|
+
SELECT src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
|
|
71
|
+
FROM "COLUMN_LINEAGE" cl
|
|
72
|
+
JOIN "SqlColumn" src ON src.id = cl.src_key
|
|
73
|
+
WHERE cl.dst_key = ?
|
|
74
|
+
|
|
75
|
+
-- SEARCH_SQL_PATTERN
|
|
76
|
+
-- params: [query, limit]
|
|
77
|
+
SELECT f.path AS file, q.sql AS sql, q.kind AS kind
|
|
78
|
+
FROM "SqlQuery" q
|
|
79
|
+
JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
|
|
80
|
+
JOIN "File" f ON f.path = qdi.dst_key
|
|
81
|
+
WHERE q.sql LIKE '%' || ? || '%'
|
|
82
|
+
LIMIT ?
|
|
83
|
+
|
|
84
|
+
-- LIST_DIALECTS_AND_REPOS
|
|
85
|
+
-- params: []
|
|
86
|
+
SELECT r.path AS path, r.name AS name,
|
|
87
|
+
list(DISTINCT f.dialect) AS dialects
|
|
88
|
+
FROM "Repo" r
|
|
89
|
+
JOIN "BELONGS_TO" bt ON bt.dst_key = r.path
|
|
90
|
+
JOIN "File" f ON f.path = bt.src_key
|
|
91
|
+
GROUP BY r.path, r.name
|
|
92
|
+
|
|
93
|
+
-- EXPAND_STAR_SOURCES
|
|
94
|
+
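-- One of three DML statements for star expansion: inserts the new SqlColumn destination nodes derived from STAR_SOURCE.
|
|
95
|
+
-- The HAS_COLUMN and COLUMN_LINEAGE edges are inserted by the EXPAND_STAR_SOURCES_HAS_COLUMN and EXPAND_STAR_SOURCES_LINEAGE blocks.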
|
|
96
|
+
-- params: []
|
|
97
|
+
INSERT OR REPLACE INTO "SqlColumn" (id, col_name, table_qualified, catalog, db, table_name)
|
|
98
|
+
SELECT DISTINCT
|
|
99
|
+
q.target_table || '.' || c.col_name AS id,
|
|
100
|
+
c.col_name,
|
|
101
|
+
q.target_table AS table_qualified,
|
|
102
|
+
tgt.catalog,
|
|
103
|
+
tgt.db,
|
|
104
|
+
tgt.name AS table_name
|
|
105
|
+
FROM "STAR_SOURCE" ss
|
|
106
|
+
JOIN "SqlQuery" q ON q.id = ss.src_key
|
|
107
|
+
JOIN "SqlTable" t ON t.qualified = ss.dst_key
|
|
108
|
+
JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
|
|
109
|
+
JOIN "SqlColumn" c ON c.id = hc.dst_key
|
|
110
|
+
JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
|
|
111
|
+
WHERE q.target_table <> ''
|
|
112
|
+
AND q.target_table <> t.qualified
|
|
113
|
+
|
|
114
|
+
-- EXPAND_STAR_SOURCES_HAS_COLUMN
|
|
115
|
+
-- Insert HAS_COLUMN edges for the new destination columns.
|
|
116
|
+
-- params: []
|
|
117
|
+
INSERT OR REPLACE INTO "HAS_COLUMN" (src_key, dst_key, source)
|
|
118
|
+
SELECT DISTINCT
|
|
119
|
+
q.target_table AS src_key,
|
|
120
|
+
q.target_table || '.' || c.col_name AS dst_key,
|
|
121
|
+
'star_expansion' AS source
|
|
122
|
+
FROM "STAR_SOURCE" ss
|
|
123
|
+
JOIN "SqlQuery" q ON q.id = ss.src_key
|
|
124
|
+
JOIN "SqlTable" t ON t.qualified = ss.dst_key
|
|
125
|
+
JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
|
|
126
|
+
JOIN "SqlColumn" c ON c.id = hc.dst_key
|
|
127
|
+
JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
|
|
128
|
+
WHERE q.target_table <> ''
|
|
129
|
+
AND q.target_table <> t.qualified
|
|
130
|
+
|
|
131
|
+
-- EXPAND_STAR_SOURCES_LINEAGE
|
|
132
|
+
-- Insert COLUMN_LINEAGE edges for the star expansion.
|
|
133
|
+
-- params: []
|
|
134
|
+
INSERT OR REPLACE INTO "COLUMN_LINEAGE" (src_key, dst_key, transform, confidence, query_id)
|
|
135
|
+
SELECT DISTINCT
|
|
136
|
+
c.id AS src_key,
|
|
137
|
+
q.target_table || '.' || c.col_name AS dst_key,
|
|
138
|
+
'STAR_EXPANSION' AS transform,
|
|
139
|
+
0.8 AS confidence,
|
|
140
|
+
q.id AS query_id
|
|
141
|
+
FROM "STAR_SOURCE" ss
|
|
142
|
+
JOIN "SqlQuery" q ON q.id = ss.src_key
|
|
143
|
+
JOIN "SqlTable" t ON t.qualified = ss.dst_key
|
|
144
|
+
JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
|
|
145
|
+
JOIN "SqlColumn" c ON c.id = hc.dst_key
|
|
146
|
+
JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
|
|
147
|
+
WHERE q.target_table <> ''
|
|
148
|
+
AND q.target_table <> t.qualified
|
|
149
|
+
|
|
150
|
+
-- COUNT_STAR_SOURCES
|
|
151
|
+
-- params: []
|
|
152
|
+
SELECT count(*) AS n FROM "STAR_SOURCE"
|
|
153
|
+
|
|
154
|
+
-- COUNT_STAR_EXPANSIONS
|
|
155
|
+
-- params: []
|
|
156
|
+
SELECT count(*) AS n FROM "COLUMN_LINEAGE" WHERE transform = 'STAR_EXPANSION'
|
|
157
|
+
|
|
158
|
+
-- FIND_DEFINITION
|
|
159
|
+
-- params: [table_qualified]
|
|
160
|
+
SELECT t.qualified AS table_qualified, t.kind AS kind,
|
|
161
|
+
t.defined_in_file AS defined_in_file, f.path AS file_path
|
|
162
|
+
FROM "SqlTable" t
|
|
163
|
+
JOIN "DEFINED_IN" di ON di.src_key = t.qualified
|
|
164
|
+
JOIN "File" f ON f.path = di.dst_key
|
|
165
|
+
WHERE t.qualified = ?
|
|
166
|
+
|
|
167
|
+
-- GET_TABLE_DEFINING_FILES
|
|
168
|
+
-- params: [table_qualified]
|
|
169
|
+
SELECT f.path AS file_path, t.kind AS kind
|
|
170
|
+
FROM "SqlTable" t
|
|
171
|
+
JOIN "DEFINED_IN" di ON di.src_key = t.qualified
|
|
172
|
+
JOIN "File" f ON f.path = di.dst_key
|
|
173
|
+
WHERE t.qualified = ?
|
|
174
|
+
|
|
175
|
+
-- GET_TABLE_DIRECT_UPSTREAMS
|
|
176
|
+
-- params: [table_qualified, table_qualified]
|
|
177
|
+
SELECT DISTINCT src.qualified AS upstream_table, f.path AS in_file
|
|
178
|
+
FROM "SqlQuery" q
|
|
179
|
+
JOIN "SELECTS_FROM" sf ON sf.src_key = q.id
|
|
180
|
+
JOIN "SqlTable" src ON src.qualified = sf.dst_key
|
|
181
|
+
LEFT JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
|
|
182
|
+
LEFT JOIN "File" f ON f.path = qdi.dst_key
|
|
183
|
+
WHERE q.target_table = ?
|
|
184
|
+
AND src.qualified <> ?
|
|
185
|
+
|
|
186
|
+
-- GET_COLUMNS_FOR_TABLE
|
|
187
|
+
-- params: [table_qualified]
|
|
188
|
+
SELECT c.id AS col_id, c.col_name AS col_name
|
|
189
|
+
FROM "SqlTable" t
|
|
190
|
+
JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
|
|
191
|
+
JOIN "SqlColumn" c ON c.id = hc.dst_key
|
|
192
|
+
WHERE t.qualified = ?
|
|
193
|
+
|
|
194
|
+
-- GET_TABLES_DEFINED_IN_FILE
|
|
195
|
+
-- params: [file_path]
|
|
196
|
+
SELECT t.qualified AS table_qualified
|
|
197
|
+
FROM "SqlTable" t
|
|
198
|
+
JOIN "DEFINED_IN" di ON di.src_key = t.qualified
|
|
199
|
+
WHERE di.dst_key = ?
|
|
200
|
+
|
|
201
|
+
-- ANALYZE_UNUSED_TABLES
|
|
202
|
+
-- params: []
|
|
203
|
+
SELECT t.qualified AS table_qualified
|
|
204
|
+
FROM "SqlTable" t
|
|
205
|
+
WHERE t.qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")
|
|
206
|
+
ORDER BY t.qualified
|
|
207
|
+
|
|
208
|
+
-- HUB_RANKING
|
|
209
|
+
-- params: [k]
|
|
210
|
+
SELECT t.qualified AS table_qualified,
|
|
211
|
+
count(DISTINCT q.target_table) AS downstream_dependents
|
|
212
|
+
FROM "SqlTable" t
|
|
213
|
+
JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
|
|
214
|
+
JOIN "SqlQuery" q ON q.id = sf.src_key
|
|
215
|
+
WHERE q.target_table <> ''
|
|
216
|
+
AND q.target_table <> t.qualified
|
|
217
|
+
GROUP BY t.qualified
|
|
218
|
+
ORDER BY downstream_dependents DESC, t.qualified
|
|
219
|
+
LIMIT ?
|
|
220
|
+
|
|
221
|
+
-- DEPENDENT_FILES_OF_TABLES
|
|
222
|
+
-- params: [tables] (a single list value, bound whole to the ANY(?) placeholder)
|
|
223
|
+
SELECT DISTINCT f.path AS path
|
|
224
|
+
FROM "SqlTable" t
|
|
225
|
+
JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
|
|
226
|
+
JOIN "SqlQuery" q ON q.id = sf.src_key
|
|
227
|
+
JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
|
|
228
|
+
JOIN "File" f ON f.path = qdi.dst_key
|
|
229
|
+
WHERE t.qualified = ANY(?)
|
|
230
|
+
|
|
231
|
+
-- GET_TABLE_EXTERNAL_CONSUMERS
|
|
232
|
+
-- params: [table_qualified]
|
|
233
|
+
SELECT e.name AS name, e.consumer_type AS consumer_type
|
|
234
|
+
FROM "SqlTable" t
|
|
235
|
+
JOIN "CONSUMED_BY" cb ON cb.src_key = t.qualified
|
|
236
|
+
JOIN "ExternalConsumer" e ON e.name = cb.dst_key
|
|
237
|
+
WHERE t.qualified = ?
|
|
238
|
+
|
|
239
|
+
-- GET_TABLES_EXTERNAL_CONSUMERS_BATCH
|
|
240
|
+
-- params: [table_qualifieds] (list)
|
|
241
|
+
SELECT t.qualified AS table_qualified, e.name AS name, e.consumer_type AS consumer_type
|
|
242
|
+
FROM "SqlTable" t
|
|
243
|
+
JOIN "CONSUMED_BY" cb ON cb.src_key = t.qualified
|
|
244
|
+
JOIN "ExternalConsumer" e ON e.name = cb.dst_key
|
|
245
|
+
WHERE t.qualified = ANY(?)
|
|
246
|
+
|
|
247
|
+
-- COUNT_EXTERNAL_CONSUMERS
|
|
248
|
+
-- params: []
|
|
249
|
+
SELECT count(*) AS n FROM "CONSUMED_BY"
|
sqlcg/core/schema.py
CHANGED
sqlcg/indexer/indexer.py
CHANGED
|
@@ -773,9 +773,13 @@ class Indexer:
|
|
|
773
773
|
for cl_path in closure_files:
|
|
774
774
|
try:
|
|
775
775
|
cl_rows = db.run_read(
|
|
776
|
-
"
|
|
777
|
-
|
|
778
|
-
"
|
|
776
|
+
"SELECT DISTINCT t.name AS name"
|
|
777
|
+
' FROM "File" f'
|
|
778
|
+
' JOIN "QUERY_DEFINED_IN" qdi ON qdi.dst_key = f.path'
|
|
779
|
+
' JOIN "SqlQuery" q ON q.id = qdi.src_key'
|
|
780
|
+
' JOIN "SELECTS_FROM" sf ON sf.src_key = q.id'
|
|
781
|
+
' JOIN "SqlTable" t ON t.qualified = sf.dst_key'
|
|
782
|
+
" WHERE f.path = ?",
|
|
779
783
|
{"path": cl_path},
|
|
780
784
|
)
|
|
781
785
|
for row in cl_rows:
|
|
@@ -794,17 +798,21 @@ class Indexer:
|
|
|
794
798
|
# first, but we do a broader search since the table might be qualified differently.
|
|
795
799
|
try:
|
|
796
800
|
def_rows = db.run_read(
|
|
797
|
-
"
|
|
798
|
-
"
|
|
799
|
-
"
|
|
800
|
-
|
|
801
|
+
"SELECT DISTINCT f.path AS file_path"
|
|
802
|
+
' FROM "SqlTable" t'
|
|
803
|
+
' JOIN "DEFINED_IN" di ON di.src_key = t.qualified'
|
|
804
|
+
' JOIN "File" f ON f.path = di.dst_key'
|
|
805
|
+
" WHERE upper(t.name) = upper(?)",
|
|
806
|
+
{"name": bare_name},
|
|
801
807
|
)
|
|
802
808
|
if not def_rows:
|
|
803
|
-
# Try lowercase match
|
|
809
|
+
# Try bare lowercase match
|
|
804
810
|
def_rows = db.run_read(
|
|
805
|
-
"
|
|
806
|
-
"
|
|
807
|
-
"
|
|
811
|
+
"SELECT DISTINCT f.path AS file_path"
|
|
812
|
+
' FROM "SqlTable" t'
|
|
813
|
+
' JOIN "DEFINED_IN" di ON di.src_key = t.qualified'
|
|
814
|
+
' JOIN "File" f ON f.path = di.dst_key'
|
|
815
|
+
" WHERE lower(t.name) = lower(?)",
|
|
808
816
|
{"name": bare_name},
|
|
809
817
|
)
|
|
810
818
|
for row in def_rows:
|
|
@@ -1369,32 +1377,15 @@ class Indexer:
|
|
|
1369
1377
|
return file_rows.counts
|
|
1370
1378
|
|
|
1371
1379
|
def _expand_star_sources(self, db: GraphBackend) -> int:
|
|
1372
|
-
"""Run the post-ingestion star expansion
|
|
1380
|
+
"""Run the post-ingestion star expansion.
|
|
1381
|
+
|
|
1382
|
+
Calls the three-step DML expand_star_sources() method which returns the
|
|
1383
|
+
total STAR_EXPANSION edge count.
|
|
1373
1384
|
|
|
1374
1385
|
Returns:
|
|
1375
|
-
Number of COLUMN_LINEAGE edges
|
|
1386
|
+
Number of COLUMN_LINEAGE STAR_EXPANSION edges after expansion.
|
|
1376
1387
|
"""
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
# Count COLUMN_LINEAGE edges before expansion
|
|
1380
|
-
before = db.run_read(
|
|
1381
|
-
"MATCH ()-[r:COLUMN_LINEAGE {transform: 'STAR_EXPANSION'}]->() RETURN count(r) AS n",
|
|
1382
|
-
{},
|
|
1383
|
-
)
|
|
1384
|
-
before_count = before[0]["n"] if before else 0
|
|
1385
|
-
|
|
1386
|
-
# Run the expansion query (without explicit transaction, as caller may already be in one)
|
|
1387
|
-
db.run_read(EXPAND_STAR_SOURCES_QUERY, {})
|
|
1388
|
-
|
|
1389
|
-
# Count COLUMN_LINEAGE edges after expansion
|
|
1390
|
-
after = db.run_read(
|
|
1391
|
-
"MATCH ()-[r:COLUMN_LINEAGE {transform: 'STAR_EXPANSION'}]->() RETURN count(r) AS n",
|
|
1392
|
-
{},
|
|
1393
|
-
)
|
|
1394
|
-
after_count = after[0]["n"] if after else 0
|
|
1395
|
-
|
|
1396
|
-
# Return the number of new edges created
|
|
1397
|
-
return max(0, after_count - before_count)
|
|
1388
|
+
return db.expand_star_sources()
|
|
1398
1389
|
|
|
1399
1390
|
def _ingest_external_consumers(self, db: GraphBackend, path: Path) -> dict:
|
|
1400
1391
|
"""Ingest declared external downstream consumers from .sqlcg.toml.
|
|
@@ -1417,10 +1408,10 @@ class Indexer:
|
|
|
1417
1408
|
for spec in specs:
|
|
1418
1409
|
all_targets.extend(spec.consumes)
|
|
1419
1410
|
|
|
1420
|
-
# Single
|
|
1411
|
+
# Single round-trip to check which targets exist as SqlTable nodes
|
|
1421
1412
|
if all_targets:
|
|
1422
1413
|
existing_rows = db.run_read(
|
|
1423
|
-
|
|
1414
|
+
'SELECT qualified FROM "SqlTable" WHERE qualified = ANY(?)',
|
|
1424
1415
|
{"names": all_targets},
|
|
1425
1416
|
)
|
|
1426
1417
|
existing_tables: set[str] = {row["qualified"] for row in existing_rows}
|
sqlcg/metrics/store.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""SQLite-based metrics storage for SQL Code Graph.
|
|
2
2
|
|
|
3
|
-
Importable without
|
|
3
|
+
Importable without a graph backend. All writes are wrapped in try/except with
|
|
4
4
|
WARNING-level logging on failure. Opt-out via SQLCG_METRICS=0.
|
|
5
5
|
"""
|
|
6
6
|
|
|
@@ -117,6 +117,19 @@ class MetricsStore:
|
|
|
117
117
|
)
|
|
118
118
|
"""
|
|
119
119
|
)
|
|
120
|
+
self._conn.execute(
|
|
121
|
+
"""
|
|
122
|
+
CREATE TABLE IF NOT EXISTS writer_queue_events (
|
|
123
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
124
|
+
timestamp TEXT NOT NULL,
|
|
125
|
+
event TEXT NOT NULL,
|
|
126
|
+
op TEXT,
|
|
127
|
+
reason TEXT,
|
|
128
|
+
queue_depth INTEGER,
|
|
129
|
+
duration_ms REAL
|
|
130
|
+
)
|
|
131
|
+
"""
|
|
132
|
+
)
|
|
120
133
|
self._conn.commit()
|
|
121
134
|
except sqlite3.Error as e:
|
|
122
135
|
logger.warning(f"Failed to initialize metrics schema: {e}")
|
|
@@ -228,6 +241,41 @@ class MetricsStore:
|
|
|
228
241
|
except sqlite3.Error as e:
|
|
229
242
|
logger.warning(f"Failed to record feedback: {e}")
|
|
230
243
|
|
|
244
|
+
def record_queue_event(
|
|
245
|
+
self,
|
|
246
|
+
event: str,
|
|
247
|
+
*,
|
|
248
|
+
op: str | None = None,
|
|
249
|
+
reason: str | None = None,
|
|
250
|
+
queue_depth: int | None = None,
|
|
251
|
+
duration_ms: float | None = None,
|
|
252
|
+
) -> None:
|
|
253
|
+
"""Record a writer-queue lifecycle event.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
event: One of ``"enqueued"``, ``"coalesced"``, ``"drained"``.
|
|
257
|
+
op: The write op type (``"index"`` or ``"reindex"``).
|
|
258
|
+
reason: Coalesce reason constant (set for ``"coalesced"`` events).
|
|
259
|
+
queue_depth: Pending-queue depth at event time.
|
|
260
|
+
duration_ms: Drain wall-clock duration (set for ``"drained"``).
|
|
261
|
+
"""
|
|
262
|
+
if not self._enabled or self._conn is None:
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
try:
|
|
266
|
+
timestamp = datetime.now(UTC).isoformat()
|
|
267
|
+
self._conn.execute(
|
|
268
|
+
"""
|
|
269
|
+
INSERT INTO writer_queue_events
|
|
270
|
+
(timestamp, event, op, reason, queue_depth, duration_ms)
|
|
271
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
272
|
+
""",
|
|
273
|
+
(timestamp, event, op, reason, queue_depth, duration_ms),
|
|
274
|
+
)
|
|
275
|
+
self._conn.commit()
|
|
276
|
+
except sqlite3.Error as e:
|
|
277
|
+
logger.warning(f"Failed to record queue event: {e}")
|
|
278
|
+
|
|
231
279
|
def execute_query(self, query: str, params: tuple | None = None) -> list[tuple]:
|
|
232
280
|
"""Execute a read-only query.
|
|
233
281
|
|
sqlcg/server/control.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Manages the ``.pid`` and ``.sock`` files that allow a second CLI process to
|
|
4
4
|
discover, query, and stop the running MCP server.
|
|
5
5
|
|
|
6
|
-
All paths are derived from ``get_db_path()`` (i.e. ``
|
|
6
|
+
All paths are derived from ``get_db_path()`` (i.e. ``DbConfig.from_env().db_path``)
|
|
7
7
|
so that two servers on two different databases do not collide on a single control
|
|
8
8
|
file. Callers may pass an explicit ``db_path`` to override the default; this also
|
|
9
9
|
makes unit tests straightforward (set ``SQLCG_DB_PATH=/tmp/test.db``, assert paths
|
sqlcg/server/noise_filter.py
CHANGED
|
@@ -15,7 +15,7 @@ from sqlcg.core.config import (
|
|
|
15
15
|
class NoiseFilter:
|
|
16
16
|
"""Config-driven filter for backup tables and schema-alias mirrors.
|
|
17
17
|
|
|
18
|
-
Loaded once per tool call from
|
|
18
|
+
Loaded once per tool call from DbConfig.from_env().db_path parent dir
|
|
19
19
|
(falls back to cwd). All methods are pure — no graph calls.
|
|
20
20
|
"""
|
|
21
21
|
|
sqlcg/server/read_client.py
CHANGED
|
@@ -45,7 +45,7 @@ import typer
|
|
|
45
45
|
|
|
46
46
|
# Client-side socket timeout for the query control-socket path.
|
|
47
47
|
# Sized to cover the longest in-flight reindex (~89 s DWH resync_changed)
|
|
48
|
-
# with headroom. This is a CLI transport constant, NOT a
|
|
48
|
+
# with headroom. This is a CLI transport constant, NOT a DbConfig value —
|
|
49
49
|
# same convention as _NOTIFY_SOCKET_TIMEOUT_S in reindex.py.
|
|
50
50
|
_QUERY_SOCKET_TIMEOUT_S = 300
|
|
51
51
|
|
|
@@ -92,7 +92,7 @@ def query_via_server(
|
|
|
92
92
|
if not sp.exists():
|
|
93
93
|
return None
|
|
94
94
|
|
|
95
|
-
req = {"op": "query", "
|
|
95
|
+
req = {"op": "query", "sql": cypher, "params": params} # "sql" is the canonical key
|
|
96
96
|
req_bytes = json.dumps(req).encode()
|
|
97
97
|
frame = f"{len(req_bytes)}\n".encode() + req_bytes
|
|
98
98
|
|