sql-code-graph 1.2.2__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlcg/core/freshness.py CHANGED
@@ -74,7 +74,7 @@ def compute_freshness(root: Path, indexed_sha: str | None) -> Freshness:
74
74
  Args:
75
75
  root: Filesystem path used as the ``cwd`` for git commands. This is
76
76
  typically the ``r.path`` value read from the ``Repo`` graph node.
77
- indexed_sha: The SHA recorded by ``KuzuBackend.get_indexed_sha()``.
77
+ indexed_sha: The SHA recorded by ``DuckDBBackend.get_indexed_sha()``.
78
78
  May be ``None`` when the graph was never indexed, or a sentinel
79
79
  like ``"<head>+dirty"`` written by ``index --include-working-tree``.
80
80
  """
sqlcg/core/graph_db.py CHANGED
@@ -77,7 +77,7 @@ class GraphBackend(ABC):
77
77
 
78
78
  Each row dict must contain the primary-key field for `label` (see _pk_field)
79
79
  plus any other properties to SET. All rows must share the same property-key
80
- set; backends MAY raise if rows are heterogeneous (KuzuBackend does).
80
+ set; backends MAY raise if rows are heterogeneous (DuckDBBackend does).
81
81
 
82
82
  Idempotent MERGE semantics, identical to upsert_node per row.
83
83
 
@@ -118,7 +118,7 @@ class GraphBackend(ABC):
118
118
  """Execute a read-only query and return results.
119
119
 
120
120
  Args:
121
- query: Query string (Cypher for KùzuDB/Neo4j)
121
+ query: Query string (SQL)
122
122
  params: Parameters to bind in the query
123
123
 
124
124
  Returns:
@@ -130,7 +130,7 @@ class GraphBackend(ABC):
130
130
  """Execute a write query (mutation).
131
131
 
132
132
  Args:
133
- query: Query string (Cypher for KùzuDB/Neo4j)
133
+ query: Query string (SQL)
134
134
  params: Parameters to bind in the query
135
135
  """
136
136
 
@@ -213,7 +213,7 @@ class GraphBackend(ABC):
213
213
  def _validate_props(properties: dict[str, Any]) -> None:
214
214
  """Validate that all property keys are safe identifiers.
215
215
 
216
- Guards against Cypher injection via property key interpolation.
216
+ Guards against SQL injection via property key interpolation.
217
217
 
218
218
  Args:
219
219
  properties: Dictionary of properties to validate
@@ -244,3 +244,19 @@ class GraphBackend(ABC):
244
244
  yield self
245
245
  except Exception:
246
246
  raise
247
+
248
+ def clear_all_tables(self) -> None:
249
+ """Delete all node and edge rows, preserving the schema structure.
250
+
251
+ Used by the server drain body for the full-rebuild-in-transaction
252
+ reindex path. Concrete backends must override this.
253
+ """
254
+ raise NotImplementedError(f"{type(self).__name__} does not support clear_all_tables")
255
+
256
+ def expand_star_sources(self) -> int:
257
+ """Expand SELECT * lineage into per-column STAR_EXPANSION edges.
258
+
259
+ Runs once per index after ingestion. Concrete backends must override
260
+ this; returns the total STAR_EXPANSION edge count.
261
+ """
262
+ raise NotImplementedError(f"{type(self).__name__} does not support expand_star_sources")
sqlcg/core/queries.py CHANGED
@@ -1,20 +1,35 @@
1
- """Cypher query loader. All query strings live in queries.cypher."""
1
+ """SQL query loader. All query strings live in queries.sql."""
2
2
 
3
3
  import re
4
4
  from pathlib import Path
5
5
 
6
- _CYPHER_FILE = Path(__file__).parent / "queries.cypher"
6
+ _SQL_FILE = Path(__file__).parent / "queries.sql"
7
7
 
8
8
 
9
9
  def _load() -> dict[str, str]:
10
- """Load named Cypher blocks from queries.cypher.
10
+ """Load named SQL blocks from queries.sql.
11
11
 
12
12
  Format: blocks are separated by lines matching "-- BLOCK_NAME" at the start.
13
- Each block name becomes a key in the returned dict.
13
+ Each block name becomes a key in the returned dict. Only UPPER_SNAKE_CASE
14
+ headers split blocks; other "--" comment lines (e.g. "-- params: ...") stay in
15
+ the block text after the split and are then stripped from the returned query.
14
16
  """
15
- text = _CYPHER_FILE.read_text(encoding="utf-8")
16
- blocks = re.split(r"^--\s+(\w+)\s*$", text, flags=re.MULTILINE)
17
- return {blocks[i]: blocks[i + 1].strip() for i in range(1, len(blocks), 2)}
17
+ text = _SQL_FILE.read_text(encoding="utf-8")
18
+ # Split on lines like "-- BLOCK_NAME" (all-caps with underscores, start of line)
19
+ blocks = re.split(r"^--\s+([A-Z][A-Z0-9_]+)\s*$", text, flags=re.MULTILINE)
20
+ result: dict[str, str] = {}
21
+ for i in range(1, len(blocks), 2):
22
+ name = blocks[i]
23
+ body = blocks[i + 1].strip()
24
+ # Drop every comment-only line (e.g. "-- params: ...") from the block body
25
+ body_lines = []
26
+ for line in body.splitlines():
27
+ stripped = line.strip()
28
+ if stripped.startswith("--"):
29
+ continue
30
+ body_lines.append(line)
31
+ result[name] = "\n".join(body_lines).strip()
32
+ return result
18
33
 
19
34
 
20
35
  _Q = _load()
@@ -30,7 +45,11 @@ GET_DOWNSTREAM_DEPENDENCIES_QUERY = _Q["GET_DOWNSTREAM_DEPENDENCIES"]
30
45
  GET_UPSTREAM_DEPENDENCIES_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES"]
31
46
  SEARCH_SQL_PATTERN_QUERY = _Q["SEARCH_SQL_PATTERN"]
32
47
  LIST_DIALECTS_AND_REPOS_QUERY = _Q["LIST_DIALECTS_AND_REPOS"]
48
+ # EXPAND_STAR_SOURCES is implemented as three DML steps in DuckDBBackend.expand_star_sources()
49
+ # rather than a single query (DuckDB cannot do MERGE in the Cypher sense).
33
50
  EXPAND_STAR_SOURCES_QUERY = _Q["EXPAND_STAR_SOURCES"]
51
+ EXPAND_STAR_SOURCES_HAS_COLUMN_QUERY = _Q["EXPAND_STAR_SOURCES_HAS_COLUMN"]
52
+ EXPAND_STAR_SOURCES_LINEAGE_QUERY = _Q["EXPAND_STAR_SOURCES_LINEAGE"]
34
53
  COUNT_STAR_SOURCES_QUERY = _Q["COUNT_STAR_SOURCES"]
35
54
  COUNT_STAR_EXPANSIONS_QUERY = _Q["COUNT_STAR_EXPANSIONS"]
36
55
  FIND_DEFINITION_QUERY = _Q["FIND_DEFINITION"]
sqlcg/core/queries.sql ADDED
@@ -0,0 +1,249 @@
1
+ -- DuckDB SQL query library.
2
+ -- Format identical to queries.cypher: blocks separated by "-- BLOCK_NAME" lines.
3
+ -- All queries use ? positional parameters (list order matches the named params below).
4
+ -- Named params in comments are for documentation — callers pass values as a list.
5
+
6
+ -- DELETE_COLUMNS_FOR_FILE
7
+ -- params: [path]
8
+ DELETE FROM "SqlColumn" WHERE id IN (
9
+ SELECT hc.dst_key FROM "HAS_COLUMN" hc
10
+ WHERE hc.src_key IN (
11
+ SELECT di.src_key FROM "DEFINED_IN" di WHERE di.dst_key = ?
12
+ )
13
+ )
14
+
15
+ -- DELETE_QUERIES_FOR_FILE
16
+ -- params: [path]
17
+ DELETE FROM "SqlQuery" WHERE file_path = ?
18
+
19
+ -- DELETE_TABLES_FOR_FILE
20
+ -- params: [path]
21
+ DELETE FROM "SqlTable" WHERE qualified IN (
22
+ SELECT di.src_key FROM "DEFINED_IN" di WHERE di.dst_key = ?
23
+ )
24
+
25
+ -- DELETE_FILE
26
+ -- params: [path]
27
+ DELETE FROM "File" WHERE path = ?
28
+
29
+ -- INDEX_REPO_FILES
30
+ -- params: [repo_prefix]
31
+ SELECT path FROM "File" WHERE path LIKE ? || '%'
32
+
33
+ -- TRACE_COLUMN_LINEAGE
34
+ -- params: [id]
35
+ SELECT
36
+ src.id AS id,
37
+ src.col_name AS col_name,
38
+ src.table_qualified AS table_qualified,
39
+ cl.transform AS transform,
40
+ cl.confidence AS confidence,
41
+ q.file_path AS file,
42
+ q.start_line AS line,
43
+ q.sql AS expression,
44
+ t.kind AS table_kind
45
+ FROM "COLUMN_LINEAGE" cl
46
+ JOIN "SqlColumn" src ON src.id = cl.src_key
47
+ LEFT JOIN "SqlQuery" q ON q.id = cl.query_id
48
+ LEFT JOIN "SqlTable" t ON t.qualified = src.table_qualified
49
+ WHERE cl.dst_key = ?
50
+
51
+ -- FIND_TABLE_USAGES
52
+ -- params: [name]
53
+ SELECT f.path AS file, q.sql AS sql, q.kind AS kind
54
+ FROM "SqlTable" t
55
+ JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
56
+ JOIN "SqlQuery" q ON q.id = sf.src_key
57
+ JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
58
+ JOIN "File" f ON f.path = qdi.dst_key
59
+ WHERE t.name = ?
60
+
61
+ -- GET_DOWNSTREAM_DEPENDENCIES
62
+ -- params: [id]
63
+ SELECT dst.id AS id, dst.col_name AS col_name, dst.table_qualified AS table_qualified
64
+ FROM "COLUMN_LINEAGE" cl
65
+ JOIN "SqlColumn" dst ON dst.id = cl.dst_key
66
+ WHERE cl.src_key = ?
67
+
68
+ -- GET_UPSTREAM_DEPENDENCIES
69
+ -- params: [id]
70
+ SELECT src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
71
+ FROM "COLUMN_LINEAGE" cl
72
+ JOIN "SqlColumn" src ON src.id = cl.src_key
73
+ WHERE cl.dst_key = ?
74
+
75
+ -- SEARCH_SQL_PATTERN
76
+ -- params: [query, limit]
77
+ SELECT f.path AS file, q.sql AS sql, q.kind AS kind
78
+ FROM "SqlQuery" q
79
+ JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
80
+ JOIN "File" f ON f.path = qdi.dst_key
81
+ WHERE q.sql LIKE '%' || ? || '%'
82
+ LIMIT ?
83
+
84
+ -- LIST_DIALECTS_AND_REPOS
85
+ -- params: []
86
+ SELECT r.path AS path, r.name AS name,
87
+ list(DISTINCT f.dialect) AS dialects
88
+ FROM "Repo" r
89
+ JOIN "BELONGS_TO" bt ON bt.dst_key = r.path
90
+ JOIN "File" f ON f.path = bt.src_key
91
+ GROUP BY r.path, r.name
92
+
93
+ -- EXPAND_STAR_SOURCES
94
+ -- Step 1 of 3: inserts the destination SqlColumn nodes implied by STAR_SOURCE edges.
95
+ -- Edges are added by EXPAND_STAR_SOURCES_HAS_COLUMN and EXPAND_STAR_SOURCES_LINEAGE.
96
+ -- params: []
97
+ INSERT OR REPLACE INTO "SqlColumn" (id, col_name, table_qualified, catalog, db, table_name)
98
+ SELECT DISTINCT
99
+ q.target_table || '.' || c.col_name AS id,
100
+ c.col_name,
101
+ q.target_table AS table_qualified,
102
+ tgt.catalog,
103
+ tgt.db,
104
+ tgt.name AS table_name
105
+ FROM "STAR_SOURCE" ss
106
+ JOIN "SqlQuery" q ON q.id = ss.src_key
107
+ JOIN "SqlTable" t ON t.qualified = ss.dst_key
108
+ JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
109
+ JOIN "SqlColumn" c ON c.id = hc.dst_key
110
+ JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
111
+ WHERE q.target_table <> ''
112
+ AND q.target_table <> t.qualified
113
+
114
+ -- EXPAND_STAR_SOURCES_HAS_COLUMN
115
+ -- Insert HAS_COLUMN edges for the new destination columns.
116
+ -- params: []
117
+ INSERT OR REPLACE INTO "HAS_COLUMN" (src_key, dst_key, source)
118
+ SELECT DISTINCT
119
+ q.target_table AS src_key,
120
+ q.target_table || '.' || c.col_name AS dst_key,
121
+ 'star_expansion' AS source
122
+ FROM "STAR_SOURCE" ss
123
+ JOIN "SqlQuery" q ON q.id = ss.src_key
124
+ JOIN "SqlTable" t ON t.qualified = ss.dst_key
125
+ JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
126
+ JOIN "SqlColumn" c ON c.id = hc.dst_key
127
+ JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
128
+ WHERE q.target_table <> ''
129
+ AND q.target_table <> t.qualified
130
+
131
+ -- EXPAND_STAR_SOURCES_LINEAGE
132
+ -- Insert COLUMN_LINEAGE edges for the star expansion.
133
+ -- params: []
134
+ INSERT OR REPLACE INTO "COLUMN_LINEAGE" (src_key, dst_key, transform, confidence, query_id)
135
+ SELECT DISTINCT
136
+ c.id AS src_key,
137
+ q.target_table || '.' || c.col_name AS dst_key,
138
+ 'STAR_EXPANSION' AS transform,
139
+ 0.8 AS confidence,
140
+ q.id AS query_id
141
+ FROM "STAR_SOURCE" ss
142
+ JOIN "SqlQuery" q ON q.id = ss.src_key
143
+ JOIN "SqlTable" t ON t.qualified = ss.dst_key
144
+ JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
145
+ JOIN "SqlColumn" c ON c.id = hc.dst_key
146
+ JOIN "SqlTable" tgt ON tgt.qualified = q.target_table
147
+ WHERE q.target_table <> ''
148
+ AND q.target_table <> t.qualified
149
+
150
+ -- COUNT_STAR_SOURCES
151
+ -- params: []
152
+ SELECT count(*) AS n FROM "STAR_SOURCE"
153
+
154
+ -- COUNT_STAR_EXPANSIONS
155
+ -- params: []
156
+ SELECT count(*) AS n FROM "COLUMN_LINEAGE" WHERE transform = 'STAR_EXPANSION'
157
+
158
+ -- FIND_DEFINITION
159
+ -- params: [table_qualified]
160
+ SELECT t.qualified AS table_qualified, t.kind AS kind,
161
+ t.defined_in_file AS defined_in_file, f.path AS file_path
162
+ FROM "SqlTable" t
163
+ JOIN "DEFINED_IN" di ON di.src_key = t.qualified
164
+ JOIN "File" f ON f.path = di.dst_key
165
+ WHERE t.qualified = ?
166
+
167
+ -- GET_TABLE_DEFINING_FILES
168
+ -- params: [table_qualified]
169
+ SELECT f.path AS file_path, t.kind AS kind
170
+ FROM "SqlTable" t
171
+ JOIN "DEFINED_IN" di ON di.src_key = t.qualified
172
+ JOIN "File" f ON f.path = di.dst_key
173
+ WHERE t.qualified = ?
174
+
175
+ -- GET_TABLE_DIRECT_UPSTREAMS
176
+ -- params: [table_qualified, table_qualified]
177
+ SELECT DISTINCT src.qualified AS upstream_table, f.path AS in_file
178
+ FROM "SqlQuery" q
179
+ JOIN "SELECTS_FROM" sf ON sf.src_key = q.id
180
+ JOIN "SqlTable" src ON src.qualified = sf.dst_key
181
+ LEFT JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
182
+ LEFT JOIN "File" f ON f.path = qdi.dst_key
183
+ WHERE q.target_table = ?
184
+ AND src.qualified <> ?
185
+
186
+ -- GET_COLUMNS_FOR_TABLE
187
+ -- params: [table_qualified]
188
+ SELECT c.id AS col_id, c.col_name AS col_name
189
+ FROM "SqlTable" t
190
+ JOIN "HAS_COLUMN" hc ON hc.src_key = t.qualified
191
+ JOIN "SqlColumn" c ON c.id = hc.dst_key
192
+ WHERE t.qualified = ?
193
+
194
+ -- GET_TABLES_DEFINED_IN_FILE
195
+ -- params: [file_path]
196
+ SELECT t.qualified AS table_qualified
197
+ FROM "SqlTable" t
198
+ JOIN "DEFINED_IN" di ON di.src_key = t.qualified
199
+ WHERE di.dst_key = ?
200
+
201
+ -- ANALYZE_UNUSED_TABLES
202
+ -- params: []
203
+ SELECT t.qualified AS table_qualified
204
+ FROM "SqlTable" t
205
+ WHERE t.qualified NOT IN (SELECT DISTINCT dst_key FROM "SELECTS_FROM")
206
+ ORDER BY t.qualified
207
+
208
+ -- HUB_RANKING
209
+ -- params: [k]
210
+ SELECT t.qualified AS table_qualified,
211
+ count(DISTINCT q.target_table) AS downstream_dependents
212
+ FROM "SqlTable" t
213
+ JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
214
+ JOIN "SqlQuery" q ON q.id = sf.src_key
215
+ WHERE q.target_table <> ''
216
+ AND q.target_table <> t.qualified
217
+ GROUP BY t.qualified
218
+ ORDER BY downstream_dependents DESC, t.qualified
219
+ LIMIT ?
220
+
221
+ -- DEPENDENT_FILES_OF_TABLES
222
+ -- params: [tables] (list — bound as a single list parameter to ANY(?))
223
+ SELECT DISTINCT f.path AS path
224
+ FROM "SqlTable" t
225
+ JOIN "SELECTS_FROM" sf ON sf.dst_key = t.qualified
226
+ JOIN "SqlQuery" q ON q.id = sf.src_key
227
+ JOIN "QUERY_DEFINED_IN" qdi ON qdi.src_key = q.id
228
+ JOIN "File" f ON f.path = qdi.dst_key
229
+ WHERE t.qualified = ANY(?)
230
+
231
+ -- GET_TABLE_EXTERNAL_CONSUMERS
232
+ -- params: [table_qualified]
233
+ SELECT e.name AS name, e.consumer_type AS consumer_type
234
+ FROM "SqlTable" t
235
+ JOIN "CONSUMED_BY" cb ON cb.src_key = t.qualified
236
+ JOIN "ExternalConsumer" e ON e.name = cb.dst_key
237
+ WHERE t.qualified = ?
238
+
239
+ -- GET_TABLES_EXTERNAL_CONSUMERS_BATCH
240
+ -- params: [table_qualifieds] (list)
241
+ SELECT t.qualified AS table_qualified, e.name AS name, e.consumer_type AS consumer_type
242
+ FROM "SqlTable" t
243
+ JOIN "CONSUMED_BY" cb ON cb.src_key = t.qualified
244
+ JOIN "ExternalConsumer" e ON e.name = cb.dst_key
245
+ WHERE t.qualified = ANY(?)
246
+
247
+ -- COUNT_EXTERNAL_CONSUMERS
248
+ -- params: []
249
+ SELECT count(*) AS n FROM "CONSUMED_BY"
sqlcg/core/schema.py CHANGED
@@ -1,4 +1,4 @@
1
- """KùzuDB schema definition for sqlcg graph."""
1
+ """Graph schema definition for sqlcg (DuckDB backend)."""
2
2
 
3
3
  from enum import StrEnum
4
4
  from importlib.resources import files
sqlcg/indexer/indexer.py CHANGED
@@ -773,9 +773,13 @@ class Indexer:
773
773
  for cl_path in closure_files:
774
774
  try:
775
775
  cl_rows = db.run_read(
776
- "MATCH (f:File {path: $path})<-[:QUERY_DEFINED_IN]-(q:SqlQuery) "
777
- "MATCH (q)-[:SELECTS_FROM]->(t:SqlTable) "
778
- "RETURN DISTINCT t.name AS name",
776
+ "SELECT DISTINCT t.name AS name"
777
+ ' FROM "File" f'
778
+ ' JOIN "QUERY_DEFINED_IN" qdi ON qdi.dst_key = f.path'
779
+ ' JOIN "SqlQuery" q ON q.id = qdi.src_key'
780
+ ' JOIN "SELECTS_FROM" sf ON sf.src_key = q.id'
781
+ ' JOIN "SqlTable" t ON t.qualified = sf.dst_key'
782
+ " WHERE f.path = ?",
779
783
  {"path": cl_path},
780
784
  )
781
785
  for row in cl_rows:
@@ -794,17 +798,21 @@ class Indexer:
794
798
  # first, but we do a broader search since the table might be qualified differently.
795
799
  try:
796
800
  def_rows = db.run_read(
797
- "MATCH (t:SqlTable)-[:DEFINED_IN]->(f:File) "
798
- "WHERE t.name = $name "
799
- "RETURN DISTINCT f.path AS file_path",
800
- {"name": bare_name.upper()},
801
+ "SELECT DISTINCT f.path AS file_path"
802
+ ' FROM "SqlTable" t'
803
+ ' JOIN "DEFINED_IN" di ON di.src_key = t.qualified'
804
+ ' JOIN "File" f ON f.path = di.dst_key'
805
+ " WHERE upper(t.name) = upper(?)",
806
+ {"name": bare_name},
801
807
  )
802
808
  if not def_rows:
803
- # Try lowercase match
809
+ # Try bare lowercase match
804
810
  def_rows = db.run_read(
805
- "MATCH (t:SqlTable)-[:DEFINED_IN]->(f:File) "
806
- "WHERE t.name = $name "
807
- "RETURN DISTINCT f.path AS file_path",
811
+ "SELECT DISTINCT f.path AS file_path"
812
+ ' FROM "SqlTable" t'
813
+ ' JOIN "DEFINED_IN" di ON di.src_key = t.qualified'
814
+ ' JOIN "File" f ON f.path = di.dst_key'
815
+ " WHERE lower(t.name) = lower(?)",
808
816
  {"name": bare_name},
809
817
  )
810
818
  for row in def_rows:
@@ -1369,32 +1377,15 @@ class Indexer:
1369
1377
  return file_rows.counts
1370
1378
 
1371
1379
  def _expand_star_sources(self, db: GraphBackend) -> int:
1372
- """Run the post-ingestion star expansion query.
1380
+ """Run the post-ingestion star expansion.
1381
+
1382
+ Calls the three-step DML expand_star_sources() method which returns the
1383
+ total STAR_EXPANSION edge count.
1373
1384
 
1374
1385
  Returns:
1375
- Number of COLUMN_LINEAGE edges created by the expansion
1386
+ Number of COLUMN_LINEAGE STAR_EXPANSION edges after expansion.
1376
1387
  """
1377
- from sqlcg.core.queries import EXPAND_STAR_SOURCES_QUERY
1378
-
1379
- # Count COLUMN_LINEAGE edges before expansion
1380
- before = db.run_read(
1381
- "MATCH ()-[r:COLUMN_LINEAGE {transform: 'STAR_EXPANSION'}]->() RETURN count(r) AS n",
1382
- {},
1383
- )
1384
- before_count = before[0]["n"] if before else 0
1385
-
1386
- # Run the expansion query (without explicit transaction, as caller may already be in one)
1387
- db.run_read(EXPAND_STAR_SOURCES_QUERY, {})
1388
-
1389
- # Count COLUMN_LINEAGE edges after expansion
1390
- after = db.run_read(
1391
- "MATCH ()-[r:COLUMN_LINEAGE {transform: 'STAR_EXPANSION'}]->() RETURN count(r) AS n",
1392
- {},
1393
- )
1394
- after_count = after[0]["n"] if after else 0
1395
-
1396
- # Return the number of new edges created
1397
- return max(0, after_count - before_count)
1388
+ return db.expand_star_sources()
1398
1389
 
1399
1390
  def _ingest_external_consumers(self, db: GraphBackend, path: Path) -> dict:
1400
1391
  """Ingest declared external downstream consumers from .sqlcg.toml.
@@ -1417,10 +1408,10 @@ class Indexer:
1417
1408
  for spec in specs:
1418
1409
  all_targets.extend(spec.consumes)
1419
1410
 
1420
- # Single UNWIND round-trip to check which targets exist as SqlTable nodes
1411
+ # Single round-trip to check which targets exist as SqlTable nodes
1421
1412
  if all_targets:
1422
1413
  existing_rows = db.run_read(
1423
- "UNWIND $names AS n MATCH (t:SqlTable {qualified: n}) RETURN n AS qualified",
1414
+ 'SELECT qualified FROM "SqlTable" WHERE qualified = ANY(?)',
1424
1415
  {"names": all_targets},
1425
1416
  )
1426
1417
  existing_tables: set[str] = {row["qualified"] for row in existing_rows}
sqlcg/metrics/store.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """SQLite-based metrics storage for SQL Code Graph.
2
2
 
3
- Importable without KùzuDB. All writes are wrapped in try/except with
3
+ Importable without a graph backend. All writes are wrapped in try/except with
4
4
  WARNING-level logging on failure. Opt-out via SQLCG_METRICS=0.
5
5
  """
6
6
 
@@ -117,6 +117,19 @@ class MetricsStore:
117
117
  )
118
118
  """
119
119
  )
120
+ self._conn.execute(
121
+ """
122
+ CREATE TABLE IF NOT EXISTS writer_queue_events (
123
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
124
+ timestamp TEXT NOT NULL,
125
+ event TEXT NOT NULL,
126
+ op TEXT,
127
+ reason TEXT,
128
+ queue_depth INTEGER,
129
+ duration_ms REAL
130
+ )
131
+ """
132
+ )
120
133
  self._conn.commit()
121
134
  except sqlite3.Error as e:
122
135
  logger.warning(f"Failed to initialize metrics schema: {e}")
@@ -228,6 +241,41 @@ class MetricsStore:
228
241
  except sqlite3.Error as e:
229
242
  logger.warning(f"Failed to record feedback: {e}")
230
243
 
244
+ def record_queue_event(
245
+ self,
246
+ event: str,
247
+ *,
248
+ op: str | None = None,
249
+ reason: str | None = None,
250
+ queue_depth: int | None = None,
251
+ duration_ms: float | None = None,
252
+ ) -> None:
253
+ """Record a writer-queue lifecycle event.
254
+
255
+ Args:
256
+ event: One of ``"enqueued"``, ``"coalesced"``, ``"drained"``.
257
+ op: The write op type (``"index"`` or ``"reindex"``).
258
+ reason: Coalesce reason constant (set for ``"coalesced"`` events).
259
+ queue_depth: Pending-queue depth at event time.
260
+ duration_ms: Drain wall-clock duration (set for ``"drained"``).
261
+ """
262
+ if not self._enabled or self._conn is None:
263
+ return
264
+
265
+ try:
266
+ timestamp = datetime.now(UTC).isoformat()
267
+ self._conn.execute(
268
+ """
269
+ INSERT INTO writer_queue_events
270
+ (timestamp, event, op, reason, queue_depth, duration_ms)
271
+ VALUES (?, ?, ?, ?, ?, ?)
272
+ """,
273
+ (timestamp, event, op, reason, queue_depth, duration_ms),
274
+ )
275
+ self._conn.commit()
276
+ except sqlite3.Error as e:
277
+ logger.warning(f"Failed to record queue event: {e}")
278
+
231
279
  def execute_query(self, query: str, params: tuple | None = None) -> list[tuple]:
232
280
  """Execute a read-only query.
233
281
 
sqlcg/server/control.py CHANGED
@@ -3,7 +3,7 @@
3
3
  Manages the ``.pid`` and ``.sock`` files that allow a second CLI process to
4
4
  discover, query, and stop the running MCP server.
5
5
 
6
- All paths are derived from ``get_db_path()`` (i.e. ``KuzuConfig.from_env().db_path``)
6
+ All paths are derived from ``get_db_path()`` (i.e. ``DbConfig.from_env().db_path``)
7
7
  so that two servers on two different databases do not collide on a single control
8
8
  file. Callers may pass an explicit ``db_path`` to override the default; this also
9
9
  makes unit tests straightforward (set ``SQLCG_DB_PATH=/tmp/test.db``, assert paths
@@ -15,7 +15,7 @@ from sqlcg.core.config import (
15
15
  class NoiseFilter:
16
16
  """Config-driven filter for backup tables and schema-alias mirrors.
17
17
 
18
- Loaded once per tool call from KuzuConfig.from_env().db_path parent dir
18
+ Loaded once per tool call from DbConfig.from_env().db_path parent dir
19
19
  (falls back to cwd). All methods are pure — no graph calls.
20
20
  """
21
21
 
@@ -45,7 +45,7 @@ import typer
45
45
 
46
46
  # Client-side socket timeout for the query control-socket path.
47
47
  # Sized to cover the longest in-flight reindex (~89 s DWH resync_changed)
48
- # with headroom. This is a CLI transport constant, NOT a KuzuConfig value —
48
+ # with headroom. This is a CLI transport constant, NOT a DbConfig value —
49
49
  # same convention as _NOTIFY_SOCKET_TIMEOUT_S in reindex.py.
50
50
  _QUERY_SOCKET_TIMEOUT_S = 300
51
51
 
@@ -92,7 +92,7 @@ def query_via_server(
92
92
  if not sp.exists():
93
93
  return None
94
94
 
95
- req = {"op": "query", "cypher": cypher, "params": params}
95
+ req = {"op": "query", "sql": cypher, "params": params} # "sql" is the canonical key
96
96
  req_bytes = json.dumps(req).encode()
97
97
  frame = f"{len(req_bytes)}\n".encode() + req_bytes
98
98