sql-code-graph: sql_code_graph-1.1.0-py3-none-any.whl → sql_code_graph-1.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_code_graph-1.1.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.1.0
3
+ Version: 1.1.3
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
sql_code_graph-1.1.3.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
1
- sqlcg/__init__.py,sha256=CWoJX8Awg5Tf6p2E5lT66EFE8kd-Aru8aujKizglgdo,115
1
+ sqlcg/__init__.py,sha256=YGDRrWVIrONmQholAKWh6hSKxlPd2dLcM1AdHHdBhEA,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=qtvM_TeqYzaLClZksM_o5hAdksZ9sqLM9HGDtLDrXwY,12646
7
- sqlcg/cli/commands/db.py,sha256=Q3VEdNJzhrs26KtskI5j9B3C0vBTZe4VN2sZXZG_6BY,7434
8
- sqlcg/cli/commands/find.py,sha256=5MbGavA-QS75zwm35dYK-0H1bJ1Zd_gJHgQ_lXnpMDU,3126
9
- sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
6
+ sqlcg/cli/commands/analyze.py,sha256=hiKj0R1m5i4ZmwrXBlVT14xGy6rs9jmv_ZDCLVZj4Tg,14282
7
+ sqlcg/cli/commands/db.py,sha256=5VpknLqYaimK6YA516w6iQVX6JmHcD52o6MuW5d088c,7462
8
+ sqlcg/cli/commands/find.py,sha256=SsK6q4YRPknrz_lIQ4Gun6HRoAdoVRGClwAYdm_s2OU,3168
9
+ sqlcg/cli/commands/gain.py,sha256=hz36QmuaXJXutI4vyNMDfcNsBeLTXa6EOw2bWe2AhTQ,8939
10
10
  sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
11
11
  sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
12
12
  sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
@@ -16,21 +16,21 @@ sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,
16
16
  sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
18
  sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=8QtFNRnrzLK1Zw93AKX37h6bSASDLv-42FzDQ7zxTtI,13079
19
+ sqlcg/core/config.py,sha256=LuB8HWPsIt1OsjOshTT1bJdXWXN01w76ABl9M-VB9DM,14777
20
20
  sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
21
  sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
22
22
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
23
- sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
23
+ sqlcg/core/kuzu_backend.py,sha256=3kL8bGEQm70fuxYUdt1p7fsY12lCLQ07x01NYg6FOGA,16821
24
24
  sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
25
- sqlcg/core/queries.cypher,sha256=91Pb10-ekSi0812wuHJTdXcMY4sT53_5o-oHhfSP_DQ,4967
26
- sqlcg/core/queries.py,sha256=JLgV4MIgP7KVIQ0xpGj3_-MBhBfY_9XPoCdcI2mO-TM,2148
25
+ sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
26
+ sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
27
27
  sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
28
  sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
29
29
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
30
30
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
31
31
  sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
32
32
  sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
33
- sqlcg/indexer/indexer.py,sha256=DYdUr59hRKCjJTRiQUWOC72JUQ9TgBrH0W4UOYNwqx8,60913
33
+ sqlcg/indexer/indexer.py,sha256=KyyowxiSNU3Gm4JE-mj8gVm6D80XERJPd-he59I2sIk,62018
34
34
  sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
35
35
  sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
36
36
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
@@ -41,7 +41,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
41
41
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
42
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
43
43
  sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
44
- sqlcg/parsers/base.py,sha256=uL0W22zpbIz_9eq-i-4LSlonxy2J1yChuISMLSYgvRU,49345
44
+ sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
45
45
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
46
46
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
47
47
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
@@ -59,7 +59,7 @@ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
59
59
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
60
60
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
61
61
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
62
- sql_code_graph-1.1.0.dist-info/METADATA,sha256=blW1eYNjfy6P61747uUtc22qm5MDETMcVYImXPa762g,13615
63
- sql_code_graph-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
- sql_code_graph-1.1.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
65
- sql_code_graph-1.1.0.dist-info/RECORD,,
62
+ sql_code_graph-1.1.3.dist-info/METADATA,sha256=Z_aRnsDOgZ_ngAHkIr3x2XpEjF-x6UMUQwcIkAMlGjo,13615
63
+ sql_code_graph-1.1.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
+ sql_code_graph-1.1.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
65
+ sql_code_graph-1.1.3.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.1.0"
3
+ __version__ = "1.1.3"
4
4
 
5
5
  __all__ = ["__version__"]
sqlcg/cli/commands/analyze.py CHANGED
@@ -19,6 +19,29 @@ app = typer.Typer(help="Lineage analysis")
19
19
  console = Console()
20
20
 
21
21
 
22
+ def _kind_filter(source_alias: str, include_intermediate: bool) -> str:
23
+ """Build the Half-B (#38) kind filter for the lineage traversal query.
24
+
25
+ When ``include_intermediate`` is False, the filter uses ``OPTIONAL MATCH`` plus
26
+ ``t.kind IS NULL OR t.kind IN ['table', 'external']`` so a source whose SqlTable
27
+ node is ABSENT (a CTE-body source on a graph indexed before the #39 fix, or not yet
28
+ re-indexed) is KEPT rather than silently dropped. Reverting this to an inner
29
+ ``MATCH (t:SqlTable {...}) ... WHERE t.kind IN [...]`` is the #38 regression:
30
+ node-less physical sources vanish from results.
31
+
32
+ ``source_alias`` is ``src`` for upstream and ``dst`` for downstream — it names both
33
+ the node whose table is looked up and the variable carried through the WITH clause.
34
+ This is the single production source of the filter string; the #40 recall guard
35
+ imports it so reverting Half B here turns the guard red.
36
+ """
37
+ if include_intermediate:
38
+ return ""
39
+ return (
40
+ f"OPTIONAL MATCH (t:SqlTable {{qualified: {source_alias}.table_qualified}}) "
41
+ f"WITH c, {source_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
42
+ )
43
+
44
+
22
45
  @app.command("upstream")
23
46
  def upstream( # noqa: B008
24
47
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
@@ -34,15 +57,14 @@ def upstream( # noqa: B008
34
57
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
35
58
  raise typer.Exit(1)
36
59
 
37
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
38
- kind_filter = (
39
- ""
40
- if include_intermediate
41
- else "MATCH (t:SqlTable {qualified: src.table_qualified}) "
42
- "WHERE t.kind IN ['table', 'external'] "
43
- )
60
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
61
+ # Half B (#38): use OPTIONAL MATCH so a missing SqlTable node (e.g. CTE-body source not yet
62
+ # re-indexed after #39 fix) is KEPT rather than silently dropped. WHERE t.kind IS NULL OR
63
+ # t.kind IN [...] means: keep when node absent (NULL) OR when kind is a physical source.
64
+ # CTE aliases (kind='cte') and derived tables (kind='derived') are filtered out.
65
+ kind_filter = _kind_filter("src", include_intermediate)
44
66
 
45
- with get_backend() as backend:
67
+ with get_backend(read_only=True) as backend:
46
68
  results = backend.run_read(
47
69
  f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
48
70
  f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
@@ -95,15 +117,13 @@ def downstream( # noqa: B008
95
117
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
96
118
  raise typer.Exit(1)
97
119
 
98
- # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
99
- kind_filter = (
100
- ""
101
- if include_intermediate
102
- else "MATCH (t:SqlTable {qualified: dst.table_qualified}) "
103
- "WHERE t.kind IN ['table', 'external'] "
104
- )
120
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
121
+ # Half B (#38): OPTIONAL MATCH keeps sources whose SqlTable node is absent (NULL) or is a
122
+ # physical kind. WITH c, dst, t carries the three variables in scope at this interpolation
123
+ # point; direct and q are bound later in the query.
124
+ kind_filter = _kind_filter("dst", include_intermediate)
105
125
 
106
- with get_backend() as backend:
126
+ with get_backend(read_only=True) as backend:
107
127
  results = backend.run_read(
108
128
  f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
109
129
  f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
@@ -171,7 +191,7 @@ def impact( # noqa: B008
171
191
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
172
192
  ) -> None:
173
193
  """Show all queries impacted by a table."""
174
- with get_backend() as backend:
194
+ with get_backend(read_only=True) as backend:
175
195
  results = backend.run_read(
176
196
  f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
177
197
  f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
@@ -207,7 +227,7 @@ def failures(
207
227
  with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
208
228
  an earlier version.
209
229
  """
210
- with get_backend() as backend:
230
+ with get_backend(read_only=True) as backend:
211
231
  cypher = (
212
232
  f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
213
233
  "AND ($cause IS NULL OR f.parse_cause = $cause) "
@@ -224,7 +244,7 @@ def unused(
224
244
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
225
245
  ) -> None:
226
246
  """Find tables with no query references."""
227
- with get_backend() as backend:
247
+ with get_backend(read_only=True) as backend:
228
248
  results = backend.run_read(
229
249
  f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
230
250
  "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
sqlcg/cli/commands/db.py CHANGED
@@ -75,7 +75,7 @@ def db_reset( # noqa: B008
75
75
  @app.command("info")
76
76
  def db_info() -> None:
77
77
  """Show database stats."""
78
- with get_backend() as backend:
78
+ with get_backend(read_only=True) as backend:
79
79
  version = backend.get_schema_version() or "unknown"
80
80
  console.print(f"Schema version: {version}")
81
81
 
@@ -167,7 +167,7 @@ def db_info() -> None:
167
167
  @app.command("list-repos")
168
168
  def list_repos() -> None:
169
169
  """List all indexed repositories."""
170
- with get_backend() as backend:
170
+ with get_backend(read_only=True) as backend:
171
171
  result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
172
172
 
173
173
  if not result:
sqlcg/cli/commands/find.py CHANGED
@@ -18,7 +18,7 @@ def find_table( # noqa: B008
18
18
  ) -> None:
19
19
  """Find a table by name."""
20
20
  name = name.lower() # graph keys are lowercased at index time (C2 normalization)
21
- with get_backend() as backend:
21
+ with get_backend(read_only=True) as backend:
22
22
  results = backend.run_read(
23
23
  f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
24
24
  "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
@@ -42,7 +42,7 @@ def find_column( # noqa: B008
42
42
  ) -> None:
43
43
  """Find a column by table.column reference."""
44
44
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
45
- with get_backend() as backend:
45
+ with get_backend(read_only=True) as backend:
46
46
  results = backend.run_read(
47
47
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
48
48
  {"ref": ref},
@@ -61,7 +61,7 @@ def find_pattern( # noqa: B008
61
61
  pattern: str = typer.Argument(..., help="SQL pattern to search for"), # noqa: B008
62
62
  ) -> None:
63
63
  """Find queries containing a SQL pattern."""
64
- with get_backend() as backend:
64
+ with get_backend(read_only=True) as backend:
65
65
  results = backend.run_read(
66
66
  f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
67
67
  "RETURN q.id AS id, q.kind AS kind LIMIT 50",
sqlcg/cli/commands/gain.py CHANGED
@@ -123,7 +123,7 @@ def gain_cmd(
123
123
  # Section F: parse quality from graph
124
124
  parse_quality: dict[str, int] | None = None
125
125
  try:
126
- with get_backend() as backend:
126
+ with get_backend(read_only=True) as backend:
127
127
  mode_rows = backend.run_read(
128
128
  "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
129
129
  " COUNT(q) AS cnt ORDER BY cnt DESC",
sqlcg/core/config.py CHANGED
@@ -346,9 +346,25 @@ def get_external_consumers(path: Path) -> list[ExternalConsumerSpec]:
346
346
  return []
347
347
 
348
348
 
349
- def get_backend() -> "GraphBackend":
349
+ def get_backend(read_only: bool = False) -> "GraphBackend":
350
350
  """Get a graph backend instance respecting the SQLCG_BACKEND env var.
351
351
 
352
+ Args:
353
+ read_only: Open in read-only mode. When ``True``, the KùzuDB open
354
+ does not take an exclusive write lock, enabling *multiple concurrent
355
+ read-only opens* (reader/reader concurrency). CLI read commands
356
+ pass ``True`` so they do not hold the exclusive write lock and
357
+ therefore do not block other concurrent readers or a pending reindex.
358
+ Note: this does NOT allow reads while a read-write writer already
359
+ holds the exclusive lock — KùzuDB's exclusive write lock is
360
+ process-level; a ``read_only=True`` open still fails with
361
+ "Database is locked" when a writer is active. Reads during an
362
+ active writer remain a known limitation (future work: route reads
363
+ through the live MCP server).
364
+ Neo4j has no single-writer lock; this flag is a no-op there.
365
+ All writer call sites (index, reindex, db init/reset, server
366
+ init_backend) use the default ``False``.
367
+
352
368
  Returns:
353
369
  A GraphBackend instance (KuzuBackend by default, or Neo4jBackend)
354
370
 
@@ -361,14 +377,26 @@ def get_backend() -> "GraphBackend":
361
377
  from sqlcg.core.kuzu_backend import KuzuBackend
362
378
 
363
379
  kuzu_cfg = KuzuConfig.from_env()
364
- return KuzuBackend(
365
- str(kuzu_cfg.db_path),
366
- buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
367
- )
380
+ try:
381
+ return KuzuBackend(
382
+ str(kuzu_cfg.db_path),
383
+ buffer_pool_size_mb=kuzu_cfg.buffer_pool_size_mb,
384
+ read_only=read_only,
385
+ )
386
+ except RuntimeError as exc:
387
+ if read_only and "READ ONLY" in str(exc):
388
+ # KùzuDB refuses to open a non-existent or empty DB in read-only
389
+ # mode ("Cannot create an empty database under READ ONLY mode").
390
+ # Surface the same empty-DB guidance the user sees from `db info`.
391
+ raise RuntimeError(
392
+ "Database not initialised — run 'sqlcg db init' and 'sqlcg index <path>' first."
393
+ ) from exc
394
+ raise
368
395
  elif backend_type == "neo4j":
369
396
  from sqlcg.core.neo4j_backend import Neo4jBackend
370
397
 
371
398
  neo4j_cfg = Neo4jConfig.from_env()
399
+ # Neo4j has no single-writer lock; read_only is a no-op here.
372
400
  return Neo4jBackend(neo4j_cfg.uri, neo4j_cfg.user, neo4j_cfg.password)
373
401
  else:
374
402
  raise ValueError(f"Unknown backend type: {backend_type}")
sqlcg/core/kuzu_backend.py CHANGED
@@ -58,7 +58,10 @@ class KuzuBackend(GraphBackend):
58
58
  Args:
59
59
  db_path: Path to the KùzuDB database file (or ':memory:' for in-memory)
60
60
  buffer_pool_size_mb: Buffer pool size in MB (0 = use KuzuDB default)
61
- read_only: Open in read-only mode (allows concurrent indexing)
61
+ read_only: Open in read-only mode. Enables concurrent read-only
62
+ opens (reader/reader concurrency) by not taking the exclusive
63
+ write lock. Does NOT allow reads while a read-write writer
64
+ holds the lock — KùzuDB's exclusive lock is process-level.
62
65
 
63
66
  Raises:
64
67
  RuntimeError: If the database is locked or cannot be opened.
sqlcg/core/queries.cypher CHANGED
@@ -38,12 +38,6 @@ RETURN dst.id AS id, dst.col_name AS col_name, dst.table_qualified AS table_qual
38
38
  MATCH (dst:SqlColumn {id: $id})<-[:COLUMN_LINEAGE]-(src:SqlColumn)
39
39
  RETURN src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
40
40
 
41
- -- GET_UPSTREAM_DEPENDENCIES_FILTERED
42
- MATCH (dst:SqlColumn {id: $id})<-[:COLUMN_LINEAGE]-(src:SqlColumn)
43
- MATCH (t:SqlTable {qualified: src.table_qualified})
44
- WHERE t.kind IN ['table', 'external']
45
- RETURN src.id AS id, src.col_name AS col_name, src.table_qualified AS table_qualified
46
-
47
41
  -- SEARCH_SQL_PATTERN
48
42
  MATCH (q:SqlQuery)-[:QUERY_DEFINED_IN]->(f:File)
49
43
  WHERE contains(q.sql, $query)
sqlcg/core/queries.py CHANGED
@@ -28,7 +28,6 @@ TRACE_COLUMN_LINEAGE_QUERY = _Q["TRACE_COLUMN_LINEAGE"]
28
28
  FIND_TABLE_USAGES_QUERY = _Q["FIND_TABLE_USAGES"]
29
29
  GET_DOWNSTREAM_DEPENDENCIES_QUERY = _Q["GET_DOWNSTREAM_DEPENDENCIES"]
30
30
  GET_UPSTREAM_DEPENDENCIES_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES"]
31
- GET_UPSTREAM_DEPENDENCIES_FILTERED_QUERY = _Q["GET_UPSTREAM_DEPENDENCIES_FILTERED"]
32
31
  SEARCH_SQL_PATTERN_QUERY = _Q["SEARCH_SQL_PATTERN"]
33
32
  LIST_DIALECTS_AND_REPOS_QUERY = _Q["LIST_DIALECTS_AND_REPOS"]
34
33
  EXPAND_STAR_SOURCES_QUERY = _Q["EXPAND_STAR_SOURCES"]
sqlcg/indexer/indexer.py CHANGED
@@ -1126,6 +1126,24 @@ class Indexer:
1126
1126
  "table_name": edge.src.table.name,
1127
1127
  }
1128
1128
  )
1129
+ # Half A (#39): emit a SqlTable node for the source table.
1130
+ # CTE-body-only sources are not in stmt.sources (which only covers
1131
+ # tables reachable via the parser's top-level FROM list), so they were
1132
+ # previously missing from the graph. edge.src.table is a frozen
1133
+ # TableRef with schema-aliasing already applied at parse time — the
1134
+ # qualified value is guaranteed to match edge.src.table_qualified.
1135
+ # key set is identical to other table_rows entries → upsert_nodes_bulk
1136
+ # homogeneity preserved; MERGE on primary key deduplicates re-emits.
1137
+ rows.table_rows.append(
1138
+ {
1139
+ "qualified": edge.src.table.full_id,
1140
+ "name": edge.src.table.name,
1141
+ "catalog": edge.src.table.catalog or "",
1142
+ "db": edge.src.table.db or "",
1143
+ "kind": edge.src.table.role,
1144
+ "defined_in_file": "",
1145
+ }
1146
+ )
1129
1147
  rows.column_rows.append(
1130
1148
  {
1131
1149
  "id": dst_id,
sqlcg/parsers/base.py CHANGED
@@ -967,10 +967,16 @@ class SqlParser(ABC):
967
967
  if not isinstance(cte_body, (exp.Select, exp.Union)):
968
968
  continue
969
969
 
970
- # For Union bodies, use the left branch's projections.
970
+ # For Union bodies, use the deepest left-branch Select's projections.
971
971
  # Union.expressions is always empty; projections are on Union.this.
972
+ # For N=2: cte_body.this is a Select — the while loop is a no-op.
973
+ # For N≥3: cte_body.this is a nested Union (A UNION ALL B UNION ALL C
974
+ # parses as Union(Union(A,B),C)), so we walk down to the deepest
975
+ # left-branch Select (whose star qualify() already expanded in place).
972
976
  if isinstance(cte_body, exp.Union):
973
977
  projection_source = cte_body.this
978
+ while isinstance(projection_source, exp.Union):
979
+ projection_source = projection_source.this
974
980
  else:
975
981
  projection_source = cte_body
976
982