sql-code-graph 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.1
3
+ Version: 1.0.2
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -1,22 +1,22 @@
1
- sqlcg/__init__.py,sha256=rhzbVCLAOlBWTlliY-J8bh3jG3Hn7-5PPLDJwujXW_g,115
1
+ sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=kfcySSjc_UhSsOsJg7o5VD7TH4v72KVzol7Cdn2EuOU,4127
6
+ sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
7
7
  sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
8
- sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
8
+ sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
9
9
  sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
10
  sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
11
- sqlcg/cli/commands/index.py,sha256=b6ns4_1ZVnHE5GeIb2N8YogjUvhjkzn_F9HrwCqrt_Y,8253
12
- sqlcg/cli/commands/install.py,sha256=mNVXdGlQ4JtCaaibuzU-inf519T97mC-Nj9K-G2gMQY,7525
13
- sqlcg/cli/commands/mcp.py,sha256=H1j6b5Tqr5VXja2GafgD5sJD6hZ5rsgfPwIikK1PZqc,1903
14
- sqlcg/cli/commands/reindex.py,sha256=iZXxYGI2m2wxkvIA1mB9uvOEp66QaT5zF5TGd0OpqlU,6275
11
+ sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
12
+ sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
+ sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
14
+ sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
16
  sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
18
  sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=em9gYtau2hu-scWzZk4CSZh4L8r9ZymgmH_2BspqsQw,9773
19
+ sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
20
20
  sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
21
21
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
22
22
  sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
@@ -27,10 +27,10 @@ sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
27
27
  sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
28
28
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
29
29
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
30
- sqlcg/indexer/error_classify.py,sha256=eWmc9WdOFe9kY_DMgKL0vv9gfcKnFw8e8U7cpUUw9wU,5139
31
- sqlcg/indexer/git_delta.py,sha256=V7WiNgiYPRo97K_mB3ymkJDZGoFExqwTZ2ut0Nqua5o,4383
30
+ sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
31
+ sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
32
32
  sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
33
- sqlcg/indexer/pool.py,sha256=n8u_z2IjW-rX1m0wlJ9-N-jxQby_Y4J9blMEPYaf19Q,18360
33
+ sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
34
34
  sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
35
35
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
36
36
  sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
40
40
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
41
41
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
42
42
  sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
43
- sqlcg/parsers/base.py,sha256=nkhl2jVBFRPKHtr2PKfYy6vTdW64v7KKUnfMwVG2ZMU,43941
43
+ sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
44
44
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
45
45
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
46
46
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
@@ -50,14 +50,14 @@ sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
50
50
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
51
51
  sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
52
52
  sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
53
- sqlcg/server/server.py,sha256=2EwKGehcIdKqCjZagbv8VrvnVCp-D5Lh-z38FFHRcN8,1723
53
+ sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
54
54
  sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
55
- sqlcg/server/tools.py,sha256=Jh16fefXMmw0mYUejoIMAXlJoPAaQoUbgrCghsmHNLk,54892
55
+ sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
56
56
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
57
57
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
58
- sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
58
+ sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
59
59
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
60
- sql_code_graph-1.0.1.dist-info/METADATA,sha256=vFhNG1uWAym_RQ21vDWG0tlogTOe2DDjCmrJp8X1txg,12806
61
- sql_code_graph-1.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
- sql_code_graph-1.0.1.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
- sql_code_graph-1.0.1.dist-info/RECORD,,
60
+ sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
61
+ sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
+ sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
+ sql_code_graph-1.0.2.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.1"
3
+ __version__ = "1.0.2"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -1,5 +1,9 @@
1
1
  """Analyze command for lineage analysis."""
2
2
 
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
3
7
  import typer
4
8
  from rich.console import Console
5
9
  from rich.table import Table
@@ -7,6 +11,9 @@ from rich.table import Table
7
11
  from sqlcg.core.config import get_backend
8
12
  from sqlcg.core.schema import NodeLabel, RelType
9
13
 
14
+ if TYPE_CHECKING:
15
+ from sqlcg.server.noise_filter import NoiseFilter
16
+
10
17
  app = typer.Typer(help="Lineage analysis")
11
18
  console = Console()
12
19
 
@@ -15,6 +22,7 @@ console = Console()
15
22
  def upstream( # noqa: B008
16
23
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
17
24
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
25
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
18
26
  ) -> None:
19
27
  """Trace upstream column lineage."""
20
28
  # Bounds check for depth to prevent performance DoS
@@ -29,6 +37,28 @@ def upstream( # noqa: B008
29
37
  "RETURN src.id AS id LIMIT 100",
30
38
  {"ref": ref},
31
39
  )
40
+ if not results and len(ref.split(".")) >= 3:
41
+ bare = _bare_ref(ref)
42
+ fallback_results = backend.run_read(
43
+ f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
44
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
45
+ "RETURN src.id AS id LIMIT 100",
46
+ {"bare": bare},
47
+ )
48
+ if fallback_results:
49
+ console.print(
50
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
51
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
52
+ "The INSERT target may have been indexed without a schema prefix. "
53
+ "Multiple tables with the same unqualified name in different schemas "
54
+ "would all match — re-index with an explicit schema for precise results."
55
+ )
56
+ results = fallback_results
57
+ if not raw:
58
+ from sqlcg.server.noise_filter import NoiseFilter
59
+
60
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
61
+ results = _filter_column_results(results, nf)
32
62
  _print_table(results, ["id"])
33
63
 
34
64
 
@@ -36,6 +66,7 @@ def upstream( # noqa: B008
36
66
  def downstream( # noqa: B008
37
67
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
38
68
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
69
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
39
70
  ) -> None:
40
71
  """Trace downstream column lineage."""
41
72
  # Bounds check for depth to prevent performance DoS
@@ -50,6 +81,28 @@ def downstream( # noqa: B008
50
81
  "RETURN dst.id AS id LIMIT 100",
51
82
  {"ref": ref},
52
83
  )
84
+ if not results and len(ref.split(".")) >= 3:
85
+ bare = _bare_ref(ref)
86
+ fallback_results = backend.run_read(
87
+ f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
88
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
89
+ "RETURN dst.id AS id LIMIT 100",
90
+ {"bare": bare},
91
+ )
92
+ if fallback_results:
93
+ console.print(
94
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
95
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
96
+ "The INSERT target may have been indexed without a schema prefix. "
97
+ "Multiple tables with the same unqualified name in different schemas "
98
+ "would all match — re-index with an explicit schema for precise results."
99
+ )
100
+ results = fallback_results
101
+ if not raw:
102
+ from sqlcg.server.noise_filter import NoiseFilter
103
+
104
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
105
+ results = _filter_column_results(results, nf)
53
106
  _print_table(results, ["id"])
54
107
 
55
108
 
@@ -106,6 +159,43 @@ def unused(
106
159
  _print_table(results, ["qualified"])
107
160
 
108
161
 
162
+ def _bare_ref(ref: str) -> str:
163
+ """Strip schema prefix from a ref string, keeping table.column.
164
+
165
+ For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
166
+ For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
167
+ Never uses rsplit — that would yield only the column name for 3-part refs.
168
+ """
169
+ parts = ref.split(".")
170
+ if len(parts) >= 3:
171
+ return ".".join(parts[1:]) # drop schema, keep table.column
172
+ return ref # already bare (no schema prefix)
173
+
174
+
175
+ def _col_id_to_table(col_id: str) -> str:
176
+ """Extract the table-qualified part from a column ID (schema.table.col → schema.table).
177
+
178
+ Column IDs follow the format: schema.table.column or table.column.
179
+ The table part is everything except the last component.
180
+
181
+ Args:
182
+ col_id: A column ID string from the graph.
183
+
184
+ Returns:
185
+ The table-qualified portion (all but the last dotted component).
186
+ """
187
+ parts = col_id.rsplit(".", 1)
188
+ return parts[0] if len(parts) == 2 else col_id
189
+
190
+
191
+ def _filter_column_results(
192
+ results: list[dict],
193
+ nf: NoiseFilter, # type: ignore[name-defined]
194
+ ) -> list[dict]:
195
+ """Filter column-ID result rows by NoiseFilter, dropping rows whose table is noise."""
196
+ return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
197
+
198
+
109
199
  def _print_table(rows: list[dict], columns: list[str]) -> None:
110
200
  """Print results as a Rich table."""
111
201
  if not rows:
@@ -14,14 +14,24 @@ console = Console()
14
14
  @app.command("table")
15
15
  def find_table( # noqa: B008
16
16
  name: str = typer.Argument(..., help="Table name to search for"), # noqa: B008
17
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
17
18
  ) -> None:
18
19
  """Find a table by name."""
20
+ name = name.lower() # graph keys are lowercased at index time (C2 normalization)
19
21
  with get_backend() as backend:
20
22
  results = backend.run_read(
21
23
  f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
22
24
  "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
23
25
  {"name": name},
24
26
  )
27
+ if not raw:
28
+ from sqlcg.server.noise_filter import NoiseFilter
29
+
30
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
31
+ ids = [r["qualified"] for r in results]
32
+ kept, _ = nf.filter_nodes(ids)
33
+ kept_set = set(kept)
34
+ results = [r for r in results if r["qualified"] in kept_set]
25
35
  _print_table(results, ["qualified", "kind"])
26
36
 
27
37
 
@@ -30,6 +40,7 @@ def find_column( # noqa: B008
30
40
  ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
31
41
  ) -> None:
32
42
  """Find a column by table.column reference."""
43
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
33
44
  with get_backend() as backend:
34
45
  results = backend.run_read(
35
46
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
@@ -14,7 +14,7 @@ from rich.progress import (
14
14
  TimeRemainingColumn,
15
15
  )
16
16
 
17
- from sqlcg.core.config import get_backend, get_db_path, get_dialect
17
+ from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
18
18
  from sqlcg.indexer.indexer import Indexer
19
19
 
20
20
  console = Console()
@@ -54,6 +54,9 @@ def index_cmd( # noqa: B008
54
54
  quiet: bool = typer.Option( # noqa: B008
55
55
  False, "--quiet", "-q", help="Suppress summary console output"
56
56
  ),
57
+ verbose: bool = typer.Option( # noqa: B008
58
+ False, "--verbose", "-v", help="Print parse warnings to stderr instead of log file"
59
+ ),
57
60
  debug: bool = typer.Option( # noqa: B008
58
61
  False, "--debug", help="Show detailed log output during indexing"
59
62
  ),
@@ -68,11 +71,40 @@ def index_cmd( # noqa: B008
68
71
  """
69
72
 
70
73
  import logging
74
+ import sys
71
75
 
72
76
  level = logging.DEBUG if debug else logging.CRITICAL
73
77
  logging.getLogger("sqlcg").setLevel(level)
74
78
  logging.getLogger("sqlglot").setLevel(level)
75
79
 
80
+ # Route parse warnings to stderr (--verbose) or to the configured log file.
81
+ sqlcg_log = logging.getLogger("sqlcg")
82
+
83
+ class _CountingHandler(logging.Handler):
84
+ """Counts WARNING+ records emitted during indexing."""
85
+
86
+ def __init__(self) -> None:
87
+ super().__init__(logging.WARNING)
88
+ self.count = 0
89
+
90
+ def emit(self, record: logging.LogRecord) -> None:
91
+ self.count += 1
92
+
93
+ _counter = _CountingHandler()
94
+ sqlcg_log.addHandler(_counter)
95
+
96
+ if verbose:
97
+ _warn_handler: logging.Handler = logging.StreamHandler(sys.stderr)
98
+ _warn_handler.setLevel(logging.WARNING)
99
+ sqlcg_log.addHandler(_warn_handler)
100
+ _warn_log_path = None
101
+ else:
102
+ _warn_log_path = KuzuConfig.from_env().log_path
103
+ _warn_log_path.parent.mkdir(parents=True, exist_ok=True)
104
+ _warn_handler = logging.FileHandler(_warn_log_path)
105
+ _warn_handler.setLevel(logging.WARNING)
106
+ sqlcg_log.addHandler(_warn_handler)
107
+
76
108
  # Set buffer pool size via env var if specified
77
109
  if buffer_pool_size > 0:
78
110
  os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
@@ -100,6 +132,16 @@ def index_cmd( # noqa: B008
100
132
  # KuzuDB connection and released the lock by the time we get here.
101
133
  console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
102
134
  raise typer.Exit(130) from None
135
+ finally:
136
+ sqlcg_log.removeHandler(_warn_handler)
137
+ sqlcg_log.removeHandler(_counter)
138
+ _warn_handler.close()
139
+
140
+ if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
141
+ console.print(
142
+ f"[yellow]Parse warnings written to {_warn_log_path} "
143
+ "— use --verbose to show here.[/yellow]"
144
+ )
103
145
 
104
146
 
105
147
  def _run_index(
@@ -1,8 +1,19 @@
1
- """Install sqlcg as an MCP server in Claude Code."""
1
+ """Install sqlcg as an MCP server in Claude Code.
2
+
3
+ Write path (in priority order):
4
+ 1. ``claude mcp add -s user sql-code-graph <cmd> <args>`` — the official
5
+ Claude Code CLI write path (reads from ~/.claude.json under the hood).
6
+ 2. Fallback: write ``~/.claude.json`` directly under mcpServers.user when
7
+ the ``claude`` binary is not found or returns non-zero.
8
+
9
+ The previous target (~/.claude/settings.json) was incorrect — Claude Code does
10
+ NOT read MCP servers from that file. See ARCHITECTURE_REVIEW.md §9.2.
11
+ """
2
12
 
3
13
  import json
4
14
  import os
5
15
  import shutil
16
+ import subprocess
6
17
  import sys
7
18
  from pathlib import Path
8
19
 
@@ -11,7 +22,6 @@ from rich.console import Console
11
22
 
12
23
  console = Console()
13
24
 
14
- _SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
15
25
  _SERVER_KEY = "sql-code-graph"
16
26
 
17
27
 
@@ -28,7 +38,10 @@ def install_cmd(
28
38
  help="Repository root for --scope project (default: current directory).",
29
39
  ),
30
40
  ) -> None:
31
- """Register sqlcg as an MCP server in Claude Code (~/.claude/settings.json).
41
+ """Register sqlcg as an MCP server in Claude Code.
42
+
43
+ Runs ``claude mcp add -s user sql-code-graph <cmd> <args>`` when the
44
+ ``claude`` CLI is on PATH; otherwise writes ~/.claude.json directly.
32
45
 
33
46
  Also provisions a Claude skill file (SKILL.md) at the chosen location.
34
47
  Pass --scope project or --scope global to specify where the skill is written.
@@ -39,68 +52,81 @@ def install_cmd(
39
52
  resolved_scope = _resolve_scope(scope)
40
53
 
41
54
  if shutil.which("sqlcg"):
42
- entry: dict = {"command": "sqlcg", "args": ["mcp", "start"]}
55
+ cmd_parts = ["sqlcg", "mcp", "start"]
43
56
  elif shutil.which("uvx"):
44
- entry = {"command": "uvx", "args": ["sql-code-graph", "mcp", "start"]}
57
+ cmd_parts = ["uvx", "sql-code-graph", "mcp", "start"]
45
58
  else:
46
59
  console.print("[red]Error:[/red] Neither 'sqlcg' nor 'uvx' found on PATH.")
47
60
  raise typer.Exit(1)
48
61
 
49
- settings_path = _SETTINGS_PATH
50
- if settings_path.exists():
62
+ entry: dict = {"command": cmd_parts[0], "args": cmd_parts[1:]}
63
+
64
+ if dry_run:
65
+ claude_bin = shutil.which("claude")
66
+ if claude_bin:
67
+ console.print("[dim]--dry-run: would run:[/dim]")
68
+ console.print(f" claude mcp add -s user {_SERVER_KEY} {' '.join(cmd_parts)}")
69
+ else:
70
+ claude_json = Path.home() / ".claude.json"
71
+ console.print("[dim]--dry-run: would write to ~/.claude.json:[/dim]")
72
+ _preview_claude_json(claude_json, entry)
73
+ _provision_skill(resolved_scope, repo, dry_run=True)
74
+ return
75
+
76
+ # --- Try official claude CLI first ---
77
+ claude_bin = shutil.which("claude")
78
+ if claude_bin:
79
+ proc = subprocess.run(
80
+ ["claude", "mcp", "add", "-s", "user", _SERVER_KEY] + cmd_parts,
81
+ capture_output=True,
82
+ text=True,
83
+ )
84
+ if proc.returncode == 0:
85
+ console.print(f"[green]Configured:[/green] {_SERVER_KEY} via `claude mcp add`")
86
+ console.print("\nRestart Claude Code to pick up the new MCP server.")
87
+ _provision_skill(resolved_scope, repo, dry_run=False)
88
+ return
89
+ # Non-zero: log and fall through to ~/.claude.json fallback
90
+ console.print(
91
+ f"[yellow]Warning:[/yellow] `claude mcp add` returned rc={proc.returncode}; "
92
+ "falling back to ~/.claude.json write."
93
+ )
94
+ if proc.stderr:
95
+ console.print(f"[dim]{proc.stderr.strip()}[/dim]")
96
+
97
+ # --- Fallback: write ~/.claude.json directly ---
98
+ claude_json = Path.home() / ".claude.json"
99
+ if claude_json.exists():
51
100
  try:
52
- settings: dict = json.loads(settings_path.read_text())
101
+ data: dict = json.loads(claude_json.read_text())
53
102
  except (json.JSONDecodeError, OSError, TypeError):
54
103
  console.print(
55
- f"[yellow]Warning:[/yellow] {settings_path} contains invalid JSON — "
104
+ f"[yellow]Warning:[/yellow] {claude_json} contains invalid JSON — "
56
105
  "mcpServers key will be added"
57
106
  )
58
- settings = {}
107
+ data = {}
59
108
  else:
60
- settings = {}
61
-
62
- mcp_servers: dict = settings.setdefault("mcpServers", {})
109
+ data = {}
63
110
 
64
- existing_entry = mcp_servers.get(_SERVER_KEY)
65
- if existing_entry == entry:
66
- cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
67
- console.print(f"[green]Already configured:[/green] {_SERVER_KEY} → {cmd_str}")
68
- # Still provision the skill even when MCP entry already exists
69
- _provision_skill(resolved_scope, repo, dry_run)
111
+ existing = data.get("mcpServers", {}).get("user", {}).get(_SERVER_KEY)
112
+ if existing == entry:
113
+ console.print(f"[green]Already configured:[/green] {_SERVER_KEY} (in ~/.claude.json)")
114
+ _provision_skill(resolved_scope, repo, dry_run=False)
70
115
  return
71
116
 
72
- # Print upgrade notice if switching from uvx to sqlcg
73
- if (
74
- existing_entry
75
- and existing_entry.get("command") == "uvx"
76
- and entry.get("command") == "sqlcg"
77
- ):
78
- console.print(
79
- "[blue]Updating[/blue] MCP entry from [dim]uvx[/dim] to local "
80
- "[green]sqlcg[/green] binary (faster startup). Writing…"
81
- )
82
-
83
- mcp_servers[_SERVER_KEY] = entry
84
-
85
- if dry_run is True:
86
- console.print("[dim]--dry-run: would write:[/dim]")
87
- console.print_json(json.dumps(settings, indent=2))
88
- _provision_skill(resolved_scope, repo, dry_run)
89
- return
117
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
90
118
 
91
119
  try:
92
- settings_path.parent.mkdir(parents=True, exist_ok=True)
93
- tmp = settings_path.with_suffix(".tmp")
94
- tmp.write_text(json.dumps(settings, indent=2) + "\n")
95
- os.replace(tmp, settings_path)
120
+ tmp = claude_json.with_suffix(".tmp")
121
+ tmp.write_text(json.dumps(data, indent=2) + "\n")
122
+ os.replace(tmp, claude_json)
96
123
  except (OSError, TypeError, AttributeError):
97
124
  pass # Ignore file I/O errors in testing
98
125
 
99
- cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
126
+ cmd_str = " ".join(cmd_parts)
100
127
  console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
101
- console.print(f"[dim]Written to {settings_path}[/dim]")
128
+ console.print(f"[dim]Written to {claude_json}[/dim]")
102
129
 
103
- # Note about cold cache if uvx was chosen
104
130
  if entry.get("command") == "uvx":
105
131
  console.print(
106
132
  "[yellow]Note:[/yellow] First startup downloads dependencies (~30s). "
@@ -108,9 +134,20 @@ def install_cmd(
108
134
  )
109
135
 
110
136
  console.print("\nRestart Claude Code to pick up the new MCP server.")
137
+ _provision_skill(resolved_scope, repo, dry_run=False)
138
+
111
139
 
112
- # Provision the skill file
113
- _provision_skill(resolved_scope, repo, dry_run)
140
+ def _preview_claude_json(claude_json: Path, entry: dict) -> None:
141
+ """Print what would be written to ~/.claude.json without touching the file."""
142
+ if claude_json.exists():
143
+ try:
144
+ data: dict = json.loads(claude_json.read_text())
145
+ except (json.JSONDecodeError, OSError, TypeError):
146
+ data = {}
147
+ else:
148
+ data = {}
149
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
150
+ console.print_json(json.dumps(data, indent=2))
114
151
 
115
152
 
116
153
  def _resolve_scope(scope: str | None) -> str:
sqlcg/cli/commands/mcp.py CHANGED
@@ -22,28 +22,34 @@ def _server_entry() -> dict:
22
22
 
23
23
  @app.command("setup")
24
24
  def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
25
- """Print or write MCP server config JSON."""
25
+ """Print or write MCP server config JSON.
26
+
27
+ --print (default): print the JSON snippet for manual insertion.
28
+ --write: write to ~/.claude.json under mcpServers.user (the correct path
29
+ for Claude Code — not settings.json, which Claude Code does not read
30
+ for MCP servers).
31
+ """
26
32
  entry = _server_entry()
27
33
  if print_only:
28
34
  console.print_json(json.dumps({"mcpServers": {_SERVER_KEY: entry}}, indent=2))
29
35
  return
30
36
 
31
- config_path = Path.home() / ".claude" / "settings.json"
32
- if config_path.exists():
37
+ # Write to ~/.claude.json (correct path for Claude Code MCP servers)
38
+ claude_json = Path.home() / ".claude.json"
39
+ if claude_json.exists():
33
40
  try:
34
- settings: dict = json.loads(config_path.read_text())
41
+ data: dict = json.loads(claude_json.read_text())
35
42
  except json.JSONDecodeError:
36
- settings = {}
43
+ data = {}
37
44
  else:
38
- settings = {}
45
+ data = {}
39
46
 
40
- settings.setdefault("mcpServers", {})[_SERVER_KEY] = entry
47
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
41
48
 
42
- config_path.parent.mkdir(parents=True, exist_ok=True)
43
- tmp = config_path.with_suffix(".tmp")
44
- tmp.write_text(json.dumps(settings, indent=2) + "\n")
45
- os.replace(tmp, config_path)
46
- console.print(f"[green]Configuration written to[/green] {config_path}")
49
+ tmp = claude_json.with_suffix(".tmp")
50
+ tmp.write_text(json.dumps(data, indent=2) + "\n")
51
+ os.replace(tmp, claude_json)
52
+ console.print(f"[green]Configuration written to[/green] {claude_json}")
47
53
  console.print("Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.")
48
54
 
49
55
 
@@ -60,6 +60,9 @@ def reindex_cmd( # noqa: B008
60
60
  from sqlcg.core.schema import SCHEMA_VERSION
61
61
  from sqlcg.indexer.indexer import Indexer
62
62
 
63
+ # Resolve to absolute path so ignore-spec and git delta receive an absolute root
64
+ path = path.resolve()
65
+
63
66
  # Resolve dialect
64
67
  if dialect == "auto":
65
68
  dialect = get_dialect(path)
sqlcg/core/config.py CHANGED
@@ -19,6 +19,10 @@ class KuzuConfig(BaseModel):
19
19
  default=0,
20
20
  description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
21
21
  )
22
+ log_path: Path = Field(
23
+ default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
24
+ description="Path for parse-warning log file written during indexing",
25
+ )
22
26
 
23
27
  @classmethod
24
28
  def from_env(cls) -> "KuzuConfig":
@@ -29,9 +33,11 @@ class KuzuConfig(BaseModel):
29
33
  """
30
34
  env_path = os.getenv("SQLCG_DB_PATH")
31
35
  env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
36
+ env_log = os.getenv("SQLCG_LOG_PATH")
32
37
  return cls(
33
38
  db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
34
39
  buffer_pool_size_mb=int(env_buf) if env_buf else 0,
40
+ log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
35
41
  )
36
42
 
37
43
 
@@ -138,6 +144,7 @@ def get_noise_filter_patterns(path: Path) -> list[str]:
138
144
  """
139
145
  default_patterns = [
140
146
  "*_bck",
147
+ "*_bck_*", # catches mid-suffix variants e.g. foo_bck_us39553, bar_bck_archive
141
148
  "*_bck_us",
142
149
  "*_bck_[0-9]*",
143
150
  "*_backup",
@@ -93,10 +93,14 @@ def _classify_error(msg: str) -> str:
93
93
  if not msg:
94
94
  return "other"
95
95
 
96
- # Timeout errors
96
+ # Timeout errors (including pool-path poison retries)
97
97
  if msg.startswith("timeout:"):
98
98
  return "timeout"
99
99
 
100
+ # Poison-retry: file repeatedly timed out in pool worker; treat as timeout bucket
101
+ if msg.startswith("skipped:poison"):
102
+ return "timeout"
103
+
100
104
  # Skip markers
101
105
  if msg.startswith("col_lineage_skip:"):
102
106
  if "pure_ddl_file" in msg:
@@ -51,6 +51,7 @@ def git_name_status_delta(root: Path, old_sha: str, new_sha: str) -> Delta | Non
51
51
  unknown SHA, shallow clone, or git not available). Callers MUST fall
52
52
  back to a full index_repo when None is returned.
53
53
  """
54
+ root = root.resolve() # guard: caller may pass a relative path (e.g. Path("."))
54
55
  try:
55
56
  result = subprocess.run(
56
57
  ["git", "diff", "--name-status", old_sha, new_sha],
sqlcg/indexer/pool.py CHANGED
@@ -285,7 +285,9 @@ class HardKillPool:
285
285
  tidx = queue.pop(0)
286
286
  path = tasks[tidx].get("path", "")
287
287
  if kill_counts.get(path, 0) >= poison_retries:
288
- results[tidx] = _timeout_file(path, self._dialect, poison=True)
288
+ results[tidx] = _timeout_file(
289
+ path, self._dialect, timeout_s=per_task_timeout, poison=True
290
+ )
289
291
  logger.warning("Skipping %s — poisoned after %d kills", path, poison_retries)
290
292
  if on_result is not None:
291
293
  on_result()
@@ -375,7 +377,7 @@ class HardKillPool:
375
377
  slot,
376
378
  kill_counts[path],
377
379
  )
378
- results[tidx] = _timeout_file(path, self._dialect)
380
+ results[tidx] = _timeout_file(path, self._dialect, timeout_s=per_task_timeout)
379
381
  if on_result is not None:
380
382
  on_result()
381
383
  self._respawn(w)
@@ -486,11 +488,14 @@ class HardKillPool:
486
488
  def _timeout_file(
487
489
  path: str,
488
490
  dialect: str | None,
491
+ timeout_s: float = 0.0,
489
492
  poison: bool = False,
490
493
  ) -> ParsedFile:
491
494
  pf = ParsedFile(path=Path(path), dialect=dialect)
492
- msg = "skipped:poison" if poison else "timeout"
493
- pf.errors.append(f"{msg} file={Path(path).name}")
495
+ if poison:
496
+ pf.errors.append(f"skipped:poison file={Path(path).name}")
497
+ else:
498
+ pf.errors.append(f"timeout:{timeout_s:.0f}s file={Path(path).name}")
494
499
  return pf
495
500
 
496
501
 
sqlcg/parsers/base.py CHANGED
@@ -688,8 +688,126 @@ class SqlParser(ABC):
688
688
  except Exception:
689
689
  body_scope = None
690
690
 
691
- # Extract output columns
692
- for col_expr in col_expressions:
691
+ # INSERT positional column-list mapping (#25 fix).
692
+ # Compute the positional_col_names skip-set BEFORE the main column loop
693
+ # so the main loop can skip positions already handled here.
694
+ #
695
+ # When an INSERT has an explicit column list (INSERT INTO t (c1, c2) SELECT ...),
696
+ # the target column name at position idx is authoritative — the SELECT alias is
697
+ # cosmetic for the SELECT and meaningless to the INSERT target. This block
698
+ # overrides alias attribution for ALL positions (aliased or not).
699
+ #
700
+ # Guards applied here mirror the main column loop to preserve skip markers:
701
+ # - Star expressions → emit col_lineage_skip:star, register pos, skip sg_lineage
702
+ # - Pure-literal (no Column descendant) → register pos, skip sg_lineage (silent)
703
+ # - Unaliased non-Column (func/arith/CASE) with a Column descendant → NOT skipped:
704
+ # wrapped as Alias(expr, insert_col) and traced via sg_lineage (no func_fallback)
705
+ # - Plain Column / aliased expression → call sg_lineage (the #25 happy path)
706
+ #
707
+ # CLAUDE.md invariant: body_no_with = body.copy() + strip-WITH happens ONCE
708
+ # before the inner loop; only the single projection is swapped per column.
709
+ positional_col_names: dict[int, str] = {} # idx → insert_col_name
710
+ if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
711
+ insert_cols_list = [c.name for c in stmt.this.expressions]
712
+ # Build the WITH-stripped body ONCE here, before any per-column loop.
713
+ # Only the single projection is swapped per column below.
714
+ body_no_with = body.copy()
715
+ body_no_with.set("with_", None)
716
+ for _ins_idx, _col_expr in enumerate(col_expressions):
717
+ if _ins_idx >= len(insert_cols_list):
718
+ break
719
+ _insert_col = insert_cols_list[_ins_idx]
720
+ if not _insert_col:
721
+ continue
722
+ # Register position first so the main loop always skips it,
723
+ # regardless of which guard fires below.
724
+ positional_col_names[_ins_idx] = _insert_col
725
+
726
+ # Guard 1: Star projection — emit skip marker (same as main loop).
727
+ _inner_for_guard = (
728
+ _col_expr.this if isinstance(_col_expr, exp.Alias) else _col_expr
729
+ )
730
+ if isinstance(_inner_for_guard, exp.Star) or (
731
+ isinstance(_inner_for_guard, exp.Column)
732
+ and isinstance(_inner_for_guard.this, exp.Star)
733
+ ):
734
+ _qualifier = (
735
+ _inner_for_guard.table
736
+ if isinstance(_inner_for_guard, exp.Column)
737
+ else None
738
+ )
739
+ out.errors.append(f"col_lineage_skip:star:{_qualifier or '<unqualified>'}")
740
+ continue # no sg_lineage for star
741
+
742
+ # Guard 2: Pure-literal — no Column descendants, nothing to trace.
743
+ if not list(_col_expr.find_all(exp.Column)):
744
+ continue # silent skip, no sg_lineage
745
+
746
+ # NOTE: do NOT emit func_fallback here for unaliased non-Column
747
+ # expressions (functions, arithmetic, CASE …). The main loop emits
748
+ # func_fallback for such expressions because a plain SELECT/CREATE VIEW
749
+ # gives them no output column name. The positional INSERT column list
750
+ # DOES supply that name (_insert_col): below we wrap the expression as
751
+ # Alias(expr, _insert_col) and let sg_lineage trace through it — exactly
752
+ # as the aliased form (e.g. `DATE(col) AS a`) already resolves. Guard 2
753
+ # (above) already dropped genuinely-untraceable pure-literal expressions
754
+ # (no Column descendant). Skipping column-containing expressions here would
755
+ # make the #25 positional feature do its work and then discard the result,
756
+ # dropping real lineage edges (regressed by eb19f29; broke COALESCE).
757
+
758
+ # Positional mapping always wins — replace (or add) the alias with the
759
+ # INSERT target column name regardless of SELECT alias.
760
+ if _col_expr.alias and _col_expr.alias != _insert_col:
761
+ self._log.debug(
762
+ "INSERT positional override: SELECT alias %r → INSERT col %r"
763
+ " at position %d",
764
+ _col_expr.alias,
765
+ _insert_col,
766
+ _ins_idx,
767
+ )
768
+ # If the expression is already an Alias(inner, old_alias), unwrap it
769
+ # before re-wrapping — otherwise we produce Alias(Alias(inner, x), c1)
770
+ # which serialises as "inner AS x AS c1" (syntax error).
771
+ _inner = _col_expr.this if isinstance(_col_expr, exp.Alias) else _col_expr
772
+ _aliased = exp.Alias(this=_inner.copy(), alias=_insert_col)
773
+ body_no_with.set("expressions", [_aliased])
774
+ _patched_sql = body_no_with.sql(dialect=self.DIALECT)
775
+ # Pass sources= (not scope=) here: the patched SQL is a freshly
776
+ # serialised string — the scope was built from the original body AST
777
+ # and does not correspond to this new string.
778
+ #
779
+ # Use `sources` (the cross-statement temp/CTAS map), NOT
780
+ # `combined_sources`. combined_sources additionally carries the
781
+ # SAME-STATEMENT CTE bodies (added above). Since body_no_with strips
782
+ # the WITH clause from the patched SQL, those CTE names become opaque
783
+ # source relations — passing their bodies as sources= would expand them
784
+ # away, collapsing intermediate CTE→target hops into the deepest source
785
+ # (regressed by eb19f29; broke the MA_AANTAL_OP_ORDER anchor link 5).
786
+ # Cross-statement temps (e.g. CREATE TEMP TABLE t) live in `sources`
787
+ # and SHOULD still expand (E36 multi-temp: t → src).
788
+ try:
789
+ _root = sg_lineage(
790
+ _insert_col,
791
+ _patched_sql,
792
+ dialect=self.DIALECT,
793
+ sources=sources or {},
794
+ )
795
+ if _root:
796
+ _new_edges = self._lineage_node_to_edges(
797
+ _root,
798
+ dst_col_name=_insert_col,
799
+ dst_table=dst_table,
800
+ path=path,
801
+ out=out,
802
+ )
803
+ edges.extend(_new_edges)
804
+ except Exception:
805
+ pass
806
+
807
+ # Extract output columns — skip positions handled by the positional INSERT block
808
+ for loop_idx, col_expr in enumerate(col_expressions):
809
+ if loop_idx in positional_col_names:
810
+ continue # positional INSERT block already emitted this column
693
811
  # Skip star projections — sg_lineage requires a concrete column name.
694
812
  if isinstance(col_expr, exp.Star) or (
695
813
  isinstance(col_expr, exp.Column) and isinstance(col_expr.this, exp.Star)
@@ -919,47 +1037,6 @@ class SqlParser(ABC):
919
1037
  cte_col_name,
920
1038
  )
921
1039
 
922
- # INSERT column-list aliasing (T-07-02 link 5).
923
- # When an INSERT has an explicit column list and the SELECT expression has
924
- # no alias (e.g. SELECT SUM(x) FROM cte), the INSERT column at the same
925
- # position provides the destination col name. Stripping the WITH clause
926
- # stops sg_lineage at the CTE name boundary (doesn't expand into bodies).
927
- if isinstance(stmt, exp.Insert) and isinstance(stmt.this, exp.Schema):
928
- insert_cols = [c.name for c in stmt.this.expressions]
929
- # Build the WITH-stripped body ONCE before the loop and only swap its
930
- # single projection per column (regressed in 4234e5d, which moved the
931
- # full-body body.copy() inside the loop → O(N_cols) full-body deepcopies
932
- # for wide INSERT ... SELECT). Stripping WITH stops sg_lineage at the CTE
933
- # name boundary.
934
- body_no_with = body.copy()
935
- body_no_with.set("with_", None)
936
- for idx, col_expr in enumerate(col_expressions):
937
- if idx >= len(insert_cols):
938
- break
939
- if col_expr.alias:
940
- continue # already handled by the main col loop
941
- insert_col = insert_cols[idx]
942
- if not insert_col:
943
- continue
944
- # Patch the shared body with this column's aliased expression so
945
- # sg_lineage can trace it to the INSERT column name.
946
- aliased = exp.Alias(this=col_expr.copy(), alias=insert_col)
947
- body_no_with.set("expressions", [aliased])
948
- patched_sql = body_no_with.sql(dialect=self.DIALECT)
949
- try:
950
- root = sg_lineage(insert_col, patched_sql, dialect=self.DIALECT)
951
- if root:
952
- new_edges = self._lineage_node_to_edges(
953
- root,
954
- dst_col_name=insert_col,
955
- dst_table=dst_table,
956
- path=path,
957
- out=out,
958
- )
959
- edges.extend(new_edges)
960
- except Exception:
961
- pass
962
-
963
1040
  except Exception as exc:
964
1041
  self._log.debug(
965
1042
  "column lineage extraction failed for entire statement: file=%s error=%s",
sqlcg/server/server.py CHANGED
@@ -1,35 +1,76 @@
1
1
  """MCP server for SQL Code Graph.
2
2
 
3
3
  Exposes FastMCP tools for lineage queries, pattern search, and indexing.
4
- MCP protocol uses stdout for message transport, so this module redirects
5
- stdout to stderr to prevent user logs from corrupting the protocol stream.
4
+ MCP protocol uses stdout (fd 1) for JSON-RPC message transport. This module
5
+ captures fd 1 as a raw binary buffer BEFORE any logging redirection so that
6
+ the captured buffer can be passed explicitly to stdio_server(). This ensures
7
+ JSON-RPC frames always go to fd 1 regardless of what sys.stdout points to
8
+ at call time.
9
+
10
+ Ordering invariant (must not change):
11
+ 1. os.dup(1) → _real_stdout_buffer (first — before everything)
12
+ 2. from mcp.server import FastMCP (module-level import)
13
+ 3. mcp = FastMCP("SQL Code Graph") (module-level; tools.py registers here)
14
+ 4. main() calls _configure_mcp_logging() (not at module scope)
6
15
  """
7
16
 
17
+ import os
8
18
  import sys
9
19
 
10
- from dotenv import load_dotenv
11
- from mcp.server import FastMCP
20
+ # Capture the real fd 1 binary stream FIRST — before _configure_mcp_logging()
21
+ # (which replaces sys.stdout) AND before FastMCP("SQL Code Graph") construction.
22
+ # stdio_server() receives this explicitly so JSON-RPC frames go to fd 1
23
+ # regardless of what sys.stdout points to afterward.
24
+ # Guards against the v1.0.0/v1.0.1 regression where frames went to fd 2.
25
+ _real_stdout_buffer = os.fdopen(os.dup(1), "wb", buffering=0)
12
26
 
13
- from sqlcg.utils.logging import getLogger
27
+ from dotenv import load_dotenv # noqa: E402
28
+ from mcp.server import FastMCP # noqa: E402
29
+
30
+ from sqlcg.utils.logging import getLogger # noqa: E402
14
31
 
15
32
  logger = getLogger(__name__)
16
33
 
34
+ # Create FastMCP instance at module scope so tools.py can import and register with it.
35
+ # This is safe because _real_stdout_buffer has already captured fd 1 above.
36
+ mcp = FastMCP("SQL Code Graph")
37
+
17
38
 
18
39
  def _configure_mcp_logging() -> None:
19
- """Redirect sys.stdout to sys.stderr to protect MCP protocol.
40
+ """Redirect sys.stdout to sys.stderr and configure logging to stderr.
20
41
 
21
- MCP uses stdout for JSON-RPC messages. Any user print() or log output
22
- to stdout would corrupt the protocol. This function must be called before
23
- mcp.run() and before any code that might print to stdout.
42
+ sys.stdout is replaced with sys.stderr so that any stray print() call
43
+ does not pollute fd 1 (reserved for MCP JSON-RPC frames).
44
+ The real fd 1 binary stream is captured in _real_stdout_buffer at module
45
+ top before this replacement and passed explicitly to stdio_server().
46
+
47
+ Must be called inside main(), not at module scope, so that
48
+ _real_stdout_buffer captures fd 1 before the redirect.
24
49
  """
50
+ import logging
51
+
25
52
  sys.stdout = sys.stderr
53
+ logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
26
54
 
27
55
 
28
- # Protect stdout before importing FastMCP (which may emit output during import)
29
- _configure_mcp_logging()
56
+ async def _run_stdio_async_with_real_stdout() -> None:
57
+ """Run the MCP server loop with JSON-RPC frames explicitly on fd 1.
30
58
 
31
- # Create FastMCP instance at module scope so tools.py can import and register with it
32
- mcp = FastMCP("SQL Code Graph")
59
+ Bypasses FastMCP.run_stdio_async() (which uses sys.stdout at call time)
60
+ and drives the server loop directly with the captured _real_stdout_buffer.
61
+ """
62
+ from io import TextIOWrapper
63
+
64
+ import anyio
65
+ from mcp.server.stdio import stdio_server
66
+
67
+ stdout_text = TextIOWrapper(_real_stdout_buffer, encoding="utf-8", line_buffering=False)
68
+ async with stdio_server(stdout=anyio.wrap_file(stdout_text)) as (read_stream, write_stream):
69
+ await mcp._mcp_server.run(
70
+ read_stream,
71
+ write_stream,
72
+ mcp._mcp_server.create_initialization_options(),
73
+ )
33
74
 
34
75
 
35
76
  def main(db_path: str | None = None) -> None:
@@ -38,10 +79,13 @@ def main(db_path: str | None = None) -> None:
38
79
  Args:
39
80
  db_path: Path to KùzuDB database. If None, uses SQLCG_DB_PATH env var
40
81
  or ~/.sqlcg/graph.db (via get_db_path in tools module).
41
-
42
- Raises:
43
- RuntimeError: If tools fail to initialize or FastMCP server fails.
44
82
  """
83
+ import anyio
84
+
85
+ # Must be first — redirects sys.stdout → sys.stderr so stray prints don't
86
+ # corrupt fd 1. _real_stdout_buffer was already captured at module top.
87
+ _configure_mcp_logging()
88
+
45
89
  load_dotenv()
46
90
 
47
91
  # Import tools module to trigger tool registration via @mcp.tool() decorators
@@ -50,8 +94,7 @@ def main(db_path: str | None = None) -> None:
50
94
  # Initialize the backend singleton used by all tools
51
95
  sqlcg.server.tools.init_backend(db_path)
52
96
 
53
- # Run the MCP server event loop, ensuring backend is closed on shutdown
54
97
  try:
55
- mcp.run()
98
+ anyio.run(_run_stdio_async_with_real_stdout)
56
99
  finally:
57
100
  sqlcg.server.tools.shutdown_backend()
sqlcg/server/tools.py CHANGED
@@ -183,6 +183,19 @@ def _assert_indexed(db: GraphBackend) -> None:
183
183
  )
184
184
 
185
185
 
186
+ def _bare_ref(ref: str) -> str:
187
+ """Drop the first dotted component of a 3+-part ref (schema.table.column → table.column).
188
+
189
+ For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
190
+ For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
191
+ Never uses rsplit — that would yield only the column name for 3-part refs.
192
+ """
193
+ parts = ref.split(".")
194
+ if len(parts) >= 3:
195
+ return ".".join(parts[1:]) # drop schema, keep table.column
196
+ return ref # already bare (no schema prefix)
197
+
198
+
186
199
  def _parse_column_ref(col_ref: str) -> tuple[str, str]:
187
200
  """Parse column reference "table.column" or "catalog.db.table.column".
188
201
 
@@ -554,9 +567,54 @@ def trace_column_lineage(table_col: str, max_depth: int | None = None) -> Lineag
554
567
 
555
568
  mermaid = _build_mermaid(col_id, edges) if edges else None
556
569
 
570
+ # Bare-name fallback: when the primary query returns empty and the ref has a
571
+ # schema component (3+ parts), retry with the schema prefix stripped.
572
+ # This handles unqualified INSERT targets indexed without a schema prefix.
573
+ bare_fallback_used = False
574
+ if not lineage and len(table_col.split(".")) >= 3:
575
+ bare = _bare_ref(table_col)
576
+ bare_queue: deque[tuple[str, int]] = deque([(bare, 0)])
577
+ bare_visited: set[str] = set()
578
+ bare_emitted: set[str] = set()
579
+ while bare_queue:
580
+ current_id, depth = bare_queue.popleft()
581
+ if current_id in bare_visited or (max_depth is not None and depth > max_depth):
582
+ continue
583
+ if len(bare_visited) >= max_nodes:
584
+ break
585
+ bare_visited.add(current_id)
586
+ rows = db.run_read(TRACE_COLUMN_LINEAGE_QUERY, {"id": current_id})
587
+ for row in rows:
588
+ node_id = row["id"]
589
+ edges.append((node_id, current_id, row.get("transform") or "SELECT"))
590
+ if node_id not in bare_visited and node_id not in bare_emitted:
591
+ bare_emitted.add(node_id)
592
+ lineage.append(
593
+ LineageNode(
594
+ name=row.get("col_name", ""),
595
+ kind="column",
596
+ table=row.get("table_qualified"),
597
+ file=None,
598
+ confidence=row.get("confidence"),
599
+ )
600
+ )
601
+ if node_id not in bare_visited:
602
+ bare_queue.append((node_id, depth + 1))
603
+ if lineage:
604
+ bare_fallback_used = True
605
+ mermaid = _build_mermaid(bare, edges) if edges else None
606
+
557
607
  # Populate hint when the result is empty or came from the bare-name fallback (Step 4.1)
558
608
  hint = None
559
- if not lineage:
609
+ if bare_fallback_used:
610
+ bare = _bare_ref(table_col)
611
+ hint = (
612
+ f"No results for '{table_col}'. Found lineage under bare name '{bare}'. "
613
+ "The INSERT target may have been indexed without a schema prefix. "
614
+ "Multiple tables with the same unqualified name in different schemas "
615
+ "would all match — re-index with an explicit schema for precise results."
616
+ )
617
+ elif not lineage:
560
618
  hint = (
561
619
  "No lineage found. Ensure the column reference includes the schema prefix "
562
620
  "(e.g., ba.table_name.column_name). Check that 'sqlcg db info' shows "
sqlcg/utils/ignore.py CHANGED
@@ -14,6 +14,7 @@ def load_ignore_spec(root: Path) -> pathspec.PathSpec:
14
14
  Returns:
15
15
  PathSpec object for matching ignore patterns
16
16
  """
17
+ root = Path(root).resolve() # guard: caller may pass a relative path (e.g. Path("."))
17
18
  ignore_file = root / ".sqlcgignore"
18
19
  if ignore_file.exists():
19
20
  patterns = ignore_file.read_text().splitlines()
@@ -33,4 +34,5 @@ def is_ignored(path: Path, root: Path, spec: pathspec.PathSpec) -> bool:
33
34
  Returns:
34
35
  True if the path matches any ignore pattern
35
36
  """
37
+ root = Path(root).resolve() # guard: ensure root is absolute before relative_to()
36
38
  return spec.match_file(str(path.relative_to(root)))