sql-code-graph 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -1,22 +1,22 @@
1
- sqlcg/__init__.py,sha256=2lT2oiKX19arg1oTOFf13dXA3qyyQNpRevdvKHZIOp4,115
1
+ sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=kfcySSjc_UhSsOsJg7o5VD7TH4v72KVzol7Cdn2EuOU,4127
6
+ sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
7
7
  sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
8
- sqlcg/cli/commands/find.py,sha256=4cEWQ0otxNIzzwwzZ0WB_Tms0EoKzcFfhB3FJt8Q5V4,2025
8
+ sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
9
9
  sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
10
  sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
11
- sqlcg/cli/commands/index.py,sha256=6f-kaoY5roY4DDvEOi_HrDnBG9Jrqy0_A47gsxZsNUQ,7421
12
- sqlcg/cli/commands/install.py,sha256=mNVXdGlQ4JtCaaibuzU-inf519T97mC-Nj9K-G2gMQY,7525
13
- sqlcg/cli/commands/mcp.py,sha256=H1j6b5Tqr5VXja2GafgD5sJD6hZ5rsgfPwIikK1PZqc,1903
14
- sqlcg/cli/commands/reindex.py,sha256=iZXxYGI2m2wxkvIA1mB9uvOEp66QaT5zF5TGd0OpqlU,6275
11
+ sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
12
+ sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
+ sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
14
+ sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
16
  sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
18
  sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=em9gYtau2hu-scWzZk4CSZh4L8r9ZymgmH_2BspqsQw,9773
19
+ sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
20
20
  sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
21
21
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
22
22
  sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
@@ -27,10 +27,10 @@ sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
27
27
  sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
28
28
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
29
29
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
30
- sqlcg/indexer/error_classify.py,sha256=eWmc9WdOFe9kY_DMgKL0vv9gfcKnFw8e8U7cpUUw9wU,5139
31
- sqlcg/indexer/git_delta.py,sha256=V7WiNgiYPRo97K_mB3ymkJDZGoFExqwTZ2ut0Nqua5o,4383
32
- sqlcg/indexer/indexer.py,sha256=Jes0SybIDXLWQlWbRrDAbxVfJ7OsdS3PDAVSoRcv3Tc,50605
33
- sqlcg/indexer/pool.py,sha256=Q9DQmgUsSeKL1S-gNAzMbCNPGI9WsG6Nmt_noh_O8M8,16069
30
+ sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
31
+ sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
32
+ sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
33
+ sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
34
34
  sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
35
35
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
36
36
  sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
@@ -40,7 +40,7 @@ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
40
40
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
41
41
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
42
42
  sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
43
- sqlcg/parsers/base.py,sha256=aw-gueAMdt551peUY0g7lWbswQLPWx0FDCK4RDfUjDE,43205
43
+ sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
44
44
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
45
45
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
46
46
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
@@ -50,14 +50,14 @@ sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
50
50
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
51
51
  sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
52
52
  sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
53
- sqlcg/server/server.py,sha256=2EwKGehcIdKqCjZagbv8VrvnVCp-D5Lh-z38FFHRcN8,1723
53
+ sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
54
54
  sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
55
- sqlcg/server/tools.py,sha256=Jh16fefXMmw0mYUejoIMAXlJoPAaQoUbgrCghsmHNLk,54892
55
+ sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
56
56
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
57
57
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
58
- sqlcg/utils/ignore.py,sha256=NfInsHPGubfKFJQraH-wE7ATPb5Be_Igu5mIh7p21cU,973
58
+ sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
59
59
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
60
- sql_code_graph-1.0.0.dist-info/METADATA,sha256=HQdFHBzEKTlPlqnwRCT9n0iKrmWqkmM5mhM3fOi5lvo,12806
61
- sql_code_graph-1.0.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
- sql_code_graph-1.0.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
- sql_code_graph-1.0.0.dist-info/RECORD,,
60
+ sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
61
+ sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
+ sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
+ sql_code_graph-1.0.2.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.0.2"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -1,5 +1,9 @@
1
1
  """Analyze command for lineage analysis."""
2
2
 
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
3
7
  import typer
4
8
  from rich.console import Console
5
9
  from rich.table import Table
@@ -7,6 +11,9 @@ from rich.table import Table
7
11
  from sqlcg.core.config import get_backend
8
12
  from sqlcg.core.schema import NodeLabel, RelType
9
13
 
14
+ if TYPE_CHECKING:
15
+ from sqlcg.server.noise_filter import NoiseFilter
16
+
10
17
  app = typer.Typer(help="Lineage analysis")
11
18
  console = Console()
12
19
 
@@ -15,6 +22,7 @@ console = Console()
15
22
  def upstream( # noqa: B008
16
23
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
17
24
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
25
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
18
26
  ) -> None:
19
27
  """Trace upstream column lineage."""
20
28
  # Bounds check for depth to prevent performance DoS
@@ -29,6 +37,28 @@ def upstream( # noqa: B008
29
37
  "RETURN src.id AS id LIMIT 100",
30
38
  {"ref": ref},
31
39
  )
40
+ if not results and len(ref.split(".")) >= 3:
41
+ bare = _bare_ref(ref)
42
+ fallback_results = backend.run_read(
43
+ f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
44
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
45
+ "RETURN src.id AS id LIMIT 100",
46
+ {"bare": bare},
47
+ )
48
+ if fallback_results:
49
+ console.print(
50
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
51
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
52
+ "The INSERT target may have been indexed without a schema prefix. "
53
+ "Multiple tables with the same unqualified name in different schemas "
54
+ "would all match — re-index with an explicit schema for precise results."
55
+ )
56
+ results = fallback_results
57
+ if not raw:
58
+ from sqlcg.server.noise_filter import NoiseFilter
59
+
60
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
61
+ results = _filter_column_results(results, nf)
32
62
  _print_table(results, ["id"])
33
63
 
34
64
 
@@ -36,6 +66,7 @@ def upstream( # noqa: B008
36
66
  def downstream( # noqa: B008
37
67
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
38
68
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
69
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
39
70
  ) -> None:
40
71
  """Trace downstream column lineage."""
41
72
  # Bounds check for depth to prevent performance DoS
@@ -50,6 +81,28 @@ def downstream( # noqa: B008
50
81
  "RETURN dst.id AS id LIMIT 100",
51
82
  {"ref": ref},
52
83
  )
84
+ if not results and len(ref.split(".")) >= 3:
85
+ bare = _bare_ref(ref)
86
+ fallback_results = backend.run_read(
87
+ f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
88
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
89
+ "RETURN dst.id AS id LIMIT 100",
90
+ {"bare": bare},
91
+ )
92
+ if fallback_results:
93
+ console.print(
94
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
95
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
96
+ "The INSERT target may have been indexed without a schema prefix. "
97
+ "Multiple tables with the same unqualified name in different schemas "
98
+ "would all match — re-index with an explicit schema for precise results."
99
+ )
100
+ results = fallback_results
101
+ if not raw:
102
+ from sqlcg.server.noise_filter import NoiseFilter
103
+
104
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
105
+ results = _filter_column_results(results, nf)
53
106
  _print_table(results, ["id"])
54
107
 
55
108
 
@@ -106,6 +159,43 @@ def unused(
106
159
  _print_table(results, ["qualified"])
107
160
 
108
161
 
162
+ def _bare_ref(ref: str) -> str:
163
+ """Strip schema prefix from a ref string, keeping table.column.
164
+
165
+ For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
166
+ For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
167
+ Never uses rsplit — that would yield only the column name for 3-part refs.
168
+ """
169
+ parts = ref.split(".")
170
+ if len(parts) >= 3:
171
+ return ".".join(parts[1:]) # drop schema, keep table.column
172
+ return ref # already bare (no schema prefix)
173
+
174
+
175
+ def _col_id_to_table(col_id: str) -> str:
176
+ """Extract the table-qualified part from a column ID (schema.table.col → schema.table).
177
+
178
+ Column IDs follow the format: schema.table.column or table.column.
179
+ The table part is everything except the last component.
180
+
181
+ Args:
182
+ col_id: A column ID string from the graph.
183
+
184
+ Returns:
185
+ The table-qualified portion (all but the last dotted component).
186
+ """
187
+ parts = col_id.rsplit(".", 1)
188
+ return parts[0] if len(parts) == 2 else col_id
189
+
190
+
191
+ def _filter_column_results(
192
+ results: list[dict],
193
+ nf: NoiseFilter, # type: ignore[name-defined]
194
+ ) -> list[dict]:
195
+ """Filter column-ID result rows by NoiseFilter, dropping rows whose table is noise."""
196
+ return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
197
+
198
+
109
199
  def _print_table(rows: list[dict], columns: list[str]) -> None:
110
200
  """Print results as a Rich table."""
111
201
  if not rows:
@@ -14,14 +14,24 @@ console = Console()
14
14
  @app.command("table")
15
15
  def find_table( # noqa: B008
16
16
  name: str = typer.Argument(..., help="Table name to search for"), # noqa: B008
17
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
17
18
  ) -> None:
18
19
  """Find a table by name."""
20
+ name = name.lower() # graph keys are lowercased at index time (C2 normalization)
19
21
  with get_backend() as backend:
20
22
  results = backend.run_read(
21
23
  f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
22
24
  "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
23
25
  {"name": name},
24
26
  )
27
+ if not raw:
28
+ from sqlcg.server.noise_filter import NoiseFilter
29
+
30
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
31
+ ids = [r["qualified"] for r in results]
32
+ kept, _ = nf.filter_nodes(ids)
33
+ kept_set = set(kept)
34
+ results = [r for r in results if r["qualified"] in kept_set]
25
35
  _print_table(results, ["qualified", "kind"])
26
36
 
27
37
 
@@ -30,6 +40,7 @@ def find_column( # noqa: B008
30
40
  ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
31
41
  ) -> None:
32
42
  """Find a column by table.column reference."""
43
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
33
44
  with get_backend() as backend:
34
45
  results = backend.run_read(
35
46
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
@@ -14,7 +14,7 @@ from rich.progress import (
14
14
  TimeRemainingColumn,
15
15
  )
16
16
 
17
- from sqlcg.core.config import get_backend, get_db_path, get_dialect
17
+ from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
18
18
  from sqlcg.indexer.indexer import Indexer
19
19
 
20
20
  console = Console()
@@ -54,6 +54,9 @@ def index_cmd( # noqa: B008
54
54
  quiet: bool = typer.Option( # noqa: B008
55
55
  False, "--quiet", "-q", help="Suppress summary console output"
56
56
  ),
57
+ verbose: bool = typer.Option( # noqa: B008
58
+ False, "--verbose", "-v", help="Print parse warnings to stderr instead of log file"
59
+ ),
57
60
  debug: bool = typer.Option( # noqa: B008
58
61
  False, "--debug", help="Show detailed log output during indexing"
59
62
  ),
@@ -68,11 +71,40 @@ def index_cmd( # noqa: B008
68
71
  """
69
72
 
70
73
  import logging
74
+ import sys
71
75
 
72
76
  level = logging.DEBUG if debug else logging.CRITICAL
73
77
  logging.getLogger("sqlcg").setLevel(level)
74
78
  logging.getLogger("sqlglot").setLevel(level)
75
79
 
80
+ # Route parse warnings to stderr (--verbose) or to the configured log file.
81
+ sqlcg_log = logging.getLogger("sqlcg")
82
+
83
+ class _CountingHandler(logging.Handler):
84
+ """Counts WARNING+ records emitted during indexing."""
85
+
86
+ def __init__(self) -> None:
87
+ super().__init__(logging.WARNING)
88
+ self.count = 0
89
+
90
+ def emit(self, record: logging.LogRecord) -> None:
91
+ self.count += 1
92
+
93
+ _counter = _CountingHandler()
94
+ sqlcg_log.addHandler(_counter)
95
+
96
+ if verbose:
97
+ _warn_handler: logging.Handler = logging.StreamHandler(sys.stderr)
98
+ _warn_handler.setLevel(logging.WARNING)
99
+ sqlcg_log.addHandler(_warn_handler)
100
+ _warn_log_path = None
101
+ else:
102
+ _warn_log_path = KuzuConfig.from_env().log_path
103
+ _warn_log_path.parent.mkdir(parents=True, exist_ok=True)
104
+ _warn_handler = logging.FileHandler(_warn_log_path)
105
+ _warn_handler.setLevel(logging.WARNING)
106
+ sqlcg_log.addHandler(_warn_handler)
107
+
76
108
  # Set buffer pool size via env var if specified
77
109
  if buffer_pool_size > 0:
78
110
  os.environ["SQLCG_BUFFER_POOL_MB"] = str(buffer_pool_size)
@@ -84,6 +116,45 @@ def index_cmd( # noqa: B008
84
116
  db_path = get_db_path()
85
117
  db_path.parent.mkdir(parents=True, exist_ok=True)
86
118
 
119
+ try:
120
+ _run_index(
121
+ path=path,
122
+ dialect=dialect,
123
+ dbt_manifest=dbt_manifest,
124
+ timeout_per_file=timeout_per_file,
125
+ no_ddl=no_ddl,
126
+ quiet=quiet,
127
+ batch_size=batch_size,
128
+ profile=profile,
129
+ )
130
+ except KeyboardInterrupt:
131
+ # The backend context manager (inside _run_index) has already closed the
132
+ # KuzuDB connection and released the lock by the time we get here.
133
+ console.print("\n[yellow]Interrupted — no partial graph written. Re-run to index.[/yellow]")
134
+ raise typer.Exit(130) from None
135
+ finally:
136
+ sqlcg_log.removeHandler(_warn_handler)
137
+ sqlcg_log.removeHandler(_counter)
138
+ _warn_handler.close()
139
+
140
+ if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
141
+ console.print(
142
+ f"[yellow]Parse warnings written to {_warn_log_path} "
143
+ "— use --verbose to show here.[/yellow]"
144
+ )
145
+
146
+
147
+ def _run_index(
148
+ *,
149
+ path: Path,
150
+ dialect: str | None,
151
+ dbt_manifest: Path | None,
152
+ timeout_per_file: int,
153
+ no_ddl: bool,
154
+ quiet: bool,
155
+ batch_size: int,
156
+ profile: bool,
157
+ ) -> None:
87
158
  with get_backend() as backend:
88
159
  backend.init_schema()
89
160
 
@@ -1,8 +1,19 @@
1
- """Install sqlcg as an MCP server in Claude Code."""
1
+ """Install sqlcg as an MCP server in Claude Code.
2
+
3
+ Write path (in priority order):
4
+ 1. ``claude mcp add -s user sql-code-graph <cmd> <args>`` — the official
5
+ Claude Code CLI write path (reads from ~/.claude.json under the hood).
6
+ 2. Fallback: write ``~/.claude.json`` directly under mcpServers.user when
7
+ the ``claude`` binary is not found or returns non-zero.
8
+
9
+ The previous target (~/.claude/settings.json) was incorrect — Claude Code does
10
+ NOT read MCP servers from that file. See ARCHITECTURE_REVIEW.md §9.2.
11
+ """
2
12
 
3
13
  import json
4
14
  import os
5
15
  import shutil
16
+ import subprocess
6
17
  import sys
7
18
  from pathlib import Path
8
19
 
@@ -11,7 +22,6 @@ from rich.console import Console
11
22
 
12
23
  console = Console()
13
24
 
14
- _SETTINGS_PATH = Path.home() / ".claude" / "settings.json"
15
25
  _SERVER_KEY = "sql-code-graph"
16
26
 
17
27
 
@@ -28,7 +38,10 @@ def install_cmd(
28
38
  help="Repository root for --scope project (default: current directory).",
29
39
  ),
30
40
  ) -> None:
31
- """Register sqlcg as an MCP server in Claude Code (~/.claude/settings.json).
41
+ """Register sqlcg as an MCP server in Claude Code.
42
+
43
+ Runs ``claude mcp add -s user sql-code-graph <cmd> <args>`` when the
44
+ ``claude`` CLI is on PATH; otherwise writes ~/.claude.json directly.
32
45
 
33
46
  Also provisions a Claude skill file (SKILL.md) at the chosen location.
34
47
  Pass --scope project or --scope global to specify where the skill is written.
@@ -39,68 +52,81 @@ def install_cmd(
39
52
  resolved_scope = _resolve_scope(scope)
40
53
 
41
54
  if shutil.which("sqlcg"):
42
- entry: dict = {"command": "sqlcg", "args": ["mcp", "start"]}
55
+ cmd_parts = ["sqlcg", "mcp", "start"]
43
56
  elif shutil.which("uvx"):
44
- entry = {"command": "uvx", "args": ["sql-code-graph", "mcp", "start"]}
57
+ cmd_parts = ["uvx", "sql-code-graph", "mcp", "start"]
45
58
  else:
46
59
  console.print("[red]Error:[/red] Neither 'sqlcg' nor 'uvx' found on PATH.")
47
60
  raise typer.Exit(1)
48
61
 
49
- settings_path = _SETTINGS_PATH
50
- if settings_path.exists():
62
+ entry: dict = {"command": cmd_parts[0], "args": cmd_parts[1:]}
63
+
64
+ if dry_run:
65
+ claude_bin = shutil.which("claude")
66
+ if claude_bin:
67
+ console.print("[dim]--dry-run: would run:[/dim]")
68
+ console.print(f" claude mcp add -s user {_SERVER_KEY} {' '.join(cmd_parts)}")
69
+ else:
70
+ claude_json = Path.home() / ".claude.json"
71
+ console.print("[dim]--dry-run: would write to ~/.claude.json:[/dim]")
72
+ _preview_claude_json(claude_json, entry)
73
+ _provision_skill(resolved_scope, repo, dry_run=True)
74
+ return
75
+
76
+ # --- Try official claude CLI first ---
77
+ claude_bin = shutil.which("claude")
78
+ if claude_bin:
79
+ proc = subprocess.run(
80
+ ["claude", "mcp", "add", "-s", "user", _SERVER_KEY] + cmd_parts,
81
+ capture_output=True,
82
+ text=True,
83
+ )
84
+ if proc.returncode == 0:
85
+ console.print(f"[green]Configured:[/green] {_SERVER_KEY} via `claude mcp add`")
86
+ console.print("\nRestart Claude Code to pick up the new MCP server.")
87
+ _provision_skill(resolved_scope, repo, dry_run=False)
88
+ return
89
+ # Non-zero: log and fall through to ~/.claude.json fallback
90
+ console.print(
91
+ f"[yellow]Warning:[/yellow] `claude mcp add` returned rc={proc.returncode}; "
92
+ "falling back to ~/.claude.json write."
93
+ )
94
+ if proc.stderr:
95
+ console.print(f"[dim]{proc.stderr.strip()}[/dim]")
96
+
97
+ # --- Fallback: write ~/.claude.json directly ---
98
+ claude_json = Path.home() / ".claude.json"
99
+ if claude_json.exists():
51
100
  try:
52
- settings: dict = json.loads(settings_path.read_text())
101
+ data: dict = json.loads(claude_json.read_text())
53
102
  except (json.JSONDecodeError, OSError, TypeError):
54
103
  console.print(
55
- f"[yellow]Warning:[/yellow] {settings_path} contains invalid JSON — "
104
+ f"[yellow]Warning:[/yellow] {claude_json} contains invalid JSON — "
56
105
  "mcpServers key will be added"
57
106
  )
58
- settings = {}
107
+ data = {}
59
108
  else:
60
- settings = {}
61
-
62
- mcp_servers: dict = settings.setdefault("mcpServers", {})
109
+ data = {}
63
110
 
64
- existing_entry = mcp_servers.get(_SERVER_KEY)
65
- if existing_entry == entry:
66
- cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
67
- console.print(f"[green]Already configured:[/green] {_SERVER_KEY} → {cmd_str}")
68
- # Still provision the skill even when MCP entry already exists
69
- _provision_skill(resolved_scope, repo, dry_run)
111
+ existing = data.get("mcpServers", {}).get("user", {}).get(_SERVER_KEY)
112
+ if existing == entry:
113
+ console.print(f"[green]Already configured:[/green] {_SERVER_KEY} (in ~/.claude.json)")
114
+ _provision_skill(resolved_scope, repo, dry_run=False)
70
115
  return
71
116
 
72
- # Print upgrade notice if switching from uvx to sqlcg
73
- if (
74
- existing_entry
75
- and existing_entry.get("command") == "uvx"
76
- and entry.get("command") == "sqlcg"
77
- ):
78
- console.print(
79
- "[blue]Updating[/blue] MCP entry from [dim]uvx[/dim] to local "
80
- "[green]sqlcg[/green] binary (faster startup). Writing…"
81
- )
82
-
83
- mcp_servers[_SERVER_KEY] = entry
84
-
85
- if dry_run is True:
86
- console.print("[dim]--dry-run: would write:[/dim]")
87
- console.print_json(json.dumps(settings, indent=2))
88
- _provision_skill(resolved_scope, repo, dry_run)
89
- return
117
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
90
118
 
91
119
  try:
92
- settings_path.parent.mkdir(parents=True, exist_ok=True)
93
- tmp = settings_path.with_suffix(".tmp")
94
- tmp.write_text(json.dumps(settings, indent=2) + "\n")
95
- os.replace(tmp, settings_path)
120
+ tmp = claude_json.with_suffix(".tmp")
121
+ tmp.write_text(json.dumps(data, indent=2) + "\n")
122
+ os.replace(tmp, claude_json)
96
123
  except (OSError, TypeError, AttributeError):
97
124
  pass # Ignore file I/O errors in testing
98
125
 
99
- cmd_str = f"{entry['command']} {' '.join(entry['args'])}"
126
+ cmd_str = " ".join(cmd_parts)
100
127
  console.print(f"[green]Configured:[/green] {_SERVER_KEY} → {cmd_str}")
101
- console.print(f"[dim]Written to {settings_path}[/dim]")
128
+ console.print(f"[dim]Written to {claude_json}[/dim]")
102
129
 
103
- # Note about cold cache if uvx was chosen
104
130
  if entry.get("command") == "uvx":
105
131
  console.print(
106
132
  "[yellow]Note:[/yellow] First startup downloads dependencies (~30s). "
@@ -108,9 +134,20 @@ def install_cmd(
108
134
  )
109
135
 
110
136
  console.print("\nRestart Claude Code to pick up the new MCP server.")
137
+ _provision_skill(resolved_scope, repo, dry_run=False)
138
+
111
139
 
112
- # Provision the skill file
113
- _provision_skill(resolved_scope, repo, dry_run)
140
+ def _preview_claude_json(claude_json: Path, entry: dict) -> None:
141
+ """Print what would be written to ~/.claude.json without touching the file."""
142
+ if claude_json.exists():
143
+ try:
144
+ data: dict = json.loads(claude_json.read_text())
145
+ except (json.JSONDecodeError, OSError, TypeError):
146
+ data = {}
147
+ else:
148
+ data = {}
149
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
150
+ console.print_json(json.dumps(data, indent=2))
114
151
 
115
152
 
116
153
  def _resolve_scope(scope: str | None) -> str:
sqlcg/cli/commands/mcp.py CHANGED
@@ -22,28 +22,34 @@ def _server_entry() -> dict:
22
22
 
23
23
  @app.command("setup")
24
24
  def mcp_setup(print_only: bool = typer.Option(True, "--print/--write")) -> None:
25
- """Print or write MCP server config JSON."""
25
+ """Print or write MCP server config JSON.
26
+
27
+ --print (default): print the JSON snippet for manual insertion.
28
+ --write: write to ~/.claude.json under mcpServers.user (the correct path
29
+ for Claude Code — not settings.json, which Claude Code does not read
30
+ for MCP servers).
31
+ """
26
32
  entry = _server_entry()
27
33
  if print_only:
28
34
  console.print_json(json.dumps({"mcpServers": {_SERVER_KEY: entry}}, indent=2))
29
35
  return
30
36
 
31
- config_path = Path.home() / ".claude" / "settings.json"
32
- if config_path.exists():
37
+ # Write to ~/.claude.json (correct path for Claude Code MCP servers)
38
+ claude_json = Path.home() / ".claude.json"
39
+ if claude_json.exists():
33
40
  try:
34
- settings: dict = json.loads(config_path.read_text())
41
+ data: dict = json.loads(claude_json.read_text())
35
42
  except json.JSONDecodeError:
36
- settings = {}
43
+ data = {}
37
44
  else:
38
- settings = {}
45
+ data = {}
39
46
 
40
- settings.setdefault("mcpServers", {})[_SERVER_KEY] = entry
47
+ data.setdefault("mcpServers", {}).setdefault("user", {})[_SERVER_KEY] = entry
41
48
 
42
- config_path.parent.mkdir(parents=True, exist_ok=True)
43
- tmp = config_path.with_suffix(".tmp")
44
- tmp.write_text(json.dumps(settings, indent=2) + "\n")
45
- os.replace(tmp, config_path)
46
- console.print(f"[green]Configuration written to[/green] {config_path}")
49
+ tmp = claude_json.with_suffix(".tmp")
50
+ tmp.write_text(json.dumps(data, indent=2) + "\n")
51
+ os.replace(tmp, claude_json)
52
+ console.print(f"[green]Configuration written to[/green] {claude_json}")
47
53
  console.print("Note: Binary is `sqlcg`; PyPI package is `sql-code-graph`.")
48
54
 
49
55
 
@@ -60,6 +60,9 @@ def reindex_cmd( # noqa: B008
60
60
  from sqlcg.core.schema import SCHEMA_VERSION
61
61
  from sqlcg.indexer.indexer import Indexer
62
62
 
63
+ # Resolve to absolute path so ignore-spec and git delta receive an absolute root
64
+ path = path.resolve()
65
+
63
66
  # Resolve dialect
64
67
  if dialect == "auto":
65
68
  dialect = get_dialect(path)
sqlcg/core/config.py CHANGED
@@ -19,6 +19,10 @@ class KuzuConfig(BaseModel):
19
19
  default=0,
20
20
  description="KuzuDB buffer pool size in MB (0 = use KuzuDB default)",
21
21
  )
22
+ log_path: Path = Field(
23
+ default_factory=lambda: Path.home() / ".sqlcg" / "index.log",
24
+ description="Path for parse-warning log file written during indexing",
25
+ )
22
26
 
23
27
  @classmethod
24
28
  def from_env(cls) -> "KuzuConfig":
@@ -29,9 +33,11 @@ class KuzuConfig(BaseModel):
29
33
  """
30
34
  env_path = os.getenv("SQLCG_DB_PATH")
31
35
  env_buf = os.getenv("SQLCG_BUFFER_POOL_MB")
36
+ env_log = os.getenv("SQLCG_LOG_PATH")
32
37
  return cls(
33
38
  db_path=Path(env_path) if env_path else Path.home() / ".sqlcg" / "graph.db",
34
39
  buffer_pool_size_mb=int(env_buf) if env_buf else 0,
40
+ log_path=Path(env_log) if env_log else Path.home() / ".sqlcg" / "index.log",
35
41
  )
36
42
 
37
43
 
@@ -138,6 +144,7 @@ def get_noise_filter_patterns(path: Path) -> list[str]:
138
144
  """
139
145
  default_patterns = [
140
146
  "*_bck",
147
+ "*_bck_*", # catches mid-suffix variants e.g. foo_bck_us39553, bar_bck_archive
141
148
  "*_bck_us",
142
149
  "*_bck_[0-9]*",
143
150
  "*_backup",
@@ -93,10 +93,14 @@ def _classify_error(msg: str) -> str:
93
93
  if not msg:
94
94
  return "other"
95
95
 
96
- # Timeout errors
96
+ # Timeout errors (including pool-path poison retries)
97
97
  if msg.startswith("timeout:"):
98
98
  return "timeout"
99
99
 
100
+ # Poison-retry: file repeatedly timed out in pool worker; treat as timeout bucket
101
+ if msg.startswith("skipped:poison"):
102
+ return "timeout"
103
+
100
104
  # Skip markers
101
105
  if msg.startswith("col_lineage_skip:"):
102
106
  if "pure_ddl_file" in msg: