sql-code-graph 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -219,7 +219,7 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
219
219
  | **Search & meta** | |
220
220
  | `search_sql_pattern(query)` | Full-text search across indexed SQL |
221
221
  | `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
222
- | `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
222
+ | `db_info()` | Graph health, node counts, parse quality breakdown, warnings, freshness (indexed SHA vs HEAD) |
223
223
  | `execute_cypher(query)` | Raw Cypher query against the graph |
224
224
  | `submit_feedback(...)` | Report a false positive/negative to improve metrics |
225
225
 
@@ -228,6 +228,12 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
228
228
  > `table.column`. Each returned node carries both `name` (the bare column) and
229
229
  > `table` (the owning `schema.table`), so results are navigable without a second lookup.
230
230
 
231
+ > **Provenance fields**: lineage edges now carry `file`, `line`, and `expression`
232
+ > (where the lineage was derived from), a `confidence` of `1.0` for plainly-parsed
233
+ > facts (lower for inferred edges, with a `reason`), and a `table_kind`
234
+ > (`table` / `cte` / `derived` / `external`) so CTE and derived aliases are
235
+ > distinguishable from real tables.
236
+
231
237
  > **LLM agent tip**: call `db_info()` before lineage queries to check that
232
238
  > `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
233
239
  > is high, column lineage will be limited for those files — use table-level tools
@@ -243,16 +249,21 @@ sqlcg db init # initialise graph database
243
249
  sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
244
250
  sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
245
251
  sqlcg index <path> --profile # index + print per-stage timing and slowest files
252
+ sqlcg index <path> --include-working-tree # also index uncommitted changes (marks graph dirty)
246
253
  sqlcg reindex <path> --from <sha> --to <sha> # incremental resync of only changed files
247
254
  sqlcg analyze unused # tables with no query references
248
255
  sqlcg analyze upstream/downstream # trace lineage from the CLI
249
256
  sqlcg find table/column/pattern # search the graph
250
257
  sqlcg watch <path> # watch for file changes
258
+ sqlcg db info # graph stats + freshness (indexed SHA vs HEAD)
251
259
  sqlcg git install-hooks # install post-checkout + post-merge resync hooks
252
260
  sqlcg gain # show usage metrics
253
261
  sqlcg report # generate FP/error report
254
262
  sqlcg mcp best-practices # print the fact/heuristic boundary for the MCP tools
255
263
  sqlcg mcp start # start MCP server manually
264
+ sqlcg mcp status # server status JSON (via control socket)
265
+ sqlcg mcp stop # stop the running MCP server gracefully
266
+ sqlcg mcp restart # stop the server (client must respawn it)
256
267
  sqlcg version # show installed version
257
268
  ```
258
269
 
@@ -0,0 +1,65 @@
1
+ sqlcg/__init__.py,sha256=CWoJX8Awg5Tf6p2E5lT66EFE8kd-Aru8aujKizglgdo,115
2
+ sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
+ sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
+ sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
+ sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
+ sqlcg/cli/commands/analyze.py,sha256=qtvM_TeqYzaLClZksM_o5hAdksZ9sqLM9HGDtLDrXwY,12646
7
+ sqlcg/cli/commands/db.py,sha256=Q3VEdNJzhrs26KtskI5j9B3C0vBTZe4VN2sZXZG_6BY,7434
8
+ sqlcg/cli/commands/find.py,sha256=5MbGavA-QS75zwm35dYK-0H1bJ1Zd_gJHgQ_lXnpMDU,3126
9
+ sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
+ sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
11
+ sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
12
+ sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
+ sqlcg/cli/commands/mcp.py,sha256=2gDsNvtj1Ql7PkjX9dHWAzOK0uCPRR5DGdBAzJa8PIU,6005
14
+ sqlcg/cli/commands/reindex.py,sha256=n1mQTYAZshtCKPgpR12S6ZMCqO3cSUtpCXjzb1PuZxU,11857
15
+ sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
+ sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
+ sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
+ sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
+ sqlcg/core/config.py,sha256=8QtFNRnrzLK1Zw93AKX37h6bSASDLv-42FzDQ7zxTtI,13079
20
+ sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
+ sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
22
+ sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
23
+ sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
24
+ sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
25
+ sqlcg/core/queries.cypher,sha256=91Pb10-ekSi0812wuHJTdXcMY4sT53_5o-oHhfSP_DQ,4967
26
+ sqlcg/core/queries.py,sha256=JLgV4MIgP7KVIQ0xpGj3_-MBhBfY_9XPoCdcI2mO-TM,2148
27
+ sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
+ sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
29
+ sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
30
+ sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
31
+ sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
32
+ sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
33
+ sqlcg/indexer/indexer.py,sha256=DYdUr59hRKCjJTRiQUWOC72JUQ9TgBrH0W4UOYNwqx8,60913
34
+ sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
35
+ sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
36
+ sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
37
+ sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
38
+ sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
39
+ sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8AaWcc,7320
40
+ sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
41
+ sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
42
+ sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
43
+ sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
44
+ sqlcg/parsers/base.py,sha256=uL0W22zpbIz_9eq-i-4LSlonxy2J1yChuISMLSYgvRU,49345
45
+ sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
46
+ sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
47
+ sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
48
+ sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
49
+ sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
50
+ sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
51
+ sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
52
+ sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
53
+ sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
54
+ sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
55
+ sqlcg/server/server.py,sha256=gzeO5WbSNfGxgIKte01uy0VjO1_basI2ChSuAwr0dBc,14844
56
+ sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
57
+ sqlcg/server/tools.py,sha256=JvijDC0h5uHjZyZUIZq9sztNG3W5sr-Yy5rHwOVuJec,66642
58
+ sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
59
+ sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
60
+ sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
61
+ sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
62
+ sql_code_graph-1.1.0.dist-info/METADATA,sha256=blW1eYNjfy6P61747uUtc22qm5MDETMcVYImXPa762g,13615
63
+ sql_code_graph-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
+ sql_code_graph-1.1.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
65
+ sql_code_graph-1.1.0.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.1"
3
+ __version__ = "1.1.0"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -1,12 +1,20 @@
1
1
  """Analyze command for lineage analysis."""
2
2
 
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
3
7
  import typer
4
8
  from rich.console import Console
5
9
  from rich.table import Table
6
10
 
7
11
  from sqlcg.core.config import get_backend
12
+ from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
8
13
  from sqlcg.core.schema import NodeLabel, RelType
9
14
 
15
+ if TYPE_CHECKING:
16
+ from sqlcg.server.noise_filter import NoiseFilter
17
+
10
18
  app = typer.Typer(help="Lineage analysis")
11
19
  console = Console()
12
20
 
@@ -15,6 +23,10 @@ console = Console()
15
23
  def upstream( # noqa: B008
16
24
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
17
25
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
26
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
27
+ include_intermediate: bool = typer.Option( # noqa: B008
28
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
29
+ ),
18
30
  ) -> None:
19
31
  """Trace upstream column lineage."""
20
32
  # Bounds check for depth to prevent performance DoS
@@ -22,20 +34,60 @@ def upstream( # noqa: B008
22
34
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
23
35
  raise typer.Exit(1)
24
36
 
37
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
38
+ kind_filter = (
39
+ ""
40
+ if include_intermediate
41
+ else "MATCH (t:SqlTable {qualified: src.table_qualified}) "
42
+ "WHERE t.kind IN ['table', 'external'] "
43
+ )
44
+
25
45
  with get_backend() as backend:
26
46
  results = backend.run_read(
27
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
28
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
29
- "RETURN src.id AS id LIMIT 100",
47
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
48
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
49
+ f"{kind_filter}"
50
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
51
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
52
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
30
53
  {"ref": ref},
31
54
  )
32
- _print_table(results, ["id"])
55
+ if not results and len(ref.split(".")) >= 3:
56
+ bare = _bare_ref(ref)
57
+ fallback_results = backend.run_read(
58
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
59
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
60
+ f"{kind_filter}"
61
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
62
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
63
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
64
+ {"bare": bare},
65
+ )
66
+ if fallback_results:
67
+ console.print(
68
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
69
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
70
+ "The INSERT target may have been indexed without a schema prefix. "
71
+ "Multiple tables with the same unqualified name in different schemas "
72
+ "would all match — re-index with an explicit schema for precise results."
73
+ )
74
+ results = fallback_results
75
+ if not raw:
76
+ from sqlcg.server.noise_filter import NoiseFilter
77
+
78
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
79
+ results = _filter_column_results(results, nf)
80
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
33
81
 
34
82
 
35
83
  @app.command("downstream")
36
84
  def downstream( # noqa: B008
37
85
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
38
86
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
87
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
88
+ include_intermediate: bool = typer.Option( # noqa: B008
89
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
90
+ ),
39
91
  ) -> None:
40
92
  """Trace downstream column lineage."""
41
93
  # Bounds check for depth to prevent performance DoS
@@ -43,40 +95,114 @@ def downstream( # noqa: B008
43
95
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
44
96
  raise typer.Exit(1)
45
97
 
98
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
99
+ kind_filter = (
100
+ ""
101
+ if include_intermediate
102
+ else "MATCH (t:SqlTable {qualified: dst.table_qualified}) "
103
+ "WHERE t.kind IN ['table', 'external'] "
104
+ )
105
+
46
106
  with get_backend() as backend:
47
107
  results = backend.run_read(
48
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
49
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
50
- "RETURN dst.id AS id LIMIT 100",
108
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
109
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
110
+ f"{kind_filter}"
111
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
112
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
113
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
51
114
  {"ref": ref},
52
115
  )
53
- _print_table(results, ["id"])
116
+ if not results and len(ref.split(".")) >= 3:
117
+ bare = _bare_ref(ref)
118
+ fallback_results = backend.run_read(
119
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
120
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
121
+ f"{kind_filter}"
122
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
123
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
124
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
125
+ {"bare": bare},
126
+ )
127
+ if fallback_results:
128
+ console.print(
129
+ f"[yellow]Hint:[/yellow] No results for '{ref}'. "
130
+ f"Found {len(fallback_results)} edge(s) under bare name '{bare}'. "
131
+ "The INSERT target may have been indexed without a schema prefix. "
132
+ "Multiple tables with the same unqualified name in different schemas "
133
+ "would all match — re-index with an explicit schema for precise results."
134
+ )
135
+ results = fallback_results
136
+ if not raw:
137
+ from sqlcg.server.noise_filter import NoiseFilter
138
+
139
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
140
+ results = _filter_column_results(results, nf)
141
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
142
+
143
+ # Append external consumer rows for terminal tables (scalar query, one per terminal).
144
+ # Resolve terminal tables from the column results; fall back to the root column's table.
145
+ terminal_tables: set[str] = set()
146
+ for r in results:
147
+ tbl = _col_id_to_table(r["id"])
148
+ if tbl:
149
+ terminal_tables.add(tbl)
150
+ # Also check the root column's table (in case no downstream columns were found).
151
+ root_parts = ref.rsplit(".", 1)
152
+ if len(root_parts) == 2:
153
+ terminal_tables.add(root_parts[0])
154
+ consumer_rows: list[dict] = []
155
+ for tbl in sorted(terminal_tables):
156
+ rows_ec = backend.run_read(
157
+ GET_TABLE_EXTERNAL_CONSUMERS_QUERY,
158
+ {"table_qualified": tbl},
159
+ )
160
+ for ec in rows_ec:
161
+ consumer_rows.append(
162
+ {"id": f"[external] {ec['name']} ({ec['consumer_type']})", "file:line": ""}
163
+ )
164
+ if consumer_rows:
165
+ _print_table(consumer_rows, ["id", "file:line"])
54
166
 
55
167
 
56
168
@app.command("impact")
def impact(  # noqa: B008
    table: str = typer.Argument(..., help="Table name to analyze"),  # noqa: B008
    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
) -> None:
    """Show all queries impacted by a table.

    Looks up the table by its ``qualified`` name and lists the distinct
    queries that have a SELECTS_FROM relationship pointing at it (at most 100
    rows). Unless ``--raw`` is given, rows whose query target table is
    classified as noise by the configured NoiseFilter are dropped before
    printing.

    Args:
        table: Qualified table name to match exactly.
        raw: When True, skip noise filtering and print every row returned.
    """
    with get_backend() as backend:
        results = backend.run_read(
            f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
            f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
            "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
            {"t": table},
        )
        if not raw:
            from sqlcg.server.noise_filter import NoiseFilter

            nf = NoiseFilter.from_config()
            # The query always returns a "target" key, so dict.get's default
            # never applies; a null target_table arrives as None. Coerce it
            # to "" so is_noise always receives a string.
            results = [r for r in results if not nf.is_noise(r.get("target") or "")]
        # Only id and kind are displayed; target is used for filtering only.
        _print_table(results, ["id", "kind"])
69
187
 
70
188
 
71
189
  @app.command("failures")
72
190
  def failures(
73
191
  cause: str | None = typer.Option( # noqa: B008
74
- None, "--cause", help="Filter by E-code bucket (e.g. E5, timeout)"
192
+ None,
193
+ "--cause",
194
+ help=(
195
+ "Filter by E-code bucket. Valid values: "
196
+ "timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip"
197
+ ),
75
198
  ),
76
199
  limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
77
200
  ) -> None:
78
201
  """List files that failed to parse, with their dominant cause (E-code bucket).
79
202
 
203
+ Valid --cause buckets (from highest to lowest severity):
204
+ timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
205
+
80
206
  Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
81
207
  with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
82
208
  an earlier version.
@@ -95,17 +221,79 @@ def failures(
95
221
@app.command("unused")
def unused(
    threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
) -> None:
    """Find tables with no query references.

    Lists up to 100 distinct tables that have no incoming SELECTS_FROM
    relationship. Unless ``--raw`` is given, tables the configured
    NoiseFilter classifies as noise are removed before printing.
    """
    # NOTE(review): `threshold` is accepted on the command line but is not
    # referenced by the query below — confirm whether it should be applied.
    cypher = (
        f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
        "RETURN DISTINCT t.qualified AS qualified LIMIT 100"
    )
    with get_backend() as backend:
        rows = backend.run_read(cypher, {})
        if not raw:
            from sqlcg.server.noise_filter import NoiseFilter

            noise = NoiseFilter.from_config()
            kept = []
            for row in rows:
                if noise.is_noise(row["qualified"]):
                    continue
                kept.append(row)
            rows = kept
        _print_table(rows, ["qualified"])
107
239
 
108
240
 
241
+ def _bare_ref(ref: str) -> str:
242
+ """Strip schema prefix from a ref string, keeping table.column.
243
+
244
+ For a 3-part ref ("mart.fact_t.amount") this returns "fact_t.amount".
245
+ For a 2-part ref ("fact_t.amount") this returns the ref unchanged.
246
+ Never uses rsplit — that would yield only the column name for 3-part refs.
247
+ """
248
+ parts = ref.split(".")
249
+ if len(parts) >= 3:
250
+ return ".".join(parts[1:]) # drop schema, keep table.column
251
+ return ref # already bare (no schema prefix)
252
+
253
+
254
+ def _col_id_to_table(col_id: str) -> str:
255
+ """Extract the table-qualified part from a column ID (schema.table.col → schema.table).
256
+
257
+ Column IDs follow the format: schema.table.column or table.column.
258
+ The table part is everything except the last component.
259
+
260
+ Args:
261
+ col_id: A column ID string from the graph.
262
+
263
+ Returns:
264
+ The table-qualified portion (all but the last dotted component).
265
+ """
266
+ parts = col_id.rsplit(".", 1)
267
+ return parts[0] if len(parts) == 2 else col_id
268
+
269
+
270
def _filter_column_results(
    results: list[dict],
    nf: NoiseFilter,  # type: ignore[name-defined]
) -> list[dict]:
    """Return the rows of ``results`` whose owning table is not noise.

    Each row must carry an ``"id"`` column ID; its table portion is derived
    with ``_col_id_to_table`` and checked with ``nf.is_noise``. Input order
    is preserved and the input list is not modified.
    """
    kept: list[dict] = []
    for row in results:
        owner = _col_id_to_table(row["id"])
        if nf.is_noise(owner):
            continue
        kept.append(row)
    return kept
276
+
277
+
278
+ def _add_file_line_col(rows: list[dict]) -> list[dict]:
279
+ """Add a 'file:line' composite column from 'file' and 'line' fields.
280
+
281
+ Formats as 'path/to/file.sql:N' when both are present, or '?' when either
282
+ is absent (multi-hop upstream where file/line is not available).
283
+ """
284
+ result = []
285
+ for row in rows:
286
+ new_row = dict(row)
287
+ file = row.get("file")
288
+ line = row.get("line")
289
+ if file and line:
290
+ new_row["file:line"] = f"{file}:{line}"
291
+ else:
292
+ new_row["file:line"] = "?"
293
+ result.append(new_row)
294
+ return result
295
+
296
+
109
297
  def _print_table(rows: list[dict], columns: list[str]) -> None:
110
298
  """Print results as a Rich table."""
111
299
  if not rows:
sqlcg/cli/commands/db.py CHANGED
@@ -2,11 +2,13 @@
2
2
 
3
3
  import os
4
4
  import shutil
5
+ from pathlib import Path
5
6
 
6
7
  import typer
7
8
  from rich.console import Console
8
9
 
9
10
  from sqlcg.core.config import get_backend, get_db_path
11
+ from sqlcg.core.freshness import compute_freshness, render_freshness_line
10
12
  from sqlcg.core.schema import NodeLabel
11
13
  from sqlcg.utils.logging import getLogger
12
14
 
@@ -77,6 +79,21 @@ def db_info() -> None:
77
79
  version = backend.get_schema_version() or "unknown"
78
80
  console.print(f"Schema version: {version}")
79
81
 
82
+ # Freshness block — only shown when the DB has been indexed from a git repo
83
+ try:
84
+ indexed_sha = backend.get_indexed_sha()
85
+ repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
86
+ if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
87
+ repo_root = Path(repo_rows[0]["path"])
88
+ f = compute_freshness(repo_root, indexed_sha)
89
+ console.print(render_freshness_line(f))
90
+ except NotImplementedError:
91
+ # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
92
+ pass
93
+ except Exception as e:
94
+ # Any unexpected error in the freshness block must not crash db info
95
+ logger.debug(f"Freshness check skipped: {e}")
96
+
80
97
  # Show node counts for all labels
81
98
  for label in NodeLabel:
82
99
  try:
@@ -14,27 +14,45 @@ console = Console()
14
14
@app.command("table")
def find_table(  # noqa: B008
    name: str = typer.Argument(..., help="Table name to search for"),  # noqa: B008
    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
) -> None:
    """Find a table by name.

    Performs a substring match of the lowercased ``name`` against each
    table's ``qualified`` name (at most 50 rows) and prints the qualified
    name and kind. Unless ``--raw`` is given, only rows kept by the
    configured NoiseFilter are printed.
    """
    # Graph keys are lowercased at index time (C2 normalization), so the
    # search term is lowercased to match.
    needle = name.lower()
    cypher = (
        f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
        "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50"
    )
    with get_backend() as backend:
        rows = backend.run_read(cypher, {"name": needle})
        if not raw:
            from sqlcg.server.noise_filter import NoiseFilter

            # repo_root=None → falls back to Path.cwd()
            noise = NoiseFilter.from_config()
            kept, _dropped = noise.filter_nodes([row["qualified"] for row in rows])
            allowed = set(kept)
            rows = [row for row in rows if row["qualified"] in allowed]
        _print_table(rows, ["qualified", "kind"])
26
36
 
27
37
 
28
38
  @app.command("column")
29
39
  def find_column( # noqa: B008
30
40
  ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
41
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
31
42
  ) -> None:
32
43
  """Find a column by table.column reference."""
44
+ ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
33
45
  with get_backend() as backend:
34
46
  results = backend.run_read(
35
47
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
36
48
  {"ref": ref},
37
49
  )
50
+ if not raw:
51
+ from sqlcg.server.noise_filter import NoiseFilter
52
+
53
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
54
+ # Filter on the schema.table portion of each column id (schema.table.column)
55
+ results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
38
56
  _print_table(results, ["id"])
39
57
 
40
58
 
sqlcg/cli/commands/git.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Git integration commands for sqlcg."""
2
2
 
3
+ import shutil
4
+ import sys
3
5
  from pathlib import Path
4
6
  from typing import NamedTuple
5
7
 
@@ -14,36 +16,79 @@ app = typer.Typer(name="git", help="Git integration commands")
14
16
class _HookSpec(NamedTuple):
    """Description of one git hook that sqlcg can install."""

    # Name of the hook file created inside the repository's hooks directory.
    filename: str
    # Marker comment; a hook file that already contains it counts as installed.
    sentinel: str
    # Shell script text with a {sqlcg_bin} placeholder, filled via str.format.
    script_template: str
18
20
 
19
21
 
22
# Hook script templates — use {sqlcg_bin} as the placeholder for the resolved binary.
# The placeholder is wrapped in double quotes so that an absolute path containing
# spaces or backslashes still runs as a single command word under /bin/sh.
# The sentinel comments (e.g. "# sqlcg post-checkout hook") must stay byte-for-byte
# unchanged so R9 idempotency is preserved: _install_single_hook matches them verbatim.
_HOOKS: list[_HookSpec] = [
    _HookSpec(
        filename="post-checkout",
        sentinel="# sqlcg post-checkout hook",
        script_template=(
            "#!/bin/sh\n"
            "# sqlcg post-checkout hook — incremental resync after branch switch\n"
            "# $3 == 1 means branch checkout (not file checkout); skip file checkouts\n"
            '[ "$3" = "1" ] || exit 0\n'
            '"{sqlcg_bin}" reindex --from "$1" --to "$2"'
            ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet --notify'
            ' || echo "sqlcg: graph not updated (server busy/locked)'
            " -- run 'sqlcg mcp status'\" >&2\n"
        ),
    ),
    _HookSpec(
        filename="post-merge",
        sentinel="# sqlcg post-merge hook",
        script_template="""\
#!/bin/sh
# sqlcg post-merge hook — incremental resync after pull/merge
# git sets ORIG_HEAD to the pre-merge HEAD; pass it as --from so --notify can route
# through a running server (same path as post-checkout). If ORIG_HEAD is unset (e.g.
# first-ever merge / gc'd), fall back to the standalone stored-SHA delta (direct write).
PREV=$(git rev-parse --verify --quiet ORIG_HEAD)
TOP=$(git rev-parse --show-toplevel)
if [ -n "$PREV" ]; then
    "{sqlcg_bin}" reindex --from "$PREV" --to HEAD "$TOP" --dialect auto --quiet --notify \\
        || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
else
    "{sqlcg_bin}" reindex "$TOP" --dialect auto --quiet --notify \\
        || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
fi
""",
    ),
]
44
61
 
45
62
 
46
- def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
63
def _resolve_sqlcg_bin() -> str:
    """Work out which sqlcg executable the generated hooks should invoke.

    Candidates are tried in this order:
      1. ``shutil.which("sqlcg")`` — whatever is found on the current $PATH.
      2. ``sys.argv[0]``, fully resolved, when its file name is exactly
         "sqlcg", it is a regular file, and any execute bit is set.
      3. The bare string "sqlcg", after printing a warning, leaving the
         lookup to $PATH at hook-run time.

    Returns:
        The chosen path as a string, or "sqlcg" when neither of the first
        two candidates is usable.
    """
    # 1. Prefer the binary visible on $PATH.
    if on_path := shutil.which("sqlcg"):
        return on_path

    # 2. Otherwise inspect the script this process was launched from.
    launcher = Path(sys.argv[0]).resolve()
    launcher_usable = (
        launcher.name == "sqlcg" and launcher.is_file() and launcher.stat().st_mode & 0o111
    )
    if launcher_usable:
        return str(launcher)

    # 3. Nothing resolvable: warn and fall back to the bare command name.
    console.print(
        "[yellow]Warning: could not resolve the sqlcg binary path; the generated hooks "
        "will use bare 'sqlcg' and rely on $PATH at hook-run time.[/yellow]"
    )
    return "sqlcg"
89
+
90
+
91
+ def _install_single_hook(hooks_dir: Path, spec: _HookSpec, sqlcg_bin: str) -> None:
47
92
  """Install one git hook idempotently.
48
93
 
49
94
  If the hook file already contains the sentinel, it is already installed — skip silently.
@@ -51,6 +96,7 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
51
96
  Otherwise, write the hook file and set 0o755.
52
97
  """
53
98
  hook_path = hooks_dir / spec.filename
99
+ script = spec.script_template.format(sqlcg_bin=sqlcg_bin)
54
100
 
55
101
  if hook_path.exists():
56
102
  existing_content = hook_path.read_text()
@@ -68,10 +114,10 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
68
114
  f".git/hooks/{spec.filename}:[/yellow]"
69
115
  )
70
116
  console.print("")
71
- console.print("[cyan]" + spec.script.rstrip() + "[/cyan]")
117
+ console.print("[cyan]" + script.rstrip() + "[/cyan]")
72
118
  return
73
119
 
74
- hook_path.write_text(spec.script)
120
+ hook_path.write_text(script)
75
121
  hook_path.chmod(0o755)
76
122
  console.print(f"[green]Installed git hook:[/green] .git/hooks/{spec.filename}")
77
123
 
@@ -87,6 +133,8 @@ def install_hooks(
87
133
  Writes a post-checkout hook that triggers incremental resync after branch switches
88
134
  and a post-merge hook that triggers resync after pulls/merges.
89
135
  Idempotent: running multiple times produces one hook entry per hook.
136
+ The hooks embed the absolute path of the installing sqlcg binary so version skew
137
+ between the installed binary and the hook command is avoided.
90
138
  """
91
139
  if repo is None:
92
140
  repo = Path.cwd()
@@ -100,5 +148,7 @@ def install_hooks(
100
148
 
101
149
  hooks_dir.mkdir(parents=True, exist_ok=True)
102
150
 
151
+ sqlcg_bin = _resolve_sqlcg_bin()
152
+
103
153
  for spec in _HOOKS:
104
- _install_single_hook(hooks_dir, spec)
154
+ _install_single_hook(hooks_dir, spec, sqlcg_bin)