sql-code-graph 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.2
3
+ Version: 1.1.0
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -219,7 +219,7 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
219
219
  | **Search & meta** | |
220
220
  | `search_sql_pattern(query)` | Full-text search across indexed SQL |
221
221
  | `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
222
- | `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
222
+ | `db_info()` | Graph health, node counts, parse quality breakdown, warnings, freshness (indexed SHA vs HEAD) |
223
223
  | `execute_cypher(query)` | Raw Cypher query against the graph |
224
224
  | `submit_feedback(...)` | Report a false positive/negative to improve metrics |
225
225
 
@@ -228,6 +228,12 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
228
228
  > `table.column`. Each returned node carries both `name` (the bare column) and
229
229
  > `table` (the owning `schema.table`), so results are navigable without a second lookup.
230
230
 
231
+ > **Provenance fields**: lineage edges now carry `file`, `line`, and `expression`
232
+ > (where the lineage was derived from), a `confidence` of `1.0` for plainly-parsed
233
+ > facts (lower for inferred edges, with a `reason`), and a `table_kind`
234
+ > (`table` / `cte` / `derived` / `external`) so CTE and derived aliases are
235
+ > distinguishable from real tables.
236
+
231
237
  > **LLM agent tip**: call `db_info()` before lineage queries to check that
232
238
  > `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
233
239
  > is high, column lineage will be limited for those files — use table-level tools
@@ -243,16 +249,21 @@ sqlcg db init # initialise graph database
243
249
  sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
244
250
  sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
245
251
  sqlcg index <path> --profile # index + print per-stage timing and slowest files
252
+ sqlcg index <path> --include-working-tree # also index uncommitted changes (marks graph dirty)
246
253
  sqlcg reindex <path> --from <sha> --to <sha> # incremental resync of only changed files
247
254
  sqlcg analyze unused # tables with no query references
248
255
  sqlcg analyze upstream/downstream # trace lineage from the CLI
249
256
  sqlcg find table/column/pattern # search the graph
250
257
  sqlcg watch <path> # watch for file changes
258
+ sqlcg db info # graph stats + freshness (indexed SHA vs HEAD)
251
259
  sqlcg git install-hooks # install post-checkout + post-merge resync hooks
252
260
  sqlcg gain # show usage metrics
253
261
  sqlcg report # generate FP/error report
254
262
  sqlcg mcp best-practices # print the fact/heuristic boundary for the MCP tools
255
263
  sqlcg mcp start # start MCP server manually
264
+ sqlcg mcp status # server status JSON (via control socket)
265
+ sqlcg mcp stop # stop the running MCP server gracefully
266
+ sqlcg mcp restart # stop the server (client must respawn it)
256
267
  sqlcg version # show installed version
257
268
  ```
258
269
 
@@ -1,37 +1,38 @@
1
- sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
1
+ sqlcg/__init__.py,sha256=CWoJX8Awg5Tf6p2E5lT66EFE8kd-Aru8aujKizglgdo,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
7
- sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
8
- sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
6
+ sqlcg/cli/commands/analyze.py,sha256=qtvM_TeqYzaLClZksM_o5hAdksZ9sqLM9HGDtLDrXwY,12646
7
+ sqlcg/cli/commands/db.py,sha256=Q3VEdNJzhrs26KtskI5j9B3C0vBTZe4VN2sZXZG_6BY,7434
8
+ sqlcg/cli/commands/find.py,sha256=5MbGavA-QS75zwm35dYK-0H1bJ1Zd_gJHgQ_lXnpMDU,3126
9
9
  sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
- sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
11
- sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
10
+ sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
11
+ sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
12
12
  sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
- sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
14
- sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
13
+ sqlcg/cli/commands/mcp.py,sha256=2gDsNvtj1Ql7PkjX9dHWAzOK0uCPRR5DGdBAzJa8PIU,6005
14
+ sqlcg/cli/commands/reindex.py,sha256=n1mQTYAZshtCKPgpR12S6ZMCqO3cSUtpCXjzb1PuZxU,11857
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
16
  sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
18
  sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
20
- sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
19
+ sqlcg/core/config.py,sha256=8QtFNRnrzLK1Zw93AKX37h6bSASDLv-42FzDQ7zxTtI,13079
20
+ sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
+ sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
21
22
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
22
23
  sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
23
24
  sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
24
- sqlcg/core/queries.cypher,sha256=auWIPJeVjgykk6wqTRMoNQCwRhzG2ZhF4MRufso2KYA,4182
25
- sqlcg/core/queries.py,sha256=XBdQTBSsX3WUqO3AdX5EWYH435GDrbwEg1BR9AvJSSo,1880
26
- sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
27
- sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
25
+ sqlcg/core/queries.cypher,sha256=91Pb10-ekSi0812wuHJTdXcMY4sT53_5o-oHhfSP_DQ,4967
26
+ sqlcg/core/queries.py,sha256=JLgV4MIgP7KVIQ0xpGj3_-MBhBfY_9XPoCdcI2mO-TM,2148
27
+ sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
+ sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
28
29
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
29
30
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
30
31
  sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
31
32
  sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
32
- sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
33
+ sqlcg/indexer/indexer.py,sha256=DYdUr59hRKCjJTRiQUWOC72JUQ9TgBrH0W4UOYNwqx8,60913
33
34
  sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
34
- sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
35
+ sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
35
36
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
36
37
  sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
37
38
  sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
@@ -39,25 +40,26 @@ sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8Aa
39
40
  sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
40
41
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
41
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
42
- sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
43
- sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
43
+ sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
44
+ sqlcg/parsers/base.py,sha256=uL0W22zpbIz_9eq-i-4LSlonxy2J1yChuISMLSYgvRU,49345
44
45
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
45
46
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
46
47
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
47
- sqlcg/parsers/snowflake_parser.py,sha256=Xc80vlhKiJqbt4cT7UcpYKcYzV9rSqFyG0d_oTc-eJE,12627
48
+ sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
48
49
  sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
49
50
  sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
51
+ sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
50
52
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
51
- sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
53
+ sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
52
54
  sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
53
- sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
54
- sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
55
- sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
55
+ sqlcg/server/server.py,sha256=gzeO5WbSNfGxgIKte01uy0VjO1_basI2ChSuAwr0dBc,14844
56
+ sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
57
+ sqlcg/server/tools.py,sha256=JvijDC0h5uHjZyZUIZq9sztNG3W5sr-Yy5rHwOVuJec,66642
56
58
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
57
59
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
58
60
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
59
61
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
60
- sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
61
- sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
- sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
- sql_code_graph-1.0.2.dist-info/RECORD,,
62
+ sql_code_graph-1.1.0.dist-info/METADATA,sha256=blW1eYNjfy6P61747uUtc22qm5MDETMcVYImXPa762g,13615
63
+ sql_code_graph-1.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
+ sql_code_graph-1.1.0.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
65
+ sql_code_graph-1.1.0.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.2"
3
+ __version__ = "1.1.0"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -9,6 +9,7 @@ from rich.console import Console
9
9
  from rich.table import Table
10
10
 
11
11
  from sqlcg.core.config import get_backend
12
+ from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
12
13
  from sqlcg.core.schema import NodeLabel, RelType
13
14
 
14
15
  if TYPE_CHECKING:
@@ -23,6 +24,9 @@ def upstream( # noqa: B008
23
24
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
24
25
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
25
26
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
27
+ include_intermediate: bool = typer.Option( # noqa: B008
28
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
29
+ ),
26
30
  ) -> None:
27
31
  """Trace upstream column lineage."""
28
32
  # Bounds check for depth to prevent performance DoS
@@ -30,19 +34,33 @@ def upstream( # noqa: B008
30
34
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
31
35
  raise typer.Exit(1)
32
36
 
37
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
38
+ kind_filter = (
39
+ ""
40
+ if include_intermediate
41
+ else "MATCH (t:SqlTable {qualified: src.table_qualified}) "
42
+ "WHERE t.kind IN ['table', 'external'] "
43
+ )
44
+
33
45
  with get_backend() as backend:
34
46
  results = backend.run_read(
35
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
36
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
37
- "RETURN src.id AS id LIMIT 100",
47
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
48
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
49
+ f"{kind_filter}"
50
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
51
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
52
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
38
53
  {"ref": ref},
39
54
  )
40
55
  if not results and len(ref.split(".")) >= 3:
41
56
  bare = _bare_ref(ref)
42
57
  fallback_results = backend.run_read(
43
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
44
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
45
- "RETURN src.id AS id LIMIT 100",
58
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
59
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
60
+ f"{kind_filter}"
61
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
62
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
63
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
46
64
  {"bare": bare},
47
65
  )
48
66
  if fallback_results:
@@ -59,7 +77,7 @@ def upstream( # noqa: B008
59
77
 
60
78
  nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
61
79
  results = _filter_column_results(results, nf)
62
- _print_table(results, ["id"])
80
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
63
81
 
64
82
 
65
83
  @app.command("downstream")
@@ -67,6 +85,9 @@ def downstream( # noqa: B008
67
85
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
68
86
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
69
87
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
88
+ include_intermediate: bool = typer.Option( # noqa: B008
89
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
90
+ ),
70
91
  ) -> None:
71
92
  """Trace downstream column lineage."""
72
93
  # Bounds check for depth to prevent performance DoS
@@ -74,19 +95,33 @@ def downstream( # noqa: B008
74
95
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
75
96
  raise typer.Exit(1)
76
97
 
98
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them
99
+ kind_filter = (
100
+ ""
101
+ if include_intermediate
102
+ else "MATCH (t:SqlTable {qualified: dst.table_qualified}) "
103
+ "WHERE t.kind IN ['table', 'external'] "
104
+ )
105
+
77
106
  with get_backend() as backend:
78
107
  results = backend.run_read(
79
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
80
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
81
- "RETURN dst.id AS id LIMIT 100",
108
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
109
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
110
+ f"{kind_filter}"
111
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
112
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
113
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
82
114
  {"ref": ref},
83
115
  )
84
116
  if not results and len(ref.split(".")) >= 3:
85
117
  bare = _bare_ref(ref)
86
118
  fallback_results = backend.run_read(
87
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
88
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
89
- "RETURN dst.id AS id LIMIT 100",
119
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
120
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
121
+ f"{kind_filter}"
122
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
123
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
124
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
90
125
  {"bare": bare},
91
126
  )
92
127
  if fallback_results:
@@ -103,33 +138,71 @@ def downstream( # noqa: B008
103
138
 
104
139
  nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
105
140
  results = _filter_column_results(results, nf)
106
- _print_table(results, ["id"])
141
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
142
+
143
+ # Append external consumer rows for terminal tables (scalar query, one per terminal).
144
+ # Resolve terminal tables from the column results; fall back to the root column's table.
145
+ terminal_tables: set[str] = set()
146
+ for r in results:
147
+ tbl = _col_id_to_table(r["id"])
148
+ if tbl:
149
+ terminal_tables.add(tbl)
150
+ # Also check the root column's table (in case no downstream columns were found).
151
+ root_parts = ref.rsplit(".", 1)
152
+ if len(root_parts) == 2:
153
+ terminal_tables.add(root_parts[0])
154
+ consumer_rows: list[dict] = []
155
+ for tbl in sorted(terminal_tables):
156
+ rows_ec = backend.run_read(
157
+ GET_TABLE_EXTERNAL_CONSUMERS_QUERY,
158
+ {"table_qualified": tbl},
159
+ )
160
+ for ec in rows_ec:
161
+ consumer_rows.append(
162
+ {"id": f"[external] {ec['name']} ({ec['consumer_type']})", "file:line": ""}
163
+ )
164
+ if consumer_rows:
165
+ _print_table(consumer_rows, ["id", "file:line"])
107
166
 
108
167
 
109
168
  @app.command("impact")
110
169
  def impact( # noqa: B008
111
170
  table: str = typer.Argument(..., help="Table name to analyze"), # noqa: B008
171
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
112
172
  ) -> None:
113
173
  """Show all queries impacted by a table."""
114
174
  with get_backend() as backend:
115
175
  results = backend.run_read(
116
176
  f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
117
177
  f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
118
- "RETURN q.id AS id, q.kind AS kind LIMIT 100",
178
+ "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
119
179
  {"t": table},
120
180
  )
181
+ if not raw:
182
+ from sqlcg.server.noise_filter import NoiseFilter
183
+
184
+ nf = NoiseFilter.from_config()
185
+ results = [r for r in results if not nf.is_noise(r.get("target", ""))]
121
186
  _print_table(results, ["id", "kind"])
122
187
 
123
188
 
124
189
  @app.command("failures")
125
190
  def failures(
126
191
  cause: str | None = typer.Option( # noqa: B008
127
- None, "--cause", help="Filter by E-code bucket (e.g. E5, timeout)"
192
+ None,
193
+ "--cause",
194
+ help=(
195
+ "Filter by E-code bucket. Valid values: "
196
+ "timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip"
197
+ ),
128
198
  ),
129
199
  limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
130
200
  ) -> None:
131
201
  """List files that failed to parse, with their dominant cause (E-code bucket).
132
202
 
203
+ Valid --cause buckets (from highest to lowest severity):
204
+ timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
205
+
133
206
  Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
134
207
  with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
135
208
  an earlier version.
@@ -148,14 +221,20 @@ def failures(
148
221
  @app.command("unused")
149
222
  def unused(
150
223
  threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
224
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
151
225
  ) -> None:
152
226
  """Find tables with no query references."""
153
227
  with get_backend() as backend:
154
228
  results = backend.run_read(
155
229
  f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
156
- "RETURN t.qualified AS qualified LIMIT 100",
230
+ "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
157
231
  {},
158
232
  )
233
+ if not raw:
234
+ from sqlcg.server.noise_filter import NoiseFilter
235
+
236
+ nf = NoiseFilter.from_config()
237
+ results = [r for r in results if not nf.is_noise(r["qualified"])]
159
238
  _print_table(results, ["qualified"])
160
239
 
161
240
 
@@ -196,6 +275,25 @@ def _filter_column_results(
196
275
  return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
197
276
 
198
277
 
278
+ def _add_file_line_col(rows: list[dict]) -> list[dict]:
279
+ """Add a 'file:line' composite column from 'file' and 'line' fields.
280
+
281
+ Formats as 'path/to/file.sql:N' when both are present, or '?' when either
282
+ is absent (multi-hop upstream where file/line is not available).
283
+ """
284
+ result = []
285
+ for row in rows:
286
+ new_row = dict(row)
287
+ file = row.get("file")
288
+ line = row.get("line")
289
+ if file and line:
290
+ new_row["file:line"] = f"{file}:{line}"
291
+ else:
292
+ new_row["file:line"] = "?"
293
+ result.append(new_row)
294
+ return result
295
+
296
+
199
297
  def _print_table(rows: list[dict], columns: list[str]) -> None:
200
298
  """Print results as a Rich table."""
201
299
  if not rows:
sqlcg/cli/commands/db.py CHANGED
@@ -2,11 +2,13 @@
2
2
 
3
3
  import os
4
4
  import shutil
5
+ from pathlib import Path
5
6
 
6
7
  import typer
7
8
  from rich.console import Console
8
9
 
9
10
  from sqlcg.core.config import get_backend, get_db_path
11
+ from sqlcg.core.freshness import compute_freshness, render_freshness_line
10
12
  from sqlcg.core.schema import NodeLabel
11
13
  from sqlcg.utils.logging import getLogger
12
14
 
@@ -77,6 +79,21 @@ def db_info() -> None:
77
79
  version = backend.get_schema_version() or "unknown"
78
80
  console.print(f"Schema version: {version}")
79
81
 
82
+ # Freshness block — only shown when the DB has been indexed from a git repo
83
+ try:
84
+ indexed_sha = backend.get_indexed_sha()
85
+ repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
86
+ if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
87
+ repo_root = Path(repo_rows[0]["path"])
88
+ f = compute_freshness(repo_root, indexed_sha)
89
+ console.print(render_freshness_line(f))
90
+ except NotImplementedError:
91
+ # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
92
+ pass
93
+ except Exception as e:
94
+ # Any unexpected error in the freshness block must not crash db info
95
+ logger.debug(f"Freshness check skipped: {e}")
96
+
80
97
  # Show node counts for all labels
81
98
  for label in NodeLabel:
82
99
  try:
@@ -38,6 +38,7 @@ def find_table( # noqa: B008
38
38
  @app.command("column")
39
39
  def find_column( # noqa: B008
40
40
  ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
41
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
41
42
  ) -> None:
42
43
  """Find a column by table.column reference."""
43
44
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
@@ -46,6 +47,12 @@ def find_column( # noqa: B008
46
47
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
47
48
  {"ref": ref},
48
49
  )
50
+ if not raw:
51
+ from sqlcg.server.noise_filter import NoiseFilter
52
+
53
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
54
+ # Filter on the schema.table portion of each column id (schema.table.column)
55
+ results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
49
56
  _print_table(results, ["id"])
50
57
 
51
58
 
sqlcg/cli/commands/git.py CHANGED
@@ -1,5 +1,7 @@
1
1
  """Git integration commands for sqlcg."""
2
2
 
3
+ import shutil
4
+ import sys
3
5
  from pathlib import Path
4
6
  from typing import NamedTuple
5
7
 
@@ -14,36 +16,79 @@ app = typer.Typer(name="git", help="Git integration commands")
14
16
  class _HookSpec(NamedTuple):
15
17
  filename: str
16
18
  sentinel: str
17
- script: str
19
+ script_template: str
18
20
 
19
21
 
22
+ # Hook script templates — use {sqlcg_bin} as the placeholder for the resolved binary.
23
+ # The sentinel comments (e.g. "# sqlcg post-checkout hook") must stay byte-for-byte
24
+ # unchanged so R9 idempotency is preserved: _install_single_hook matches them verbatim.
20
25
  _HOOKS: list[_HookSpec] = [
21
26
  _HookSpec(
22
27
  filename="post-checkout",
23
28
  sentinel="# sqlcg post-checkout hook",
24
- script=(
29
+ script_template=(
25
30
  "#!/bin/sh\n"
26
31
  "# sqlcg post-checkout hook — incremental resync after branch switch\n"
27
32
  "# $3 == 1 means branch checkout (not file checkout); skip file checkouts\n"
28
33
  '[ "$3" = "1" ] || exit 0\n'
29
- 'sqlcg reindex --from "$1" --to "$2"'
30
- ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true\n'
34
+ '{sqlcg_bin} reindex --from "$1" --to "$2"'
35
+ ' "$(git rev-parse --show-toplevel)" --dialect auto --quiet --notify'
36
+ ' || echo "sqlcg: graph not updated (server busy/locked)'
37
+ " -- run 'sqlcg mcp status'\" >&2\n"
31
38
  ),
32
39
  ),
33
40
  _HookSpec(
34
41
  filename="post-merge",
35
42
  sentinel="# sqlcg post-merge hook",
36
- script="""\
43
+ script_template="""\
37
44
  #!/bin/sh
38
45
  # sqlcg post-merge hook — incremental resync after pull/merge
39
- # post-merge receives only $1 (squash flag), no old/new SHA; use stored-SHA delta
40
- sqlcg reindex "$(git rev-parse --show-toplevel)" --dialect auto --quiet || true
46
+ # git sets ORIG_HEAD to the pre-merge HEAD; pass it as --from so --notify can route
47
+ # through a running server (same path as post-checkout). If ORIG_HEAD is unset (e.g.
48
+ # first-ever merge / gc'd), fall back to the standalone stored-SHA delta (direct write).
49
+ PREV=$(git rev-parse --verify --quiet ORIG_HEAD)
50
+ TOP=$(git rev-parse --show-toplevel)
51
+ if [ -n "$PREV" ]; then
52
+ {sqlcg_bin} reindex --from "$PREV" --to HEAD "$TOP" --dialect auto --quiet --notify \\
53
+ || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
54
+ else
55
+ {sqlcg_bin} reindex "$TOP" --dialect auto --quiet --notify \\
56
+ || echo "sqlcg: graph not updated (server busy/locked) -- run 'sqlcg mcp status'" >&2
57
+ fi
41
58
  """,
42
59
  ),
43
60
  ]
44
61
 
45
62
 
46
- def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
63
+ def _resolve_sqlcg_bin() -> str:
64
+ """Resolve the absolute path of the installing sqlcg binary.
65
+
66
+ Resolution order:
67
+ 1. shutil.which("sqlcg") — the binary on the installer's $PATH.
68
+ 2. sys.argv[0] resolved via Path(...).resolve() if it ends in "sqlcg" and is executable.
69
+ 3. Bare "sqlcg" fallback (current behaviour) — prints a warning so the user knows.
70
+
71
+ Returns the resolved path string (absolute when resolvable, bare "sqlcg" otherwise).
72
+ """
73
+ # 1. Try $PATH first — the binary the user means
74
+ which_result = shutil.which("sqlcg")
75
+ if which_result:
76
+ return which_result
77
+
78
+ # 2. Try sys.argv[0] for python -m / editable-install invocations
79
+ argv0 = Path(sys.argv[0]).resolve()
80
+ if argv0.name == "sqlcg" and argv0.is_file() and argv0.stat().st_mode & 0o111:
81
+ return str(argv0)
82
+
83
+ # 3. Bare fallback — still functional but relies on $PATH at hook-run time
84
+ console.print(
85
+ "[yellow]Warning: could not resolve the sqlcg binary path; the generated hooks "
86
+ "will use bare 'sqlcg' and rely on $PATH at hook-run time.[/yellow]"
87
+ )
88
+ return "sqlcg"
89
+
90
+
91
+ def _install_single_hook(hooks_dir: Path, spec: _HookSpec, sqlcg_bin: str) -> None:
47
92
  """Install one git hook idempotently.
48
93
 
49
94
  If the hook file already contains the sentinel, it is already installed — skip silently.
@@ -51,6 +96,7 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
51
96
  Otherwise, write the hook file and set 0o755.
52
97
  """
53
98
  hook_path = hooks_dir / spec.filename
99
+ script = spec.script_template.format(sqlcg_bin=sqlcg_bin)
54
100
 
55
101
  if hook_path.exists():
56
102
  existing_content = hook_path.read_text()
@@ -68,10 +114,10 @@ def _install_single_hook(hooks_dir: Path, spec: _HookSpec) -> None:
68
114
  f".git/hooks/{spec.filename}:[/yellow]"
69
115
  )
70
116
  console.print("")
71
- console.print("[cyan]" + spec.script.rstrip() + "[/cyan]")
117
+ console.print("[cyan]" + script.rstrip() + "[/cyan]")
72
118
  return
73
119
 
74
- hook_path.write_text(spec.script)
120
+ hook_path.write_text(script)
75
121
  hook_path.chmod(0o755)
76
122
  console.print(f"[green]Installed git hook:[/green] .git/hooks/{spec.filename}")
77
123
 
@@ -87,6 +133,8 @@ def install_hooks(
87
133
  Writes a post-checkout hook that triggers incremental resync after branch switches
88
134
  and a post-merge hook that triggers resync after pulls/merges.
89
135
  Idempotent: running multiple times produces one hook entry per hook.
136
+ The hooks embed the absolute path of the installing sqlcg binary so version skew
137
+ between the installed binary and the hook command is avoided.
90
138
  """
91
139
  if repo is None:
92
140
  repo = Path.cwd()
@@ -100,5 +148,7 @@ def install_hooks(
100
148
 
101
149
  hooks_dir.mkdir(parents=True, exist_ok=True)
102
150
 
151
+ sqlcg_bin = _resolve_sqlcg_bin()
152
+
103
153
  for spec in _HOOKS:
104
- _install_single_hook(hooks_dir, spec)
154
+ _install_single_hook(hooks_dir, spec, sqlcg_bin)
@@ -14,7 +14,7 @@ from rich.progress import (
14
14
  TimeRemainingColumn,
15
15
  )
16
16
 
17
- from sqlcg.core.config import KuzuConfig, get_backend, get_db_path, get_dialect
17
+ from sqlcg.core.config import KuzuConfig, config_file_present, get_backend, get_db_path, get_dialect
18
18
  from sqlcg.indexer.indexer import Indexer
19
19
 
20
20
  console = Console()
@@ -29,7 +29,7 @@ def index_cmd( # noqa: B008
29
29
  None, "--dbt-manifest", help="Path to dbt manifest"
30
30
  ),
31
31
  timeout_per_file: int = typer.Option( # noqa: B008
32
- 5, "--timeout-per-file", help="Timeout per file in seconds"
32
+ 10, "--timeout-per-file", help="Timeout per file in seconds"
33
33
  ),
34
34
  buffer_pool_size: int = typer.Option( # noqa: B008
35
35
  0,
@@ -63,6 +63,14 @@ def index_cmd( # noqa: B008
63
63
  profile: bool = typer.Option( # noqa: B008
64
64
  False, "--profile/--no-profile", help="Emit per-stage timing after indexing"
65
65
  ),
66
+ include_working_tree: bool = typer.Option( # noqa: B008
67
+ False,
68
+ "--include-working-tree",
69
+ help=(
70
+ "Index the working tree including uncommitted changes. "
71
+ "Marks freshness as 'indexed with working-tree changes'."
72
+ ),
73
+ ),
66
74
  ) -> None:
67
75
  """Index SQL files in a directory.
68
76
 
@@ -113,6 +121,13 @@ def index_cmd( # noqa: B008
113
121
  if dialect == "auto":
114
122
  dialect = get_dialect(path)
115
123
 
124
+ if not quiet and not config_file_present(path):
125
+ console.print(
126
+ f"[yellow]No .sqlcg.toml found at {path}/.sqlcg.toml — "
127
+ "using defaults (snowflake dialect, no aliases/prefixes). "
128
+ "Create .sqlcg.toml in the index directory to customise.[/yellow]"
129
+ )
130
+
116
131
  db_path = get_db_path()
117
132
  db_path.parent.mkdir(parents=True, exist_ok=True)
118
133
 
@@ -137,6 +152,19 @@ def index_cmd( # noqa: B008
137
152
  sqlcg_log.removeHandler(_counter)
138
153
  _warn_handler.close()
139
154
 
155
+ # --include-working-tree: if the working tree is dirty, overwrite the stored SHA
156
+ # with a "<head>+dirty" sentinel so 'db info' can distinguish clean-HEAD index
157
+ # from working-tree-inclusive index. The backend was closed inside _run_index,
158
+ # so we open a fresh context here for the single sentinel write.
159
+ if include_working_tree:
160
+ from sqlcg.core.freshness import _git
161
+
162
+ dirty_out = _git(path, "status", "--porcelain")
163
+ if dirty_out: # non-empty string → working tree is dirty
164
+ head = _git(path, "rev-parse", "HEAD") or "unknown"
165
+ with get_backend() as _b2:
166
+ _b2.set_indexed_sha(f"{head}+dirty")
167
+
140
168
  if not verbose and not quiet and _counter.count > 0 and _warn_log_path is not None:
141
169
  console.print(
142
170
  f"[yellow]Parse warnings written to {_warn_log_path} "