sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql-code-graph
3
- Version: 1.0.2
3
+ Version: 1.1.3
4
4
  Summary: SQL code graph analyzer and lineage tracer
5
5
  Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
6
6
  Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -219,7 +219,7 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
219
219
  | **Search & meta** | |
220
220
  | `search_sql_pattern(query)` | Full-text search across indexed SQL |
221
221
  | `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
222
- | `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
222
+ | `db_info()` | Graph health, node counts, parse quality breakdown, warnings, freshness (indexed SHA vs HEAD) |
223
223
  | `execute_cypher(query)` | Raw Cypher query against the graph |
224
224
  | `submit_feedback(...)` | Report a false positive/negative to improve metrics |
225
225
 
@@ -228,6 +228,12 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
228
228
  > `table.column`. Each returned node carries both `name` (the bare column) and
229
229
  > `table` (the owning `schema.table`), so results are navigable without a second lookup.
230
230
 
231
+ > **Provenance fields**: lineage edges now carry `file`, `line`, and `expression`
232
+ > (where the lineage was derived from), a `confidence` of `1.0` for plainly-parsed
233
+ > facts (lower for inferred edges, with a `reason`), and a `table_kind`
234
+ > (`table` / `cte` / `derived` / `external`) so CTE and derived aliases are
235
+ > distinguishable from real tables.
236
+
231
237
  > **LLM agent tip**: call `db_info()` before lineage queries to check that
232
238
  > `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
233
239
  > is high, column lineage will be limited for those files — use table-level tools
@@ -243,16 +249,21 @@ sqlcg db init # initialise graph database
243
249
  sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
244
250
  sqlcg index <path> --dialect auto # read dialect from .sqlcg.toml
245
251
  sqlcg index <path> --profile # index + print per-stage timing and slowest files
252
+ sqlcg index <path> --include-working-tree # also index uncommitted changes (marks graph dirty)
246
253
  sqlcg reindex <path> --from <sha> --to <sha> # incremental resync of only changed files
247
254
  sqlcg analyze unused # tables with no query references
248
255
  sqlcg analyze upstream/downstream # trace lineage from the CLI
249
256
  sqlcg find table/column/pattern # search the graph
250
257
  sqlcg watch <path> # watch for file changes
258
+ sqlcg db info # graph stats + freshness (indexed SHA vs HEAD)
251
259
  sqlcg git install-hooks # install post-checkout + post-merge resync hooks
252
260
  sqlcg gain # show usage metrics
253
261
  sqlcg report # generate FP/error report
254
262
  sqlcg mcp best-practices # print the fact/heuristic boundary for the MCP tools
255
263
  sqlcg mcp start # start MCP server manually
264
+ sqlcg mcp status # server status JSON (via control socket)
265
+ sqlcg mcp stop # stop the running MCP server gracefully
266
+ sqlcg mcp restart # stop the server (client must respawn it)
256
267
  sqlcg version # show installed version
257
268
  ```
258
269
 
@@ -1,37 +1,38 @@
1
- sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
1
+ sqlcg/__init__.py,sha256=YGDRrWVIrONmQholAKWh6hSKxlPd2dLcM1AdHHdBhEA,115
2
2
  sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
3
3
  sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
4
4
  sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
5
5
  sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
6
- sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
7
- sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
8
- sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
9
- sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
10
- sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
11
- sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
6
+ sqlcg/cli/commands/analyze.py,sha256=hiKj0R1m5i4ZmwrXBlVT14xGy6rs9jmv_ZDCLVZj4Tg,14282
7
+ sqlcg/cli/commands/db.py,sha256=5VpknLqYaimK6YA516w6iQVX6JmHcD52o6MuW5d088c,7462
8
+ sqlcg/cli/commands/find.py,sha256=SsK6q4YRPknrz_lIQ4Gun6HRoAdoVRGClwAYdm_s2OU,3168
9
+ sqlcg/cli/commands/gain.py,sha256=hz36QmuaXJXutI4vyNMDfcNsBeLTXa6EOw2bWe2AhTQ,8939
10
+ sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
11
+ sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
12
12
  sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
13
- sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
14
- sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
13
+ sqlcg/cli/commands/mcp.py,sha256=2gDsNvtj1Ql7PkjX9dHWAzOK0uCPRR5DGdBAzJa8PIU,6005
14
+ sqlcg/cli/commands/reindex.py,sha256=n1mQTYAZshtCKPgpR12S6ZMCqO3cSUtpCXjzb1PuZxU,11857
15
15
  sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
16
16
  sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
17
17
  sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
18
18
  sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
19
- sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
20
- sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
19
+ sqlcg/core/config.py,sha256=LuB8HWPsIt1OsjOshTT1bJdXWXN01w76ABl9M-VB9DM,14777
20
+ sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
21
+ sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
21
22
  sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
22
- sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
23
+ sqlcg/core/kuzu_backend.py,sha256=3kL8bGEQm70fuxYUdt1p7fsY12lCLQ07x01NYg6FOGA,16821
23
24
  sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
24
- sqlcg/core/queries.cypher,sha256=auWIPJeVjgykk6wqTRMoNQCwRhzG2ZhF4MRufso2KYA,4182
25
- sqlcg/core/queries.py,sha256=XBdQTBSsX3WUqO3AdX5EWYH435GDrbwEg1BR9AvJSSo,1880
26
- sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
27
- sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
25
+ sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
26
+ sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
27
+ sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
28
+ sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
28
29
  sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
29
30
  sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
30
31
  sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
31
32
  sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
32
- sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
33
+ sqlcg/indexer/indexer.py,sha256=KyyowxiSNU3Gm4JE-mj8gVm6D80XERJPd-he59I2sIk,62018
33
34
  sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
34
- sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
35
+ sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
35
36
  sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
36
37
  sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
37
38
  sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
@@ -39,25 +40,26 @@ sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8Aa
39
40
  sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
40
41
  sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
41
42
  sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
42
- sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
43
- sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
43
+ sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
44
+ sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
44
45
  sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
45
46
  sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
46
47
  sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
47
- sqlcg/parsers/snowflake_parser.py,sha256=Xc80vlhKiJqbt4cT7UcpYKcYzV9rSqFyG0d_oTc-eJE,12627
48
+ sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
48
49
  sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
49
50
  sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
51
+ sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
50
52
  sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
51
- sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
53
+ sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
52
54
  sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
53
- sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
54
- sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
55
- sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
55
+ sqlcg/server/server.py,sha256=gzeO5WbSNfGxgIKte01uy0VjO1_basI2ChSuAwr0dBc,14844
56
+ sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
57
+ sqlcg/server/tools.py,sha256=JvijDC0h5uHjZyZUIZq9sztNG3W5sr-Yy5rHwOVuJec,66642
56
58
  sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
57
59
  sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
58
60
  sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
59
61
  sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
60
- sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
61
- sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
62
- sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
63
- sql_code_graph-1.0.2.dist-info/RECORD,,
62
+ sql_code_graph-1.1.3.dist-info/METADATA,sha256=Z_aRnsDOgZ_ngAHkIr3x2XpEjF-x6UMUQwcIkAMlGjo,13615
63
+ sql_code_graph-1.1.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
64
+ sql_code_graph-1.1.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
65
+ sql_code_graph-1.1.3.dist-info/RECORD,,
sqlcg/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """SQL Code Graph - SQL lineage and dependency analysis tool."""
2
2
 
3
- __version__ = "1.0.2"
3
+ __version__ = "1.1.3"
4
4
 
5
5
  __all__ = ["__version__"]
@@ -9,6 +9,7 @@ from rich.console import Console
9
9
  from rich.table import Table
10
10
 
11
11
  from sqlcg.core.config import get_backend
12
+ from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
12
13
  from sqlcg.core.schema import NodeLabel, RelType
13
14
 
14
15
  if TYPE_CHECKING:
@@ -18,11 +19,37 @@ app = typer.Typer(help="Lineage analysis")
18
19
  console = Console()
19
20
 
20
21
 
22
+ def _kind_filter(source_alias: str, include_intermediate: bool) -> str:
23
+ """Build the Half-B (#38) kind filter for the lineage traversal query.
24
+
25
+ When ``include_intermediate`` is False, the filter uses ``OPTIONAL MATCH`` plus
26
+ ``t.kind IS NULL OR t.kind IN ['table', 'external']`` so a source whose SqlTable
27
+ node is ABSENT (a CTE-body source on a graph indexed before the #39 fix, or not yet
28
+ re-indexed) is KEPT rather than silently dropped. Reverting this to an inner
29
+ ``MATCH (t:SqlTable {...}) ... WHERE t.kind IN [...]`` is the #38 regression:
30
+ node-less physical sources vanish from results.
31
+
32
+ ``source_alias`` is ``src`` for upstream and ``dst`` for downstream — it names both
33
+ the node whose table is looked up and the variable carried through the WITH clause.
34
+ This is the single production source of the filter string; the #40 recall guard
35
+ imports it so reverting Half B here turns the guard red.
36
+ """
37
+ if include_intermediate:
38
+ return ""
39
+ return (
40
+ f"OPTIONAL MATCH (t:SqlTable {{qualified: {source_alias}.table_qualified}}) "
41
+ f"WITH c, {source_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
42
+ )
43
+
44
+
21
45
  @app.command("upstream")
22
46
  def upstream( # noqa: B008
23
47
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
24
48
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
25
49
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
50
+ include_intermediate: bool = typer.Option( # noqa: B008
51
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
52
+ ),
26
53
  ) -> None:
27
54
  """Trace upstream column lineage."""
28
55
  # Bounds check for depth to prevent performance DoS
@@ -30,19 +57,32 @@ def upstream( # noqa: B008
30
57
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
31
58
  raise typer.Exit(1)
32
59
 
33
- with get_backend() as backend:
60
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
61
+ # Half B (#38): use OPTIONAL MATCH so a missing SqlTable node (e.g. CTE-body source not yet
62
+ # re-indexed after #39 fix) is KEPT rather than silently dropped. WHERE t.kind IS NULL OR
63
+ # t.kind IN [...] means: keep when node absent (NULL) OR when kind is a physical source.
64
+ # CTE aliases (kind='cte') and derived tables (kind='derived') are filtered out.
65
+ kind_filter = _kind_filter("src", include_intermediate)
66
+
67
+ with get_backend(read_only=True) as backend:
34
68
  results = backend.run_read(
35
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
36
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
37
- "RETURN src.id AS id LIMIT 100",
69
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
70
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
71
+ f"{kind_filter}"
72
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
73
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
74
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
38
75
  {"ref": ref},
39
76
  )
40
77
  if not results and len(ref.split(".")) >= 3:
41
78
  bare = _bare_ref(ref)
42
79
  fallback_results = backend.run_read(
43
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
44
- f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
45
- "RETURN src.id AS id LIMIT 100",
80
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
81
+ f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
82
+ f"{kind_filter}"
83
+ f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
84
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
85
+ "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
46
86
  {"bare": bare},
47
87
  )
48
88
  if fallback_results:
@@ -59,7 +99,7 @@ def upstream( # noqa: B008
59
99
 
60
100
  nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
61
101
  results = _filter_column_results(results, nf)
62
- _print_table(results, ["id"])
102
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
63
103
 
64
104
 
65
105
  @app.command("downstream")
@@ -67,6 +107,9 @@ def downstream( # noqa: B008
67
107
  ref: str = typer.Argument(..., help="Column reference"), # noqa: B008
68
108
  depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"), # noqa: B008
69
109
  raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
110
+ include_intermediate: bool = typer.Option( # noqa: B008
111
+ False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
112
+ ),
70
113
  ) -> None:
71
114
  """Trace downstream column lineage."""
72
115
  # Bounds check for depth to prevent performance DoS
@@ -74,19 +117,31 @@ def downstream( # noqa: B008
74
117
  console.print("[red]Error: --depth must be between 1 and 100[/red]")
75
118
  raise typer.Exit(1)
76
119
 
77
- with get_backend() as backend:
120
+ # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
121
+ # Half B (#38): OPTIONAL MATCH keeps sources whose SqlTable node is absent (NULL) or is a
122
+ # physical kind. WITH c, dst, t carries the three variables in scope at this interpolation
123
+ # point; direct and q are bound later in the query.
124
+ kind_filter = _kind_filter("dst", include_intermediate)
125
+
126
+ with get_backend(read_only=True) as backend:
78
127
  results = backend.run_read(
79
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
80
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
81
- "RETURN dst.id AS id LIMIT 100",
128
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
129
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
130
+ f"{kind_filter}"
131
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
132
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
133
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
82
134
  {"ref": ref},
83
135
  )
84
136
  if not results and len(ref.split(".")) >= 3:
85
137
  bare = _bare_ref(ref)
86
138
  fallback_results = backend.run_read(
87
- f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
88
- f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
89
- "RETURN dst.id AS id LIMIT 100",
139
+ f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
140
+ f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
141
+ f"{kind_filter}"
142
+ f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
143
+ "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
144
+ "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
90
145
  {"bare": bare},
91
146
  )
92
147
  if fallback_results:
@@ -103,38 +158,76 @@ def downstream( # noqa: B008
103
158
 
104
159
  nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
105
160
  results = _filter_column_results(results, nf)
106
- _print_table(results, ["id"])
161
+ _print_table(_add_file_line_col(results), ["id", "file:line"])
162
+
163
+ # Append external consumer rows for terminal tables (scalar query, one per terminal).
164
+ # Resolve terminal tables from the column results; fall back to the root column's table.
165
+ terminal_tables: set[str] = set()
166
+ for r in results:
167
+ tbl = _col_id_to_table(r["id"])
168
+ if tbl:
169
+ terminal_tables.add(tbl)
170
+ # Also check the root column's table (in case no downstream columns were found).
171
+ root_parts = ref.rsplit(".", 1)
172
+ if len(root_parts) == 2:
173
+ terminal_tables.add(root_parts[0])
174
+ consumer_rows: list[dict] = []
175
+ for tbl in sorted(terminal_tables):
176
+ rows_ec = backend.run_read(
177
+ GET_TABLE_EXTERNAL_CONSUMERS_QUERY,
178
+ {"table_qualified": tbl},
179
+ )
180
+ for ec in rows_ec:
181
+ consumer_rows.append(
182
+ {"id": f"[external] {ec['name']} ({ec['consumer_type']})", "file:line": ""}
183
+ )
184
+ if consumer_rows:
185
+ _print_table(consumer_rows, ["id", "file:line"])
107
186
 
108
187
 
109
188
  @app.command("impact")
110
189
  def impact( # noqa: B008
111
190
  table: str = typer.Argument(..., help="Table name to analyze"), # noqa: B008
191
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
112
192
  ) -> None:
113
193
  """Show all queries impacted by a table."""
114
- with get_backend() as backend:
194
+ with get_backend(read_only=True) as backend:
115
195
  results = backend.run_read(
116
196
  f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
117
197
  f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
118
- "RETURN q.id AS id, q.kind AS kind LIMIT 100",
198
+ "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
119
199
  {"t": table},
120
200
  )
201
+ if not raw:
202
+ from sqlcg.server.noise_filter import NoiseFilter
203
+
204
+ nf = NoiseFilter.from_config()
205
+ results = [r for r in results if not nf.is_noise(r.get("target", ""))]
121
206
  _print_table(results, ["id", "kind"])
122
207
 
123
208
 
124
209
  @app.command("failures")
125
210
  def failures(
126
211
  cause: str | None = typer.Option( # noqa: B008
127
- None, "--cause", help="Filter by E-code bucket (e.g. E5, timeout)"
212
+ None,
213
+ "--cause",
214
+ help=(
215
+ "Filter by E-code bucket. Valid values: "
216
+ "timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip"
217
+ ),
128
218
  ),
129
219
  limit: int = typer.Option(100, "--limit", help="Maximum rows to return"), # noqa: B008
130
220
  ) -> None:
131
221
  """List files that failed to parse, with their dominant cause (E-code bucket).
132
222
 
223
+ Valid --cause buckets (from highest to lowest severity):
224
+ timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
225
+
133
226
  Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
134
227
  with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
135
228
  an earlier version.
136
229
  """
137
- with get_backend() as backend:
230
+ with get_backend(read_only=True) as backend:
138
231
  cypher = (
139
232
  f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
140
233
  "AND ($cause IS NULL OR f.parse_cause = $cause) "
@@ -148,14 +241,20 @@ def failures(
148
241
  @app.command("unused")
149
242
  def unused(
150
243
  threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
244
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
151
245
  ) -> None:
152
246
  """Find tables with no query references."""
153
- with get_backend() as backend:
247
+ with get_backend(read_only=True) as backend:
154
248
  results = backend.run_read(
155
249
  f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
156
- "RETURN t.qualified AS qualified LIMIT 100",
250
+ "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
157
251
  {},
158
252
  )
253
+ if not raw:
254
+ from sqlcg.server.noise_filter import NoiseFilter
255
+
256
+ nf = NoiseFilter.from_config()
257
+ results = [r for r in results if not nf.is_noise(r["qualified"])]
159
258
  _print_table(results, ["qualified"])
160
259
 
161
260
 
@@ -196,6 +295,25 @@ def _filter_column_results(
196
295
  return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
197
296
 
198
297
 
298
+ def _add_file_line_col(rows: list[dict]) -> list[dict]:
299
+ """Add a 'file:line' composite column from 'file' and 'line' fields.
300
+
301
+ Formats as 'path/to/file.sql:N' when both are present, or '?' when either
302
+ is absent (multi-hop upstream where file/line is not available).
303
+ """
304
+ result = []
305
+ for row in rows:
306
+ new_row = dict(row)
307
+ file = row.get("file")
308
+ line = row.get("line")
309
+ if file and line:
310
+ new_row["file:line"] = f"{file}:{line}"
311
+ else:
312
+ new_row["file:line"] = "?"
313
+ result.append(new_row)
314
+ return result
315
+
316
+
199
317
  def _print_table(rows: list[dict], columns: list[str]) -> None:
200
318
  """Print results as a Rich table."""
201
319
  if not rows:
sqlcg/cli/commands/db.py CHANGED
@@ -2,11 +2,13 @@
2
2
 
3
3
  import os
4
4
  import shutil
5
+ from pathlib import Path
5
6
 
6
7
  import typer
7
8
  from rich.console import Console
8
9
 
9
10
  from sqlcg.core.config import get_backend, get_db_path
11
+ from sqlcg.core.freshness import compute_freshness, render_freshness_line
10
12
  from sqlcg.core.schema import NodeLabel
11
13
  from sqlcg.utils.logging import getLogger
12
14
 
@@ -73,10 +75,25 @@ def db_reset( # noqa: B008
73
75
  @app.command("info")
74
76
  def db_info() -> None:
75
77
  """Show database stats."""
76
- with get_backend() as backend:
78
+ with get_backend(read_only=True) as backend:
77
79
  version = backend.get_schema_version() or "unknown"
78
80
  console.print(f"Schema version: {version}")
79
81
 
82
+ # Freshness block — only shown when the DB has been indexed from a git repo
83
+ try:
84
+ indexed_sha = backend.get_indexed_sha()
85
+ repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
86
+ if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
87
+ repo_root = Path(repo_rows[0]["path"])
88
+ f = compute_freshness(repo_root, indexed_sha)
89
+ console.print(render_freshness_line(f))
90
+ except NotImplementedError:
91
+ # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
92
+ pass
93
+ except Exception as e:
94
+ # Any unexpected error in the freshness block must not crash db info
95
+ logger.debug(f"Freshness check skipped: {e}")
96
+
80
97
  # Show node counts for all labels
81
98
  for label in NodeLabel:
82
99
  try:
@@ -150,7 +167,7 @@ def db_info() -> None:
150
167
  @app.command("list-repos")
151
168
  def list_repos() -> None:
152
169
  """List all indexed repositories."""
153
- with get_backend() as backend:
170
+ with get_backend(read_only=True) as backend:
154
171
  result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
155
172
 
156
173
  if not result:
@@ -18,7 +18,7 @@ def find_table( # noqa: B008
18
18
  ) -> None:
19
19
  """Find a table by name."""
20
20
  name = name.lower() # graph keys are lowercased at index time (C2 normalization)
21
- with get_backend() as backend:
21
+ with get_backend(read_only=True) as backend:
22
22
  results = backend.run_read(
23
23
  f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
24
24
  "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
@@ -38,14 +38,21 @@ def find_table( # noqa: B008
38
38
  @app.command("column")
39
39
  def find_column( # noqa: B008
40
40
  ref: str = typer.Argument(..., help="Column reference (table.column)"), # noqa: B008
41
+ raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"), # noqa: B008
41
42
  ) -> None:
42
43
  """Find a column by table.column reference."""
43
44
  ref = ref.lower() # graph keys are lowercased at index time (C2 normalization)
44
- with get_backend() as backend:
45
+ with get_backend(read_only=True) as backend:
45
46
  results = backend.run_read(
46
47
  f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
47
48
  {"ref": ref},
48
49
  )
50
+ if not raw:
51
+ from sqlcg.server.noise_filter import NoiseFilter
52
+
53
+ nf = NoiseFilter.from_config() # repo_root=None → falls back to Path.cwd()
54
+ # Filter on the schema.table portion of each column id (schema.table.column)
55
+ results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
49
56
  _print_table(results, ["id"])
50
57
 
51
58
 
@@ -54,7 +61,7 @@ def find_pattern( # noqa: B008
54
61
  pattern: str = typer.Argument(..., help="SQL pattern to search for"), # noqa: B008
55
62
  ) -> None:
56
63
  """Find queries containing a SQL pattern."""
57
- with get_backend() as backend:
64
+ with get_backend(read_only=True) as backend:
58
65
  results = backend.run_read(
59
66
  f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
60
67
  "RETURN q.id AS id, q.kind AS kind LIMIT 50",
@@ -123,7 +123,7 @@ def gain_cmd(
123
123
  # Section F: parse quality from graph
124
124
  parse_quality: dict[str, int] | None = None
125
125
  try:
126
- with get_backend() as backend:
126
+ with get_backend(read_only=True) as backend:
127
127
  mode_rows = backend.run_read(
128
128
  "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
129
129
  " COUNT(q) AS cnt ORDER BY cnt DESC",