PyPI - sql-code-graph - Versions diffs - 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl - Mend

sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/METADATA +13 -2
{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/RECORD +31 -29
sqlcg/__init__.py +1 -1
sqlcg/cli/commands/analyze.py +140 -22
sqlcg/cli/commands/db.py +19 -2
sqlcg/cli/commands/find.py +10 -3
sqlcg/cli/commands/gain.py +1 -1
sqlcg/cli/commands/git.py +61 -11
sqlcg/cli/commands/index.py +30 -2
sqlcg/cli/commands/mcp.py +103 -0
sqlcg/cli/commands/reindex.py +122 -12
sqlcg/core/config.py +113 -5
sqlcg/core/freshness.py +134 -0
sqlcg/core/graph_db.py +2 -0
sqlcg/core/kuzu_backend.py +4 -1
sqlcg/core/queries.cypher +18 -6
sqlcg/core/queries.py +3 -1
sqlcg/core/schema.cypher +13 -1
sqlcg/core/schema.py +5 -1
sqlcg/indexer/indexer.py +394 -160
sqlcg/indexer/walker.py +3 -0
sqlcg/parsers/ansi_parser.py +56 -0
sqlcg/parsers/base.py +13 -4
sqlcg/parsers/snowflake_parser.py +46 -6
sqlcg/server/control.py +144 -0
sqlcg/server/models.py +68 -0
sqlcg/server/server.py +283 -1
sqlcg/server/skill.py +20 -4
sqlcg/server/tools.py +203 -13
{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/WHEEL +0 -0
{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/entry_points.txt +0 -0

{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sql-code-graph
-Version: 1.0.2
+Version: 1.1.3
 Summary: SQL code graph analyzer and lineage tracer
 Project-URL: Homepage, https://github.com/Warhorze/sql-code-graph
 Project-URL: Repository, https://github.com/Warhorze/sql-code-graph
@@ -219,7 +219,7 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
 | **Search & meta** | |
 | `search_sql_pattern(query)` | Full-text search across indexed SQL |
 | `list_dialects_and_repos()` | List indexed repos and dialects (catalogue) |
-| `db_info()` | Graph health, node counts, parse quality breakdown, warnings |
+| `db_info()` | Graph health, node counts, parse quality breakdown, warnings, freshness (indexed SHA vs HEAD) |
 | `execute_cypher(query)` | Raw Cypher query against the graph |
 | `submit_feedback(...)` | Report a false positive/negative to improve metrics |
@@ -228,6 +228,12 @@ After indexing, `sqlcg db info` shows non-zero `STAR_EXPANSION lineage edges`, a
 > `table.column`. Each returned node carries both `name` (the bare column) and
 > `table` (the owning `schema.table`), so results are navigable without a second lookup.
+> **Provenance fields**: lineage edges now carry `file`, `line`, and `expression`
+> (where the lineage was derived from), a `confidence` of `1.0` for plainly-parsed
+> facts (lower for inferred edges, with a `reason`), and a `table_kind`
+> (`table` / `cte` / `derived` / `external`) so CTE and derived aliases are
+> distinguishable from real tables.
 > **LLM agent tip**: call `db_info()` before lineage queries to check that
 > `SqlColumn > 0` and `warnings` is empty. If `parse_quality["scripting_block"]`
 > is high, column lineage will be limited for those files — use table-level tools
@@ -243,16 +249,21 @@ sqlcg db init                          # initialise graph database
 sqlcg index <path> --dialect snowflake # index SQL files (snowflake is the tested dialect)
 sqlcg index <path> --dialect auto      # read dialect from .sqlcg.toml
 sqlcg index <path> --profile           # index + print per-stage timing and slowest files
+sqlcg index <path> --include-working-tree  # also index uncommitted changes (marks graph dirty)
 sqlcg reindex <path> --from <sha> --to <sha>  # incremental resync of only changed files
 sqlcg analyze unused                   # tables with no query references
 sqlcg analyze upstream/downstream      # trace lineage from the CLI
 sqlcg find table/column/pattern        # search the graph
 sqlcg watch <path>                     # watch for file changes
+sqlcg db info                          # graph stats + freshness (indexed SHA vs HEAD)
 sqlcg git install-hooks                # install post-checkout + post-merge resync hooks
 sqlcg gain                             # show usage metrics
 sqlcg report                           # generate FP/error report
 sqlcg mcp best-practices               # print the fact/heuristic boundary for the MCP tools
 sqlcg mcp start                        # start MCP server manually
+sqlcg mcp status                       # server status JSON (via control socket)
+sqlcg mcp stop                         # stop the running MCP server gracefully
+sqlcg mcp restart                      # stop the server (client must respawn it)
 sqlcg version                          # show installed version
 ```

{sql_code_graph-1.0.2.dist-info → sql_code_graph-1.1.3.dist-info}/RECORD RENAMED Viewed

@@ -1,37 +1,38 @@
-sqlcg/__init__.py,sha256=hGOhwTAVTaRm7PjbaSQVCLvnF7rOGZZNdMqv0IoQdYg,115
+sqlcg/__init__.py,sha256=YGDRrWVIrONmQholAKWh6hSKxlPd2dLcM1AdHHdBhEA,115
 sqlcg/__main__.py,sha256=1YoFLcqEgTwYq1J3TbUwpkdG0zeeLIf2fJvwWI-CLFU,109
 sqlcg/cli/__init__.py,sha256=W8fD0LpMq2xm_5WKGNMvJh2WBL1ho5E8hUeAqXQYT1g,28
 sqlcg/cli/main.py,sha256=WmdTjsOlz1ozi2Y3Aq4ezR_FCRl-Lc1YOKw3_d48dlY,1650
 sqlcg/cli/commands/__init__.py,sha256=oSHtr6VD-jNubOjuCQyZj2tBppjMEpQDh-IGQ8of9eA,30
-sqlcg/cli/commands/analyze.py,sha256=PFQD29_VAtJ-wghYLsHRINp8VlnOVl1WlOdbAdcWz1E,8091
-sqlcg/cli/commands/db.py,sha256=Yd4ZDz1BFwjO4Lyt3NefQnowkjdUxFDFmsPykBVH2Pk,6518
-sqlcg/cli/commands/find.py,sha256=P2OFI0O_-F4W5-oy5KObXUHI7gNTkJRtDSZ59xTKE9Y,2672
-sqlcg/cli/commands/gain.py,sha256=bOvia7CVla_fESrDEdftYze8Mm0xDio3SpCzIyoXg7A,8925
-sqlcg/cli/commands/git.py,sha256=96hmWYd861FC8RZqPQ_eBG8yLXSXaB9SLxmuwx00nWU,3347
-sqlcg/cli/commands/index.py,sha256=Sgrg5MaQWfQzbX3e3Wcsfd8BEWDGuBm5l5vynpJsRzA,9801
+sqlcg/cli/commands/analyze.py,sha256=hiKj0R1m5i4ZmwrXBlVT14xGy6rs9jmv_ZDCLVZj4Tg,14282
+sqlcg/cli/commands/db.py,sha256=5VpknLqYaimK6YA516w6iQVX6JmHcD52o6MuW5d088c,7462
+sqlcg/cli/commands/find.py,sha256=SsK6q4YRPknrz_lIQ4Gun6HRoAdoVRGClwAYdm_s2OU,3168
+sqlcg/cli/commands/gain.py,sha256=hz36QmuaXJXutI4vyNMDfcNsBeLTXa6EOw2bWe2AhTQ,8939
+sqlcg/cli/commands/git.py,sha256=yMgWOuoTCTBr2P1QgmghRi5ikmUYHuxDUVyBDYerErw,5728
+sqlcg/cli/commands/index.py,sha256=xMnxKDiUt5LH_3lKAotoRctL4VSOvcw7Gq--idLPtm0,11091
 sqlcg/cli/commands/install.py,sha256=KNABvrLbamPyYnmnVdCaM_MNezbDc-pr6IkignCWI8k,9186
-sqlcg/cli/commands/mcp.py,sha256=cfi7D-RgEPUKdfUbsJC2iKImKOnHQvWxCLfwYIPdhdE,2174
-sqlcg/cli/commands/reindex.py,sha256=J9gpaxSzJ1mTdOJWh7WSLskbRF9f_2EMWnUFF4VOtVU,6387
+sqlcg/cli/commands/mcp.py,sha256=2gDsNvtj1Ql7PkjX9dHWAzOK0uCPRR5DGdBAzJa8PIU,6005
+sqlcg/cli/commands/reindex.py,sha256=n1mQTYAZshtCKPgpR12S6ZMCqO3cSUtpCXjzb1PuZxU,11857
 sqlcg/cli/commands/report.py,sha256=JU0qjyMxwOukE7bN3XvvIzOI7zMg_Gsnvk_8F6pKNpA,4915
 sqlcg/cli/commands/uninstall.py,sha256=IYwQaqnMmmzW0Nlls40wD-L3tVkMgKIMRXUkcXPMUc4,9398
 sqlcg/cli/commands/watch.py,sha256=7N6c-QuvxAEGHzDZ0C3CU2BkHSraZW9YtgoFnz7SaQo,2373
 sqlcg/core/__init__.py,sha256=uNsJCrCMVWVT80sHPtI_f39BYqIf5N0i6LSq8x8HsyI,283
-sqlcg/core/config.py,sha256=YCq4OayvBSNXsYtOh3yZ-W6fyJBLwYunORDo2TPCU9s,10179
-sqlcg/core/graph_db.py,sha256=gFiHjfVeRHp2FS3yRThDgCWFkugOQD065IvEqN6apg4,7881
+sqlcg/core/config.py,sha256=LuB8HWPsIt1OsjOshTT1bJdXWXN01w76ABl9M-VB9DM,14777
+sqlcg/core/freshness.py,sha256=gRb8pRPw5SdIUxAYkMXIJ00DTdQ6CegRZPAvWnv0rU0,4575
+sqlcg/core/graph_db.py,sha256=Aa85wPFg26H-Ud9SrZyxCHH-99iitAI5S3X9T_62Yyw,7957
 sqlcg/core/jobs.py,sha256=Je-fCdSKRgiSsv1W8SgNAlp36a7t7-pJZ-qKPbka9OE,3298
-sqlcg/core/kuzu_backend.py,sha256=ziHt-AB9sEZY7qB8whseWFicbTfOZaNOxcNVKhjii5Y,16587
+sqlcg/core/kuzu_backend.py,sha256=3kL8bGEQm70fuxYUdt1p7fsY12lCLQ07x01NYg6FOGA,16821
 sqlcg/core/neo4j_backend.py,sha256=AM1TncP9GBGph-rSHwalZPmGUV2kFILzaJP-PSB0UYw,8437
-sqlcg/core/queries.cypher,sha256=auWIPJeVjgykk6wqTRMoNQCwRhzG2ZhF4MRufso2KYA,4182
-sqlcg/core/queries.py,sha256=XBdQTBSsX3WUqO3AdX5EWYH435GDrbwEg1BR9AvJSSo,1880
-sqlcg/core/schema.cypher,sha256=UWYsPMRgkn6HOlPZ3rl6BfY5hzKQKP5RGPaZg4NTZFY,2515
-sqlcg/core/schema.py,sha256=9jBgJwuvfjLq2xC5B0NUyZZYxhqTb0LO0YzxcPM-gVM,1301
+sqlcg/core/queries.cypher,sha256=cvPOVe5GUOzJN4bxUvDxNI--xIIP8gm42TR-gUnea4U,4685
+sqlcg/core/queries.py,sha256=gkl4bhkZM8FsvbSA-IaK17sRFcO3hB5YlVCemkCXgWM,2064
+sqlcg/core/schema.cypher,sha256=rK5QMhSrzZhuj73NeNXGX6oM-rPPPvxFjex0fEyUvkQ,2859
+sqlcg/core/schema.py,sha256=JO5rkspYKjL9AEl5mt0VIJKn-IPOH3kJV_fVmAMuFCI,1467
 sqlcg/indexer/__init__.py,sha256=Wh20Unz2OHs1oIyWLrpurPAasF0BET2g4iXtNk7mh2U,56
 sqlcg/indexer/dbt_adapter.py,sha256=EB5x1WU5Z9d-I97ADDj88S_hG1C4z4nbrv8JUCzXfy8,686
 sqlcg/indexer/error_classify.py,sha256=MYjPVprwT-ARPjBCyCzu2F9DSrZfnTVtVIoBgm8s4H8,5329
 sqlcg/indexer/git_delta.py,sha256=P-QM4vnVURT2KLiE6u3cQynRUF-mTH13cbB4I20YHPQ,4468
-sqlcg/indexer/indexer.py,sha256=0B0BCUaLPdV9XtlCzhqR3hwHyD3w83o-tYG7yNr18Yo,50507
+sqlcg/indexer/indexer.py,sha256=KyyowxiSNU3Gm4JE-mj8gVm6D80XERJPd-he59I2sIk,62018
 sqlcg/indexer/pool.py,sha256=BTYx-pBe6zwUG89MHh0X7nzGNVlsHN-GjovYKanVI1s,18553
-sqlcg/indexer/walker.py,sha256=C__JuDcTzKxFqVjGFRr5cj9hgxvf8zffTz-0HMn1qTY,1746
+sqlcg/indexer/walker.py,sha256=umNaqDbuerr75VYG1TEOv0ATsbI40O3SIw35f7XJcDE,1931
 sqlcg/indexer/watcher.py,sha256=mJQq1LASRLKKwhz0WhCUWPLLqyPR2_-FD_8efYU6gE8,8442
 sqlcg/lineage/__init__.py,sha256=Da1DlYwtK13WHv_RnHjAtNkHTOuFbhxqCjT1Le7DsWM,46
 sqlcg/lineage/aggregator.py,sha256=G1xsTjf981EVSgN1yIHcC_ecDvcTcSPvEp6Kb2HPXkY,4943
@@ -39,25 +40,26 @@ sqlcg/lineage/schema_resolver.py,sha256=iXt6LYF6UVWsGUpcfbmjmGn9wCgXl721lTGf_8Aa
 sqlcg/metrics/__init__.py,sha256=hLJ6wm4St8qqYwKh3o9QG7lcEt1BEYM31ccqO9tGpIg,133
 sqlcg/metrics/store.py,sha256=BaMf7QYTmYMlX_Jzi1GNU8R2sMVkWdn07f-ZSndtcNk,8879
 sqlcg/parsers/__init__.py,sha256=AamA8wBbDZV9_zEtZCI4Hyen5UAVKHmBwjTghTt2PZE,785
-sqlcg/parsers/ansi_parser.py,sha256=KruZn5CYjpktKmMRVWackshRI_AR6ehc-ReCsDeWNkQ,14321
-sqlcg/parsers/base.py,sha256=cSHlXwiSNu77TZI6_p1nRevbRTcBc1t5v8N_aKR7uB4,49117
+sqlcg/parsers/ansi_parser.py,sha256=tu1MWWaSYmpefKjgk2PPyGStIFjV47Z_1WjyBh5Zi2c,17180
+sqlcg/parsers/base.py,sha256=IiOkVsm6jz9-48RqDCXiW-UXAraNxQ4pKXvSA7aolnA,49907
 sqlcg/parsers/bigquery_parser.py,sha256=mOnWTfXB_Dp4JwFE1PVYOB6CDPf5nYE0Dea8kJCl9uQ,2827
 sqlcg/parsers/postgres_parser.py,sha256=lYfUpQY6j4Qm7ndXBtXbgPoGzYqYddWt5YeFnWKdA6I,946
 sqlcg/parsers/registry.py,sha256=LXy1F6rqQI6VdxpRvZg_tNpoEucW3mXZHYBMlMONbX4,1496
-sqlcg/parsers/snowflake_parser.py,sha256=Xc80vlhKiJqbt4cT7UcpYKcYzV9rSqFyG0d_oTc-eJE,12627
+sqlcg/parsers/snowflake_parser.py,sha256=fovMyqfhWD2wmtEyiwTC0aoP4QWP-3XQZ8WYkXvs9hg,15511
 sqlcg/parsers/tsql_parser.py,sha256=RRj1pACtAk2tLTDaFWRYF67a0IDvaf5A1YQXWIz0bpQ,956
 sqlcg/server/__init__.py,sha256=n4wuNE7xyJIJxJZBtmtdccCMQfvTdF-IqIaZVbC4FC4,35
+sqlcg/server/control.py,sha256=v-r21npODiHlHnJHuo_6KWrKclQKq_E1QyrzIWjqgtY,4508
 sqlcg/server/exceptions.py,sha256=EONw34icOByCTpppSQrvQBW6asc4hfqaGDCAFjv96II,469
-sqlcg/server/models.py,sha256=dv4SM_o-aY8kUFIbCtj0l8ceMsfyvQtXCWPm4Ek_-14,16432
+sqlcg/server/models.py,sha256=l7ORy6sbtzBW1y3qVaeLwEukbyAgBkz9S5VIm2q4b24,19378
 sqlcg/server/noise_filter.py,sha256=idSBGgdKWWccJdpOo9qgbM2350Oew-2l5W6Yc9GYQqY,6337
-sqlcg/server/server.py,sha256=mDAW_Zmk3Sp2sApw3Gw3veCqJe7waw-sioQyKZBn9ng,3774
-sqlcg/server/skill.py,sha256=siAtrRdFHQnASe9nl33MvkTXXt9EgCB8id5i9AUq4XU,10718
-sqlcg/server/tools.py,sha256=mSoYZRI7F5ZmdTcG-BnY6ULzrz3Y7qIFe3cHTVWVyMs,57785
+sqlcg/server/server.py,sha256=gzeO5WbSNfGxgIKte01uy0VjO1_basI2ChSuAwr0dBc,14844
+sqlcg/server/skill.py,sha256=GE8eeimk6yiGGJ74erGypqYAviur5peSR6_2a4QQWVM,12828
+sqlcg/server/tools.py,sha256=JvijDC0h5uHjZyZUIZq9sztNG3W5sr-Yy5rHwOVuJec,66642
 sqlcg/utils/__init__.py,sha256=--iqt5ThTXmT8Wz7da8hs3n0zDfYPl8P-z5OgRJ_77E,154
 sqlcg/utils/hashing.py,sha256=H25-sYfxHKb3_IERFnHyAIYNiXN470Oqo5sJT_D3YOA,438
 sqlcg/utils/ignore.py,sha256=wJjwa0mjnQ_xJExOUxk25y00g065XmmzJapqV3ifD5o,1151
 sqlcg/utils/logging.py,sha256=u0fCmYsLj9o81vawm3xZTHaw68GQYVm7JxG-gP81u8A,840
-sql_code_graph-1.0.2.dist-info/METADATA,sha256=aikAv-KoUOGfgYo3-htWLyq61x1PE6bC1Onn_TNAuvE,12806
-sql_code_graph-1.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sql_code_graph-1.0.2.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
-sql_code_graph-1.0.2.dist-info/RECORD,,
+sql_code_graph-1.1.3.dist-info/METADATA,sha256=Z_aRnsDOgZ_ngAHkIr3x2XpEjF-x6UMUQwcIkAMlGjo,13615
+sql_code_graph-1.1.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sql_code_graph-1.1.3.dist-info/entry_points.txt,sha256=Wfe49sVzV9p4eVFGo5RxcV-frr3HOP0yzzst8JBxQLQ,46
+sql_code_graph-1.1.3.dist-info/RECORD,,

sqlcg/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """SQL Code Graph - SQL lineage and dependency analysis tool."""
-__version__ = "1.0.2"
+__version__ = "1.1.3"
 __all__ = ["__version__"]

sqlcg/cli/commands/analyze.py CHANGED Viewed

@@ -9,6 +9,7 @@ from rich.console import Console
 from rich.table import Table
 from sqlcg.core.config import get_backend
+from sqlcg.core.queries import GET_TABLE_EXTERNAL_CONSUMERS_QUERY
 from sqlcg.core.schema import NodeLabel, RelType
 if TYPE_CHECKING:
@@ -18,11 +19,37 @@ app = typer.Typer(help="Lineage analysis")
 console = Console()
+def _kind_filter(source_alias: str, include_intermediate: bool) -> str:
+    """Build the Half-B (#38) kind filter for the lineage traversal query.
+    When ``include_intermediate`` is False, the filter uses ``OPTIONAL MATCH`` plus
+    ``t.kind IS NULL OR t.kind IN ['table', 'external']`` so a source whose SqlTable
+    node is ABSENT (a CTE-body source on a graph indexed before the #39 fix, or not yet
+    re-indexed) is KEPT rather than silently dropped.  Reverting this to an inner
+    ``MATCH (t:SqlTable {...}) ... WHERE t.kind IN [...]`` is the #38 regression:
+    node-less physical sources vanish from results.
+    ``source_alias`` is ``src`` for upstream and ``dst`` for downstream — it names both
+    the node whose table is looked up and the variable carried through the WITH clause.
+    This is the single production source of the filter string; the #40 recall guard
+    imports it so reverting Half B here turns the guard red.
+    """
+    if include_intermediate:
+        return ""
+    return (
+        f"OPTIONAL MATCH (t:SqlTable {{qualified: {source_alias}.table_qualified}}) "
+        f"WITH c, {source_alias}, t WHERE t.kind IS NULL OR t.kind IN ['table', 'external'] "
+    )
 @app.command("upstream")
 def upstream(  # noqa: B008
     ref: str = typer.Argument(..., help="Column reference"),  # noqa: B008
     depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"),  # noqa: B008
     raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
+    include_intermediate: bool = typer.Option(  # noqa: B008
+        False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
+    ),
 ) -> None:
     """Trace upstream column lineage."""
     # Bounds check for depth to prevent performance DoS
@@ -30,19 +57,32 @@ def upstream(  # noqa: B008
         console.print("[red]Error: --depth must be between 1 and 100[/red]")
         raise typer.Exit(1)
-    with get_backend() as backend:
+    # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
+    # Half B (#38): use OPTIONAL MATCH so a missing SqlTable node (e.g. CTE-body source not yet
+    # re-indexed after #39 fix) is KEPT rather than silently dropped.  WHERE t.kind IS NULL OR
+    # t.kind IN [...] means: keep when node absent (NULL) OR when kind is a physical source.
+    # CTE aliases (kind='cte') and derived tables (kind='derived') are filtered out.
+    kind_filter = _kind_filter("src", include_intermediate)
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
-            f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
-            f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
-            "RETURN src.id AS id LIMIT 100",
+            f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
+            f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
+            f"{kind_filter}"
+            f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
+            "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
+            "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
             {"ref": ref},
         )
         if not results and len(ref.split(".")) >= 3:
             bare = _bare_ref(ref)
             fallback_results = backend.run_read(
-                f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
-                f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src) "
-                "RETURN src.id AS id LIMIT 100",
+                f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
+                f"<-[:{RelType.COLUMN_LINEAGE}*1..{depth}]-(src:{NodeLabel.COLUMN}) "
+                f"{kind_filter}"
+                f"OPTIONAL MATCH (src)-[direct:{RelType.COLUMN_LINEAGE}]->(c) "
+                "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
+                "RETURN src.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
                 {"bare": bare},
             )
             if fallback_results:
@@ -59,7 +99,7 @@ def upstream(  # noqa: B008
             nf = NoiseFilter.from_config()  # repo_root=None → falls back to Path.cwd()
             results = _filter_column_results(results, nf)
-        _print_table(results, ["id"])
+        _print_table(_add_file_line_col(results), ["id", "file:line"])
 @app.command("downstream")
@@ -67,6 +107,9 @@ def downstream(  # noqa: B008
     ref: str = typer.Argument(..., help="Column reference"),  # noqa: B008
     depth: int = typer.Option(5, "--depth", help="Maximum traversal depth"),  # noqa: B008
     raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
+    include_intermediate: bool = typer.Option(  # noqa: B008
+        False, "--include-intermediate", help="Include CTE/derived intermediate nodes"
+    ),
 ) -> None:
     """Trace downstream column lineage."""
     # Bounds check for depth to prevent performance DoS
@@ -74,19 +117,31 @@ def downstream(  # noqa: B008
         console.print("[red]Error: --depth must be between 1 and 100[/red]")
         raise typer.Exit(1)
-    with get_backend() as backend:
+    # By default, filter out CTE/derived intermediate nodes; --include-intermediate restores them.
+    # Half B (#38): OPTIONAL MATCH keeps sources whose SqlTable node is absent (NULL) or is a
+    # physical kind.  WITH c, dst, t carries the three variables in scope at this interpolation
+    # point; direct and q are bound later in the query.
+    kind_filter = _kind_filter("dst", include_intermediate)
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
-            f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $ref}})"
-            f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
-            "RETURN dst.id AS id LIMIT 100",
+            f"MATCH (c:{NodeLabel.COLUMN} {{id: $ref}})"
+            f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
+            f"{kind_filter}"
+            f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
+            "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
+            "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
             {"ref": ref},
         )
         if not results and len(ref.split(".")) >= 3:
             bare = _bare_ref(ref)
             fallback_results = backend.run_read(
-                f"MATCH p=(c:{NodeLabel.COLUMN} {{id: $bare}})"
-                f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst) "
-                "RETURN dst.id AS id LIMIT 100",
+                f"MATCH (c:{NodeLabel.COLUMN} {{id: $bare}})"
+                f"-[:{RelType.COLUMN_LINEAGE}*1..{depth}]->(dst:{NodeLabel.COLUMN}) "
+                f"{kind_filter}"
+                f"OPTIONAL MATCH (c)-[direct:{RelType.COLUMN_LINEAGE}]->(dst) "
+                "OPTIONAL MATCH (q:SqlQuery {id: direct.query_id}) "
+                "RETURN dst.id AS id, q.file_path AS file, q.start_line AS line LIMIT 100",
                 {"bare": bare},
             )
             if fallback_results:
@@ -103,38 +158,76 @@ def downstream(  # noqa: B008
             nf = NoiseFilter.from_config()  # repo_root=None → falls back to Path.cwd()
             results = _filter_column_results(results, nf)
-        _print_table(results, ["id"])
+        _print_table(_add_file_line_col(results), ["id", "file:line"])
+        # Append external consumer rows for terminal tables (scalar query, one per terminal).
+        # Resolve terminal tables from the column results; fall back to the root column's table.
+        terminal_tables: set[str] = set()
+        for r in results:
+            tbl = _col_id_to_table(r["id"])
+            if tbl:
+                terminal_tables.add(tbl)
+        # Also check the root column's table (in case no downstream columns were found).
+        root_parts = ref.rsplit(".", 1)
+        if len(root_parts) == 2:
+            terminal_tables.add(root_parts[0])
+        consumer_rows: list[dict] = []
+        for tbl in sorted(terminal_tables):
+            rows_ec = backend.run_read(
+                GET_TABLE_EXTERNAL_CONSUMERS_QUERY,
+                {"table_qualified": tbl},
+            )
+            for ec in rows_ec:
+                consumer_rows.append(
+                    {"id": f"[external] {ec['name']} ({ec['consumer_type']})", "file:line": ""}
+                )
+        if consumer_rows:
+            _print_table(consumer_rows, ["id", "file:line"])
 @app.command("impact")
 def impact(  # noqa: B008
     table: str = typer.Argument(..., help="Table name to analyze"),  # noqa: B008
+    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
 ) -> None:
     """Show all queries impacted by a table."""
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
             f"MATCH (t:{NodeLabel.TABLE} {{qualified: $t}})"
             f"<-[:{RelType.SELECTS_FROM}]-(q:{NodeLabel.QUERY}) "
-            "RETURN q.id AS id, q.kind AS kind LIMIT 100",
+            "RETURN DISTINCT q.id AS id, q.kind AS kind, q.target_table AS target LIMIT 100",
             {"t": table},
         )
+        if not raw:
+            from sqlcg.server.noise_filter import NoiseFilter
+            nf = NoiseFilter.from_config()
+            results = [r for r in results if not nf.is_noise(r.get("target", ""))]
         _print_table(results, ["id", "kind"])
 @app.command("failures")
 def failures(
     cause: str | None = typer.Option(  # noqa: B008
-        None, "--cause", help="Filter by E-code bucket (e.g. E5, timeout)"
+        None,
+        "--cause",
+        help=(
+            "Filter by E-code bucket. Valid values: "
+            "timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip"
+        ),
     ),
     limit: int = typer.Option(100, "--limit", help="Maximum rows to return"),  # noqa: B008
 ) -> None:
     """List files that failed to parse, with their dominant cause (E-code bucket).
+    Valid --cause buckets (from highest to lowest severity):
+    timeout, E8, E3, E2, E5, E1, qualify_failed, func_fallback, pure_ddl_skip.
     Requires a graph indexed with sqlcg >= v3 (schema version 3). Re-index
     with 'sqlcg db reset && sqlcg index <path>' if the graph was built with
     an earlier version.
     """
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         cypher = (
             f"MATCH (f:{NodeLabel.FILE}) WHERE f.parse_failed = true "
             "AND ($cause IS NULL OR f.parse_cause = $cause) "
@@ -148,14 +241,20 @@ def failures(
 @app.command("unused")
 def unused(
     threshold: int = typer.Option(0, "--threshold", help="Minimum reference count threshold"),
+    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
 ) -> None:
     """Find tables with no query references."""
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
             f"MATCH (t:{NodeLabel.TABLE}) WHERE NOT (t)<-[:{RelType.SELECTS_FROM}]-() "
-            "RETURN t.qualified AS qualified LIMIT 100",
+            "RETURN DISTINCT t.qualified AS qualified LIMIT 100",
             {},
         )
+        if not raw:
+            from sqlcg.server.noise_filter import NoiseFilter
+            nf = NoiseFilter.from_config()
+            results = [r for r in results if not nf.is_noise(r["qualified"])]
         _print_table(results, ["qualified"])
@@ -196,6 +295,25 @@ def _filter_column_results(
     return [r for r in results if not nf.is_noise(_col_id_to_table(r["id"]))]
+def _add_file_line_col(rows: list[dict]) -> list[dict]:
+    """Add a 'file:line' composite column from 'file' and 'line' fields.
+    Formats as 'path/to/file.sql:N' when both are present, or '?' when either
+    is absent (multi-hop upstream where file/line is not available).
+    """
+    result = []
+    for row in rows:
+        new_row = dict(row)
+        file = row.get("file")
+        line = row.get("line")
+        if file and line:
+            new_row["file:line"] = f"{file}:{line}"
+        else:
+            new_row["file:line"] = "?"
+        result.append(new_row)
+    return result
 def _print_table(rows: list[dict], columns: list[str]) -> None:
     """Print results as a Rich table."""
     if not rows:

sqlcg/cli/commands/db.py CHANGED Viewed

@@ -2,11 +2,13 @@
 import os
 import shutil
+from pathlib import Path
 import typer
 from rich.console import Console
 from sqlcg.core.config import get_backend, get_db_path
+from sqlcg.core.freshness import compute_freshness, render_freshness_line
 from sqlcg.core.schema import NodeLabel
 from sqlcg.utils.logging import getLogger
@@ -73,10 +75,25 @@ def db_reset(  # noqa: B008
 @app.command("info")
 def db_info() -> None:
     """Show database stats."""
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         version = backend.get_schema_version() or "unknown"
         console.print(f"Schema version: {version}")
+        # Freshness block — only shown when the DB has been indexed from a git repo
+        try:
+            indexed_sha = backend.get_indexed_sha()
+            repo_rows = backend.run_read("MATCH (r:Repo) RETURN r.path AS path LIMIT 1", {})
+            if repo_rows and indexed_sha is not None and repo_rows[0].get("path"):
+                repo_root = Path(repo_rows[0]["path"])
+                f = compute_freshness(repo_root, indexed_sha)
+                console.print(render_freshness_line(f))
+        except NotImplementedError:
+            # Neo4j backend raises NotImplementedError for get_indexed_sha — skip silently
+            pass
+        except Exception as e:
+            # Any unexpected error in the freshness block must not crash db info
+            logger.debug(f"Freshness check skipped: {e}")
         # Show node counts for all labels
         for label in NodeLabel:
             try:
@@ -150,7 +167,7 @@ def db_info() -> None:
 @app.command("list-repos")
 def list_repos() -> None:
     """List all indexed repositories."""
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         result = backend.run_read("MATCH (r:Repo) RETURN r.path AS path, r.name AS name", {})
         if not result:

sqlcg/cli/commands/find.py CHANGED Viewed

@@ -18,7 +18,7 @@ def find_table(  # noqa: B008
 ) -> None:
     """Find a table by name."""
     name = name.lower()  # graph keys are lowercased at index time (C2 normalization)
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
             f"MATCH (t:{NodeLabel.TABLE}) WHERE t.qualified CONTAINS $name "
             "RETURN t.qualified AS qualified, t.kind AS kind LIMIT 50",
@@ -38,14 +38,21 @@ def find_table(  # noqa: B008
 @app.command("column")
 def find_column(  # noqa: B008
     ref: str = typer.Argument(..., help="Column reference (table.column)"),  # noqa: B008
+    raw: bool = typer.Option(False, "--raw", help="Disable noise filtering on results"),  # noqa: B008
 ) -> None:
     """Find a column by table.column reference."""
     ref = ref.lower()  # graph keys are lowercased at index time (C2 normalization)
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
             f"MATCH (c:{NodeLabel.COLUMN}) WHERE c.id CONTAINS $ref RETURN c.id AS id LIMIT 50",
             {"ref": ref},
         )
+        if not raw:
+            from sqlcg.server.noise_filter import NoiseFilter
+            nf = NoiseFilter.from_config()  # repo_root=None → falls back to Path.cwd()
+            # Filter on the schema.table portion of each column id (schema.table.column)
+            results = [r for r in results if not nf.is_noise(r["id"].rsplit(".", 1)[0])]
         _print_table(results, ["id"])
@@ -54,7 +61,7 @@ def find_pattern(  # noqa: B008
     pattern: str = typer.Argument(..., help="SQL pattern to search for"),  # noqa: B008
 ) -> None:
     """Find queries containing a SQL pattern."""
-    with get_backend() as backend:
+    with get_backend(read_only=True) as backend:
         results = backend.run_read(
             f"MATCH (q:{NodeLabel.QUERY}) WHERE q.sql CONTAINS $pattern "
             "RETURN q.id AS id, q.kind AS kind LIMIT 50",

sqlcg/cli/commands/gain.py CHANGED Viewed

@@ -123,7 +123,7 @@ def gain_cmd(
         # Section F: parse quality from graph
         parse_quality: dict[str, int] | None = None
         try:
-            with get_backend() as backend:
+            with get_backend(read_only=True) as backend:
                 mode_rows = backend.run_read(
                     "MATCH (q:SqlQuery) RETURN q.parsing_mode AS mode,"
                     " COUNT(q) AS cnt ORDER BY cnt DESC",

sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl

sql-code-graph 1.0.2__py3-none-any.whl → 1.1.3__py3-none-any.whl