wanda-fabric 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. wanda_fabric-0.1.0/LICENSE +44 -0
  2. wanda_fabric-0.1.0/PKG-INFO +296 -0
  3. wanda_fabric-0.1.0/docs/README.md +221 -0
  4. wanda_fabric-0.1.0/pyproject.toml +51 -0
  5. wanda_fabric-0.1.0/setup.cfg +4 -0
  6. wanda_fabric-0.1.0/src/wanda/__init__.py +20 -0
  7. wanda_fabric-0.1.0/src/wanda/__main__.py +4 -0
  8. wanda_fabric-0.1.0/src/wanda/agent.py +130 -0
  9. wanda_fabric-0.1.0/src/wanda/cli.py +68 -0
  10. wanda_fabric-0.1.0/src/wanda/config.py +173 -0
  11. wanda_fabric-0.1.0/src/wanda/core.py +169 -0
  12. wanda_fabric-0.1.0/src/wanda/fabric_tools.py +625 -0
  13. wanda_fabric-0.1.0/src/wanda/llm_provider.py +420 -0
  14. wanda_fabric-0.1.0/src/wanda/log_setup.py +31 -0
  15. wanda_fabric-0.1.0/src/wanda/mcp_server.py +33 -0
  16. wanda_fabric-0.1.0/src/wanda/prompts/investigate.md +27 -0
  17. wanda_fabric-0.1.0/src/wanda/prompts/scan.md +83 -0
  18. wanda_fabric-0.1.0/src/wanda/render_report.py +671 -0
  19. wanda_fabric-0.1.0/src/wanda/telemetry.py +108 -0
  20. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/PKG-INFO +296 -0
  21. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/SOURCES.txt +28 -0
  22. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/dependency_links.txt +1 -0
  23. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/entry_points.txt +2 -0
  24. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/requires.txt +12 -0
  25. wanda_fabric-0.1.0/src/wanda_fabric.egg-info/top_level.txt +1 -0
  26. wanda_fabric-0.1.0/tests/test_agent.py +139 -0
  27. wanda_fabric-0.1.0/tests/test_config.py +61 -0
  28. wanda_fabric-0.1.0/tests/test_fabric_tools.py +47 -0
  29. wanda_fabric-0.1.0/tests/test_llm_provider.py +314 -0
  30. wanda_fabric-0.1.0/tests/test_telemetry.py +77 -0
@@ -0,0 +1,44 @@
1
+ Wanda — Beta Evaluation License
2
+ Copyright (c) 2026 CM Labs. All rights reserved.
3
+
4
+ This software ("Wanda") is proprietary to CM Labs and is provided for
5
+ closed-beta evaluation only. By installing or using Wanda you agree to the
6
+ terms below.
7
+
8
+ 1. GRANT. CM Labs grants you a limited, non-exclusive, non-transferable,
9
+ revocable license to install and use Wanda solely to evaluate it against
10
+ your own Microsoft Fabric workspaces during the beta program.
11
+
12
+ 2. RESTRICTIONS. You may not (a) redistribute, sublicense, sell, rent, or
13
+ publish Wanda or any part of it; (b) modify, adapt, or create derivative
14
+ works for distribution; (c) reverse engineer, decompile, or disassemble
15
+ Wanda except to the extent this restriction is prohibited by law; (d) use
16
+ Wanda to build or train a competing product or service; or (e) remove or
17
+ alter any copyright, trademark, or other proprietary notices.
18
+
19
+ 3. FEEDBACK. If you give CM Labs feedback, suggestions, or bug reports, CM
20
+ Labs may use them for any purpose without obligation to you.
21
+
22
+ 4. YOUR DATA AND COSTS. Wanda runs against your own Fabric workspace using
23
+ your own credentials and your own LLM provider key. You are responsible
24
+ for those credentials, for any usage charges they incur, and for the
25
+ resources you point Wanda at.
26
+
27
+ 5. NO WARRANTY. Wanda is provided "AS IS" and "AS AVAILABLE", without
28
+ warranty of any kind, express or implied, including merchantability,
29
+ fitness for a particular purpose, and non-infringement. This is beta
30
+ software and may be incomplete, change, or be withdrawn at any time.
31
+
32
+ 6. LIMITATION OF LIABILITY. To the maximum extent permitted by law, CM Labs
33
+ shall not be liable for any indirect, incidental, special, consequential,
34
+ or punitive damages, or any loss of data, profits, or revenue, arising
35
+ out of or relating to your use of Wanda.
36
+
37
+ 7. TERMINATION. This license terminates automatically if you breach it, and
38
+ CM Labs may terminate the beta or this license at any time. On termination
39
+ you must stop using Wanda and delete all copies.
40
+
41
+ 8. RESERVATION OF RIGHTS. All rights not expressly granted here are reserved
42
+ by CM Labs.
43
+
44
+ Contact: CM Labs — matthewarrogante@gmail.com
@@ -0,0 +1,296 @@
1
+ Metadata-Version: 2.4
2
+ Name: wanda-fabric
3
+ Version: 0.1.0
4
+ Summary: An AI Data Engineer for Microsoft Fabric — investigates failed pipelines and produces evidence-backed root-cause reports.
5
+ Author: CM Labs
6
+ License: Wanda — Beta Evaluation License
7
+ Copyright (c) 2026 CM Labs. All rights reserved.
8
+
9
+ This software ("Wanda") is proprietary to CM Labs and is provided for
10
+ closed-beta evaluation only. By installing or using Wanda you agree to the
11
+ terms below.
12
+
13
+ 1. GRANT. CM Labs grants you a limited, non-exclusive, non-transferable,
14
+ revocable license to install and use Wanda solely to evaluate it against
15
+ your own Microsoft Fabric workspaces during the beta program.
16
+
17
+ 2. RESTRICTIONS. You may not (a) redistribute, sublicense, sell, rent, or
18
+ publish Wanda or any part of it; (b) modify, adapt, or create derivative
19
+ works for distribution; (c) reverse engineer, decompile, or disassemble
20
+ Wanda except to the extent this restriction is prohibited by law; (d) use
21
+ Wanda to build or train a competing product or service; or (e) remove or
22
+ alter any copyright, trademark, or other proprietary notices.
23
+
24
+ 3. FEEDBACK. If you give CM Labs feedback, suggestions, or bug reports, CM
25
+ Labs may use them for any purpose without obligation to you.
26
+
27
+ 4. YOUR DATA AND COSTS. Wanda runs against your own Fabric workspace using
28
+ your own credentials and your own LLM provider key. You are responsible
29
+ for those credentials, for any usage charges they incur, and for the
30
+ resources you point Wanda at.
31
+
32
+ 5. NO WARRANTY. Wanda is provided "AS IS" and "AS AVAILABLE", without
33
+ warranty of any kind, express or implied, including merchantability,
34
+ fitness for a particular purpose, and non-infringement. This is beta
35
+ software and may be incomplete, change, or be withdrawn at any time.
36
+
37
+ 6. LIMITATION OF LIABILITY. To the maximum extent permitted by law, CM Labs
38
+ shall not be liable for any indirect, incidental, special, consequential,
39
+ or punitive damages, or any loss of data, profits, or revenue, arising
40
+ out of or relating to your use of Wanda.
41
+
42
+ 7. TERMINATION. This license terminates automatically if you breach it, and
43
+ CM Labs may terminate the beta or this license at any time. On termination
44
+ you must stop using Wanda and delete all copies.
45
+
46
+ 8. RESERVATION OF RIGHTS. All rights not expressly granted here are reserved
47
+ by CM Labs.
48
+
49
+ Contact: CM Labs — matthewarrogante@gmail.com
50
+
51
+ Project-URL: Homepage, https://github.com/cmlabs-ai/wanda
52
+ Keywords: microsoft-fabric,data-engineering,llm,agent,root-cause-analysis
53
+ Classifier: Development Status :: 4 - Beta
54
+ Classifier: Programming Language :: Python :: 3
55
+ Classifier: Programming Language :: Python :: 3.11
56
+ Classifier: Programming Language :: Python :: 3.12
57
+ Classifier: Intended Audience :: Developers
58
+ Classifier: Topic :: Software Development :: Quality Assurance
59
+ Classifier: Topic :: Database
60
+ Classifier: License :: Other/Proprietary License
61
+ Classifier: Operating System :: OS Independent
62
+ Requires-Python: >=3.11
63
+ Description-Content-Type: text/markdown
64
+ License-File: LICENSE
65
+ Requires-Dist: requests>=2.32
66
+ Requires-Dist: python-dotenv>=1.0
67
+ Provides-Extra: sql
68
+ Requires-Dist: pyodbc>=5; extra == "sql"
69
+ Provides-Extra: mcp
70
+ Requires-Dist: fastmcp>=3; extra == "mcp"
71
+ Provides-Extra: all
72
+ Requires-Dist: pyodbc>=5; extra == "all"
73
+ Requires-Dist: fastmcp>=3; extra == "all"
74
+ Dynamic: license-file
75
+
76
+ # Wanda
77
+
78
+ > An AI Data Engineer for Microsoft Fabric. Hours → minutes for pipeline root-cause analysis.
79
+
80
+ Wanda is an AI Data Engineer that investigates failed Microsoft Fabric pipelines
81
+ and produces evidence-backed root-cause reports. It drives an LLM (Claude by
82
+ default) through an agentic tool-use loop, reaching Fabric directly through the
83
+ Fabric REST API and SQL endpoint.
84
+
85
+ A CM Labs product — born at the **GitHub Copilot SDK Hackathon (Web Summit
86
+ Vancouver 2026)** and since rebuilt for real-world use.
87
+
88
+ ## Problem
89
+
90
+ When a Fabric pipeline fails, a data engineer typically spends 1–2 hours on:
91
+ - Reading raw failure logs
92
+ - Opening each failed notebook to read the source
93
+ - Querying the lakehouse to verify what tables/columns actually exist
94
+ - Cross-referencing all of the above to find the root cause
95
+
96
+ Most of that work is mechanical evidence-gathering, not analysis. Wanda
97
+ takes ownership of the routine investigation so the human data engineer
98
+ can focus on the fix.
99
+
100
+ ## Solution
101
+
102
+ Wanda automates the evidence chain a senior data engineer would walk:
103
+
104
+ 1. Pulls the failed pipeline run from the Fabric REST API
105
+ 2. Reads the source of the failing notebook
106
+ 3. Decides whether to query the SQL endpoint based on the error type
107
+ 4. Writes a definitive root-cause report — no guessing
108
+
109
+ The agent makes those decisions itself. Different failures lead to different
110
+ investigation paths.
111
+
112
+ ## Architecture
113
+
114
+ Wanda talks to its model provider **directly**, and the Fabric tools are plain
115
+ Python functions called **inline** — no subprocess — which is what lets Wanda
116
+ run anywhere from a CLI to a Fabric notebook.
117
+
118
+ ```
119
+ Wanda class / CLI (wanda.core / wanda.cli)
120
+ │ .investigate() · .scan() → WandaReport
121
+
122
+ Agent loop (wanda.agent) bounded tool-use loop
123
+ ├──────────────► LLM provider (wanda.llm_provider)
124
+ │ Claude (Anthropic / Azure) · GPT (Azure OpenAI)
125
+ └──────────────► 6 Fabric tools (wanda.fabric_tools) ──► Microsoft
126
+ inline — no subprocess Fabric
127
+ REST + SQL
128
+ ```
129
+
130
+ - `wanda.core` — the `Wanda` class and `WandaReport`. Imports the tools and runs the loop inline.
131
+ - `wanda.cli` — the `wanda` command-line entry point.
132
+ - `wanda.agent` — provider-agnostic tool-use loop (bounded steps, result truncation, token accounting).
133
+ - `wanda.llm_provider` — swappable LLM backend. `WANDA_PROVIDER` selects `anthropic`, `azure-openai`, or `azure-anthropic`. The Anthropic path uses prompt caching.
134
+ - `wanda.fabric_tools` — the 6 Fabric tools as plain functions (REST + SQL), with retry/backoff and token refresh.
135
+ - `wanda.mcp_server` — a thin **MCP** wrapper over the *same* 6 tools, so any MCP-compatible client (Claude Desktop, Cursor, VS Code) can use them too — see `mcp.json`.
136
+
137
+ ## Use it as a library (notebook or script)
138
+
139
+ ```python
140
+ from wanda import Wanda
141
+
142
+ wanda = Wanda(anthropic_api_key="sk-ant-...") # or rely on .env
143
+ report = wanda.investigate("LoadSalesPipeline")
144
+ report.display() # inline HTML in a notebook
145
+ print(report.text) # or the raw text
146
+ ```
147
+
148
+ ## Demo scenarios
149
+
150
+ Demo pipelines in the Fabric workspace, each failing in a different way. The
151
+ agent takes a different investigation path for each.
152
+
153
+ **Scenario 1 — `LoadSalesPipeline`** (missing table — *verified live run*)
154
+ 1. `get_pipeline_run` / `get_pipeline_definition` → identifies the failing activity `Write_Gold_Orders`
155
+ 2. `get_notebook_source` (×3) → reads the notebooks and finds `Write_Gold_Orders` reads `order_enriched` (missing the **s**) instead of `orders_enriched`
156
+ 3. `query_sql_endpoint` → confirms `orders_enriched` exists in the lakehouse but `order_enriched` does not → `TABLE_OR_VIEW_NOT_FOUND`
157
+ 4. Reports the exact line to fix
158
+
159
+ **Scenario 2 — `TransformSalesPipeline`** (code bug)
160
+ 1. `get_pipeline_run` → finds an `AttributeError` (e.g. a wrong DataFrame column reference)
161
+ 2. `get_notebook_source` → reads the offending line
162
+ 3. Skips the SQL check — code bug, not a missing table
163
+
164
+ **Scenario 3 — `DailySalesETL`** (multi-activity ETL chain)
165
+ A multi-activity pipeline: Copy → cleanup notebook → parallel branches (aggregate notebook + stored procedure) → summarize notebook.
166
+ 1. `get_pipeline_definition` → walks the activity graph
167
+ 2. `get_pipeline_run` → identifies the single failed activity in the chain
168
+ 3. Reports which activities succeeded and which one failed, with the root cause
169
+
170
+ The divergent tool paths are the proof that the agent is genuinely agentic.
171
+
172
+ ## Prerequisites
173
+
174
+ - Windows or macOS, Python 3.11+
175
+ - An Azure tenant with a Microsoft Fabric trial or capacity
176
+ - A Fabric workspace with a Lakehouse, demo pipelines, and notebooks
177
+ - An Entra ID App Registration (Service Principal) with access to the workspace
178
+ - ODBC Driver 18 for SQL Server (for the SQL endpoint tool)
179
+ - An **Anthropic API key** (default), *or* an Azure OpenAI / Azure-hosted Claude deployment
180
+
181
+ ## Install
182
+
183
+ Wanda is a pip-installable package (`wanda-fabric`).
184
+
185
+ ```bash
186
+ pip install "wanda-fabric[sql]"
187
+ ```
188
+
189
+ The `[sql]` extra adds `pyodbc` for the SQL-endpoint tools; `[mcp]` adds `fastmcp`
190
+ for the standalone MCP server; `[all]` adds both. The core install stays light for
191
+ notebooks. The SQL tools also need the OS-level **ODBC Driver 18 for SQL Server**.
192
+
193
+ **New here?** [docs/GETTING_STARTED.md](GETTING_STARTED.md) walks through the full first-time
194
+ setup (Service Principal, ODBC driver, API key) in ~15 minutes.
195
+
196
+ *CM Labs internal — develop from the private repo:*
197
+
198
+ ```bash
199
+ git clone https://github.com/cmlabs-ai/wanda.git
200
+ cd wanda
201
+ python -m venv .venv
202
+ .\.venv\Scripts\Activate.ps1 # Windows (macOS/Linux: source .venv/bin/activate)
203
+ pip install -e ".[all]" # core + sql (pyodbc) + mcp (fastmcp) extras
204
+ ```
205
+
206
+ Then configure credentials:
207
+
208
+ ```bash
209
+ cp .env.example .env
210
+ # Edit .env: Fabric Service Principal values + ANTHROPIC_API_KEY (and WANDA_PROVIDER if not "anthropic")
211
+ ```
212
+
213
+ No GitHub Copilot login is required — Wanda calls the model provider directly.
214
+
215
+ ## Run
216
+
217
+ Point Wanda at a pipeline that failed in **your** workspace:
218
+
219
+ ```bash
220
+ # Investigate a failed pipeline (default mode)
221
+ wanda "Your Failed Pipeline Name"
222
+
223
+ # Pre-run scan: audit a pipeline before it runs
224
+ wanda "Your Pipeline Name" --scan
225
+
226
+ # (equivalently: python -m wanda "Your Pipeline Name")
227
+ ```
228
+
229
+ You'll see each tool call logged to stderr as it happens, the final root-cause
230
+ report printed, and a polished HTML report saved to `./reports/`.
231
+
232
+ > The demo scenarios above run against CM Labs' own demo workspace
233
+ > (`LoadSalesPipeline`, etc.) — substitute your own pipeline names.
234
+
235
+ ## Configuration
236
+
237
+ Set in `.env` (see `.env.example`):
238
+
239
+ | Variable | Purpose |
240
+ |---|---|
241
+ | `FABRIC_TENANT_ID` / `FABRIC_CLIENT_ID` / `FABRIC_CLIENT_SECRET` / `FABRIC_WORKSPACE_ID` | Service Principal + workspace |
242
+ | `WANDA_PROVIDER` | `anthropic` (default) · `azure-openai` · `azure-anthropic` |
243
+ | `ANTHROPIC_API_KEY` | for the default `anthropic` provider |
244
+ | `WANDA_MODEL` | optional model override (default `claude-sonnet-4-6`) |
245
+ | `AZURE_OPENAI_*` / `AZURE_ANTHROPIC_*` | for the Azure providers |
246
+ | `WANDA_LOG_LEVEL` | logging verbosity (default `INFO`) |
247
+
248
+ ## Repository layout
249
+
250
+ ```
251
+ wanda/
252
+ ├── src/wanda/ the installable package (wanda-fabric)
253
+ │ ├── __init__.py exports Wanda, WandaReport
254
+ │ ├── core.py Wanda class + WandaReport
255
+ │ ├── cli.py command-line entry point (the `wanda` command)
256
+ │ ├── __main__.py enables `python -m wanda`
257
+ │ ├── agent.py provider-agnostic tool-use loop
258
+ │ ├── llm_provider.py swappable LLM backend (Anthropic / Azure OpenAI / Azure-hosted Claude)
259
+ │ ├── fabric_tools.py the 6 Fabric tools (REST + SQL), called inline
260
+ │ ├── mcp_server.py thin MCP wrapper over the same tools
261
+ │ ├── config.py typed, fail-fast configuration
262
+ │ ├── log_setup.py logging (stderr)
263
+ │ ├── render_report.py text → self-contained HTML report
264
+ │ └── prompts/ investigate.md, scan.md (bundled package data)
265
+ ├── notebooks/ template notebook for Fabric users
266
+ ├── tests/ 34 offline tests (providers, agent loop, config, Fabric tools, telemetry)
267
+ ├── docs/ this README + architecture/business docs
268
+ ├── presentations/ decks
269
+ ├── reports/ generated HTML reports (gitignored)
270
+ ├── pyproject.toml packaging + dependencies
271
+ ├── mcp.json MCP server config (for any MCP client)
272
+ └── .env.example
273
+ ```
274
+
275
+ ## Responsible AI notes
276
+
277
+ - **Read-only — enforced in code.** Wanda calls Fabric REST and SQL endpoints in read mode only; the SQL tools reject anything that isn't a `SELECT`/`WITH` query, so Wanda cannot modify pipelines, notebooks, or table data.
278
+ - **Secrets stay local.** Credentials live in `.env` (gitignored) and are never logged, sent to the LLM, or written into reports.
279
+ - **Minimal data exposure.** Notebook source, pipeline structure, and table/column names go to the LLM so it can reason. A pre-run scan may read a *small sample* of rows (e.g. `SELECT TOP 1 *`) to validate data — never bulk data.
280
+ - **Evidence-based.** The system prompts restrict Wanda to evidence from its tool calls. Recommendations are descriptive ("change `order_enriched` to `orders_enriched`"), never actions Wanda performs itself.
281
+ - **Scoped access.** Service Principal authentication scopes Wanda's access to a single workspace.
282
+
283
+ ## Tech stack
284
+
285
+ - **Agent runtime:** custom tool-use loop calling the LLM provider directly (Anthropic Messages API / Azure OpenAI), dependency-light (`requests`, no vendor SDKs)
286
+ - **Tools:** plain Python functions, also exposed via the Model Context Protocol (FastMCP)
287
+ - **Cloud:** Microsoft Fabric REST API, Fabric SQL endpoint, Microsoft Entra ID
288
+ - **Drivers:** Microsoft ODBC Driver 18 (SQL endpoint), Service Principal auth
289
+
290
+ ## Origin
291
+
292
+ Wanda began at the **GitHub Copilot SDK Hackathon** (Web Summit Vancouver 2026),
293
+ where the original prototype ran on the GitHub Copilot SDK with an MCP subprocess.
294
+ It has since been rebuilt by **CM Labs** to call its model provider directly,
295
+ run inline (notebook-ready), and switch LLM providers via configuration —
296
+ the foundation for a Microsoft Fabric beta.
@@ -0,0 +1,221 @@
1
+ # Wanda
2
+
3
+ > An AI Data Engineer for Microsoft Fabric. Hours → minutes for pipeline root-cause analysis.
4
+
5
+ Wanda is an AI Data Engineer that investigates failed Microsoft Fabric pipelines
6
+ and produces evidence-backed root-cause reports. It drives an LLM (Claude by
7
+ default) through an agentic tool-use loop, reaching Fabric directly through the
8
+ Fabric REST API and SQL endpoint.
9
+
10
+ A CM Labs product — born at the **GitHub Copilot SDK Hackathon (Web Summit
11
+ Vancouver 2026)** and since rebuilt for real-world use.
12
+
13
+ ## Problem
14
+
15
+ When a Fabric pipeline fails, a data engineer typically spends 1–2 hours on:
16
+ - Reading raw failure logs
17
+ - Opening each failed notebook to read the source
18
+ - Querying the lakehouse to verify what tables/columns actually exist
19
+ - Cross-referencing all of the above to find the root cause
20
+
21
+ Most of that work is mechanical evidence-gathering, not analysis. Wanda
22
+ takes ownership of the routine investigation so the human data engineer
23
+ can focus on the fix.
24
+
25
+ ## Solution
26
+
27
+ Wanda automates the evidence chain a senior data engineer would walk:
28
+
29
+ 1. Pulls the failed pipeline run from the Fabric REST API
30
+ 2. Reads the source of the failing notebook
31
+ 3. Decides whether to query the SQL endpoint based on the error type
32
+ 4. Writes a definitive root-cause report — no guessing
33
+
34
+ The agent makes those decisions itself. Different failures lead to different
35
+ investigation paths.
36
+
37
+ ## Architecture
38
+
39
+ Wanda talks to its model provider **directly**, and the Fabric tools are plain
40
+ Python functions called **inline** — no subprocess — which is what lets Wanda
41
+ run anywhere from a CLI to a Fabric notebook.
42
+
43
+ ```
44
+ Wanda class / CLI (wanda.core / wanda.cli)
45
+ │ .investigate() · .scan() → WandaReport
46
+
47
+ Agent loop (wanda.agent) bounded tool-use loop
48
+ ├──────────────► LLM provider (wanda.llm_provider)
49
+ │ Claude (Anthropic / Azure) · GPT (Azure OpenAI)
50
+ └──────────────► 6 Fabric tools (wanda.fabric_tools) ──► Microsoft
51
+ inline — no subprocess Fabric
52
+ REST + SQL
53
+ ```
54
+
55
+ - `wanda.core` — the `Wanda` class and `WandaReport`. Imports the tools and runs the loop inline.
56
+ - `wanda.cli` — the `wanda` command-line entry point.
57
+ - `wanda.agent` — provider-agnostic tool-use loop (bounded steps, result truncation, token accounting).
58
+ - `wanda.llm_provider` — swappable LLM backend. `WANDA_PROVIDER` selects `anthropic`, `azure-openai`, or `azure-anthropic`. The Anthropic path uses prompt caching.
59
+ - `wanda.fabric_tools` — the 6 Fabric tools as plain functions (REST + SQL), with retry/backoff and token refresh.
60
+ - `wanda.mcp_server` — a thin **MCP** wrapper over the *same* 6 tools, so any MCP-compatible client (Claude Desktop, Cursor, VS Code) can use them too — see `mcp.json`.
61
+
62
+ ## Use it as a library (notebook or script)
63
+
64
+ ```python
65
+ from wanda import Wanda
66
+
67
+ wanda = Wanda(anthropic_api_key="sk-ant-...") # or rely on .env
68
+ report = wanda.investigate("LoadSalesPipeline")
69
+ report.display() # inline HTML in a notebook
70
+ print(report.text) # or the raw text
71
+ ```
72
+
73
+ ## Demo scenarios
74
+
75
+ Demo pipelines in the Fabric workspace, each failing in a different way. The
76
+ agent takes a different investigation path for each.
77
+
78
+ **Scenario 1 — `LoadSalesPipeline`** (missing table — *verified live run*)
79
+ 1. `get_pipeline_run` / `get_pipeline_definition` → identifies the failing activity `Write_Gold_Orders`
80
+ 2. `get_notebook_source` (×3) → reads the notebooks and finds `Write_Gold_Orders` reads `order_enriched` (missing the **s**) instead of `orders_enriched`
81
+ 3. `query_sql_endpoint` → confirms `orders_enriched` exists in the lakehouse but `order_enriched` does not → `TABLE_OR_VIEW_NOT_FOUND`
82
+ 4. Reports the exact line to fix
83
+
84
+ **Scenario 2 — `TransformSalesPipeline`** (code bug)
85
+ 1. `get_pipeline_run` → finds an `AttributeError` (e.g. a wrong DataFrame column reference)
86
+ 2. `get_notebook_source` → reads the offending line
87
+ 3. Skips the SQL check — code bug, not a missing table
88
+
89
+ **Scenario 3 — `DailySalesETL`** (multi-activity ETL chain)
90
+ A multi-activity pipeline: Copy → cleanup notebook → parallel branches (aggregate notebook + stored procedure) → summarize notebook.
91
+ 1. `get_pipeline_definition` → walks the activity graph
92
+ 2. `get_pipeline_run` → identifies the single failed activity in the chain
93
+ 3. Reports which activities succeeded and which one failed, with the root cause
94
+
95
+ The divergent tool paths are the proof that the agent is genuinely agentic.
96
+
97
+ ## Prerequisites
98
+
99
+ - Windows or macOS, Python 3.11+
100
+ - An Azure tenant with a Microsoft Fabric trial or capacity
101
+ - A Fabric workspace with a Lakehouse, demo pipelines, and notebooks
102
+ - An Entra ID App Registration (Service Principal) with access to the workspace
103
+ - ODBC Driver 18 for SQL Server (for the SQL endpoint tool)
104
+ - An **Anthropic API key** (default), *or* an Azure OpenAI / Azure-hosted Claude deployment
105
+
106
+ ## Install
107
+
108
+ Wanda is a pip-installable package (`wanda-fabric`).
109
+
110
+ ```bash
111
+ pip install "wanda-fabric[sql]"
112
+ ```
113
+
114
+ The `[sql]` extra adds `pyodbc` for the SQL-endpoint tools; `[mcp]` adds `fastmcp`
115
+ for the standalone MCP server; `[all]` adds both. The core install stays light for
116
+ notebooks. The SQL tools also need the OS-level **ODBC Driver 18 for SQL Server**.
117
+
118
+ **New here?** [docs/GETTING_STARTED.md](GETTING_STARTED.md) walks through the full first-time
119
+ setup (Service Principal, ODBC driver, API key) in ~15 minutes.
120
+
121
+ *CM Labs internal — develop from the private repo:*
122
+
123
+ ```bash
124
+ git clone https://github.com/cmlabs-ai/wanda.git
125
+ cd wanda
126
+ python -m venv .venv
127
+ .\.venv\Scripts\Activate.ps1 # Windows (macOS/Linux: source .venv/bin/activate)
128
+ pip install -e ".[all]" # core + sql (pyodbc) + mcp (fastmcp) extras
129
+ ```
130
+
131
+ Then configure credentials:
132
+
133
+ ```bash
134
+ cp .env.example .env
135
+ # Edit .env: Fabric Service Principal values + ANTHROPIC_API_KEY (and WANDA_PROVIDER if not "anthropic")
136
+ ```
137
+
138
+ No GitHub Copilot login is required — Wanda calls the model provider directly.
139
+
140
+ ## Run
141
+
142
+ Point Wanda at a pipeline that failed in **your** workspace:
143
+
144
+ ```bash
145
+ # Investigate a failed pipeline (default mode)
146
+ wanda "Your Failed Pipeline Name"
147
+
148
+ # Pre-run scan: audit a pipeline before it runs
149
+ wanda "Your Pipeline Name" --scan
150
+
151
+ # (equivalently: python -m wanda "Your Pipeline Name")
152
+ ```
153
+
154
+ You'll see each tool call logged to stderr as it happens, the final root-cause
155
+ report printed, and a polished HTML report saved to `./reports/`.
156
+
157
+ > The demo scenarios above run against CM Labs' own demo workspace
158
+ > (`LoadSalesPipeline`, etc.) — substitute your own pipeline names.
159
+
160
+ ## Configuration
161
+
162
+ Set in `.env` (see `.env.example`):
163
+
164
+ | Variable | Purpose |
165
+ |---|---|
166
+ | `FABRIC_TENANT_ID` / `FABRIC_CLIENT_ID` / `FABRIC_CLIENT_SECRET` / `FABRIC_WORKSPACE_ID` | Service Principal + workspace |
167
+ | `WANDA_PROVIDER` | `anthropic` (default) · `azure-openai` · `azure-anthropic` |
168
+ | `ANTHROPIC_API_KEY` | for the default `anthropic` provider |
169
+ | `WANDA_MODEL` | optional model override (default `claude-sonnet-4-6`) |
170
+ | `AZURE_OPENAI_*` / `AZURE_ANTHROPIC_*` | for the Azure providers |
171
+ | `WANDA_LOG_LEVEL` | logging verbosity (default `INFO`) |
172
+
173
+ ## Repository layout
174
+
175
+ ```
176
+ wanda/
177
+ ├── src/wanda/ the installable package (wanda-fabric)
178
+ │ ├── __init__.py exports Wanda, WandaReport
179
+ │ ├── core.py Wanda class + WandaReport
180
+ │ ├── cli.py command-line entry point (the `wanda` command)
181
+ │ ├── __main__.py enables `python -m wanda`
182
+ │ ├── agent.py provider-agnostic tool-use loop
183
+ │ ├── llm_provider.py swappable LLM backend (Anthropic / Azure OpenAI / Azure-hosted Claude)
184
+ │ ├── fabric_tools.py the 6 Fabric tools (REST + SQL), called inline
185
+ │ ├── mcp_server.py thin MCP wrapper over the same tools
186
+ │ ├── config.py typed, fail-fast configuration
187
+ │ ├── log_setup.py logging (stderr)
188
+ │ ├── render_report.py text → self-contained HTML report
189
+ │ └── prompts/ investigate.md, scan.md (bundled package data)
190
+ ├── notebooks/ template notebook for Fabric users
191
+ ├── tests/ 34 offline tests (providers, agent loop, config, Fabric tools, telemetry)
192
+ ├── docs/ this README + architecture/business docs
193
+ ├── presentations/ decks
194
+ ├── reports/ generated HTML reports (gitignored)
195
+ ├── pyproject.toml packaging + dependencies
196
+ ├── mcp.json MCP server config (for any MCP client)
197
+ └── .env.example
198
+ ```
199
+
200
+ ## Responsible AI notes
201
+
202
+ - **Read-only — enforced in code.** Wanda calls Fabric REST and SQL endpoints in read mode only; the SQL tools reject anything that isn't a `SELECT`/`WITH` query, so Wanda cannot modify pipelines, notebooks, or table data.
203
+ - **Secrets stay local.** Credentials live in `.env` (gitignored) and are never logged, sent to the LLM, or written into reports.
204
+ - **Minimal data exposure.** Notebook source, pipeline structure, and table/column names go to the LLM so it can reason. A pre-run scan may read a *small sample* of rows (e.g. `SELECT TOP 1 *`) to validate data — never bulk data.
205
+ - **Evidence-based.** The system prompts restrict Wanda to evidence from its tool calls. Recommendations are descriptive ("change `order_enriched` to `orders_enriched`"), never actions Wanda performs itself.
206
+ - **Scoped access.** Service Principal authentication scopes Wanda's access to a single workspace.
207
+
208
+ ## Tech stack
209
+
210
+ - **Agent runtime:** custom tool-use loop calling the LLM provider directly (Anthropic Messages API / Azure OpenAI), dependency-light (`requests`, no vendor SDKs)
211
+ - **Tools:** plain Python functions, also exposed via the Model Context Protocol (FastMCP)
212
+ - **Cloud:** Microsoft Fabric REST API, Fabric SQL endpoint, Microsoft Entra ID
213
+ - **Drivers:** Microsoft ODBC Driver 18 (SQL endpoint), Service Principal auth
214
+
215
+ ## Origin
216
+
217
+ Wanda began at the **GitHub Copilot SDK Hackathon** (Web Summit Vancouver 2026),
218
+ where the original prototype ran on the GitHub Copilot SDK with an MCP subprocess.
219
+ It has since been rebuilt by **CM Labs** to call its model provider directly,
220
+ run inline (notebook-ready), and switch LLM providers via configuration —
221
+ the foundation for a Microsoft Fabric beta.
@@ -0,0 +1,51 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "wanda-fabric"
7
+ version = "0.1.0"
8
+ description = "An AI Data Engineer for Microsoft Fabric — investigates failed pipelines and produces evidence-backed root-cause reports."
9
+ readme = "docs/README.md"
10
+ requires-python = ">=3.11"
11
+ authors = [{ name = "CM Labs" }]
12
+ license = { file = "LICENSE" }
13
+ keywords = ["microsoft-fabric", "data-engineering", "llm", "agent", "root-cause-analysis"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Intended Audience :: Developers",
20
+ "Topic :: Software Development :: Quality Assurance",
21
+ "Topic :: Database",
22
+ "License :: Other/Proprietary License",
23
+ "Operating System :: OS Independent",
24
+ ]
25
+ # Core stays dependency-light so it installs cleanly in a Fabric notebook.
26
+ dependencies = [
27
+ "requests>=2.32",
28
+ "python-dotenv>=1.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ # pyodbc powers the SQL-endpoint tools; absent it, those tools degrade gracefully.
33
+ sql = ["pyodbc>=5"]
34
+ # fastmcp powers the standalone MCP server (for Claude Desktop / Cursor / VS Code).
35
+ mcp = ["fastmcp>=3"]
36
+ all = ["pyodbc>=5", "fastmcp>=3"]
37
+
38
+ [project.scripts]
39
+ wanda = "wanda.cli:main"
40
+
41
+ [project.urls]
42
+ Homepage = "https://github.com/cmlabs-ai/wanda"
43
+
44
+ [tool.setuptools]
45
+ package-dir = { "" = "src" }
46
+
47
+ [tool.setuptools.packages.find]
48
+ where = ["src"]
49
+
50
+ [tool.setuptools.package-data]
51
+ wanda = ["prompts/*.md"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,20 @@
1
+ """
2
+ Wanda — an AI Data Engineer for Microsoft Fabric.
3
+
4
+ Investigates failed Fabric pipelines (and audits them pre-run) and produces
5
+ evidence-backed root-cause reports, by driving an LLM through an agentic
6
+ tool-use loop over the Fabric REST API and SQL endpoint.
7
+
8
+ from wanda import Wanda
9
+ report = Wanda(anthropic_api_key="sk-ant-...").investigate("MyPipeline")
10
+ report.display()
11
+ """
12
+ from .core import Wanda, WandaReport
13
+
14
+ __all__ = ["Wanda", "WandaReport"]
15
+
16
+ try:
17
+ from importlib.metadata import version
18
+ __version__ = version("wanda-fabric")
19
+ except Exception: # running from a source tree that isn't installed
20
+ __version__ = "0.1.0"
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()