wanda-fabric 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wanda_fabric-0.1.0/LICENSE +44 -0
- wanda_fabric-0.1.0/PKG-INFO +296 -0
- wanda_fabric-0.1.0/docs/README.md +221 -0
- wanda_fabric-0.1.0/pyproject.toml +51 -0
- wanda_fabric-0.1.0/setup.cfg +4 -0
- wanda_fabric-0.1.0/src/wanda/__init__.py +20 -0
- wanda_fabric-0.1.0/src/wanda/__main__.py +4 -0
- wanda_fabric-0.1.0/src/wanda/agent.py +130 -0
- wanda_fabric-0.1.0/src/wanda/cli.py +68 -0
- wanda_fabric-0.1.0/src/wanda/config.py +173 -0
- wanda_fabric-0.1.0/src/wanda/core.py +169 -0
- wanda_fabric-0.1.0/src/wanda/fabric_tools.py +625 -0
- wanda_fabric-0.1.0/src/wanda/llm_provider.py +420 -0
- wanda_fabric-0.1.0/src/wanda/log_setup.py +31 -0
- wanda_fabric-0.1.0/src/wanda/mcp_server.py +33 -0
- wanda_fabric-0.1.0/src/wanda/prompts/investigate.md +27 -0
- wanda_fabric-0.1.0/src/wanda/prompts/scan.md +83 -0
- wanda_fabric-0.1.0/src/wanda/render_report.py +671 -0
- wanda_fabric-0.1.0/src/wanda/telemetry.py +108 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/PKG-INFO +296 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/SOURCES.txt +28 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/dependency_links.txt +1 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/entry_points.txt +2 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/requires.txt +12 -0
- wanda_fabric-0.1.0/src/wanda_fabric.egg-info/top_level.txt +1 -0
- wanda_fabric-0.1.0/tests/test_agent.py +139 -0
- wanda_fabric-0.1.0/tests/test_config.py +61 -0
- wanda_fabric-0.1.0/tests/test_fabric_tools.py +47 -0
- wanda_fabric-0.1.0/tests/test_llm_provider.py +314 -0
- wanda_fabric-0.1.0/tests/test_telemetry.py +77 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Wanda — Beta Evaluation License
|
|
2
|
+
Copyright (c) 2026 CM Labs. All rights reserved.
|
|
3
|
+
|
|
4
|
+
This software ("Wanda") is proprietary to CM Labs and is provided for
|
|
5
|
+
closed-beta evaluation only. By installing or using Wanda you agree to the
|
|
6
|
+
terms below.
|
|
7
|
+
|
|
8
|
+
1. GRANT. CM Labs grants you a limited, non-exclusive, non-transferable,
|
|
9
|
+
revocable license to install and use Wanda solely to evaluate it against
|
|
10
|
+
your own Microsoft Fabric workspaces during the beta program.
|
|
11
|
+
|
|
12
|
+
2. RESTRICTIONS. You may not (a) redistribute, sublicense, sell, rent, or
|
|
13
|
+
publish Wanda or any part of it; (b) modify, adapt, or create derivative
|
|
14
|
+
works for distribution; (c) reverse engineer, decompile, or disassemble
|
|
15
|
+
Wanda except to the extent this restriction is prohibited by law; (d) use
|
|
16
|
+
Wanda to build or train a competing product or service; or (e) remove or
|
|
17
|
+
alter any copyright, trademark, or other proprietary notices.
|
|
18
|
+
|
|
19
|
+
3. FEEDBACK. If you give CM Labs feedback, suggestions, or bug reports, CM
|
|
20
|
+
Labs may use them for any purpose without obligation to you.
|
|
21
|
+
|
|
22
|
+
4. YOUR DATA AND COSTS. Wanda runs against your own Fabric workspace using
|
|
23
|
+
your own credentials and your own LLM provider key. You are responsible
|
|
24
|
+
for those credentials, for any usage charges they incur, and for the
|
|
25
|
+
resources you point Wanda at.
|
|
26
|
+
|
|
27
|
+
5. NO WARRANTY. Wanda is provided "AS IS" and "AS AVAILABLE", without
|
|
28
|
+
warranty of any kind, express or implied, including merchantability,
|
|
29
|
+
fitness for a particular purpose, and non-infringement. This is beta
|
|
30
|
+
software and may be incomplete, change, or be withdrawn at any time.
|
|
31
|
+
|
|
32
|
+
6. LIMITATION OF LIABILITY. To the maximum extent permitted by law, CM Labs
|
|
33
|
+
shall not be liable for any indirect, incidental, special, consequential,
|
|
34
|
+
or punitive damages, or any loss of data, profits, or revenue, arising
|
|
35
|
+
out of or relating to your use of Wanda.
|
|
36
|
+
|
|
37
|
+
7. TERMINATION. This license terminates automatically if you breach it, and
|
|
38
|
+
CM Labs may terminate the beta or this license at any time. On termination
|
|
39
|
+
you must stop using Wanda and delete all copies.
|
|
40
|
+
|
|
41
|
+
8. RESERVATION OF RIGHTS. All rights not expressly granted here are reserved
|
|
42
|
+
by CM Labs.
|
|
43
|
+
|
|
44
|
+
Contact: CM Labs — matthewarrogante@gmail.com
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wanda-fabric
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An AI Data Engineer for Microsoft Fabric — investigates failed pipelines and produces evidence-backed root-cause reports.
|
|
5
|
+
Author: CM Labs
|
|
6
|
+
License: Wanda — Beta Evaluation License
|
|
7
|
+
Copyright (c) 2026 CM Labs. All rights reserved.
|
|
8
|
+
|
|
9
|
+
This software ("Wanda") is proprietary to CM Labs and is provided for
|
|
10
|
+
closed-beta evaluation only. By installing or using Wanda you agree to the
|
|
11
|
+
terms below.
|
|
12
|
+
|
|
13
|
+
1. GRANT. CM Labs grants you a limited, non-exclusive, non-transferable,
|
|
14
|
+
revocable license to install and use Wanda solely to evaluate it against
|
|
15
|
+
your own Microsoft Fabric workspaces during the beta program.
|
|
16
|
+
|
|
17
|
+
2. RESTRICTIONS. You may not (a) redistribute, sublicense, sell, rent, or
|
|
18
|
+
publish Wanda or any part of it; (b) modify, adapt, or create derivative
|
|
19
|
+
works for distribution; (c) reverse engineer, decompile, or disassemble
|
|
20
|
+
Wanda except to the extent this restriction is prohibited by law; (d) use
|
|
21
|
+
Wanda to build or train a competing product or service; or (e) remove or
|
|
22
|
+
alter any copyright, trademark, or other proprietary notices.
|
|
23
|
+
|
|
24
|
+
3. FEEDBACK. If you give CM Labs feedback, suggestions, or bug reports, CM
|
|
25
|
+
Labs may use them for any purpose without obligation to you.
|
|
26
|
+
|
|
27
|
+
4. YOUR DATA AND COSTS. Wanda runs against your own Fabric workspace using
|
|
28
|
+
your own credentials and your own LLM provider key. You are responsible
|
|
29
|
+
for those credentials, for any usage charges they incur, and for the
|
|
30
|
+
resources you point Wanda at.
|
|
31
|
+
|
|
32
|
+
5. NO WARRANTY. Wanda is provided "AS IS" and "AS AVAILABLE", without
|
|
33
|
+
warranty of any kind, express or implied, including merchantability,
|
|
34
|
+
fitness for a particular purpose, and non-infringement. This is beta
|
|
35
|
+
software and may be incomplete, change, or be withdrawn at any time.
|
|
36
|
+
|
|
37
|
+
6. LIMITATION OF LIABILITY. To the maximum extent permitted by law, CM Labs
|
|
38
|
+
shall not be liable for any indirect, incidental, special, consequential,
|
|
39
|
+
or punitive damages, or any loss of data, profits, or revenue, arising
|
|
40
|
+
out of or relating to your use of Wanda.
|
|
41
|
+
|
|
42
|
+
7. TERMINATION. This license terminates automatically if you breach it, and
|
|
43
|
+
CM Labs may terminate the beta or this license at any time. On termination
|
|
44
|
+
you must stop using Wanda and delete all copies.
|
|
45
|
+
|
|
46
|
+
8. RESERVATION OF RIGHTS. All rights not expressly granted here are reserved
|
|
47
|
+
by CM Labs.
|
|
48
|
+
|
|
49
|
+
Contact: CM Labs — matthewarrogante@gmail.com
|
|
50
|
+
|
|
51
|
+
Project-URL: Homepage, https://github.com/cmlabs-ai/wanda
|
|
52
|
+
Keywords: microsoft-fabric,data-engineering,llm,agent,root-cause-analysis
|
|
53
|
+
Classifier: Development Status :: 4 - Beta
|
|
54
|
+
Classifier: Programming Language :: Python :: 3
|
|
55
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
56
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
57
|
+
Classifier: Intended Audience :: Developers
|
|
58
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
59
|
+
Classifier: Topic :: Database
|
|
60
|
+
Classifier: License :: Other/Proprietary License
|
|
61
|
+
Classifier: Operating System :: OS Independent
|
|
62
|
+
Requires-Python: >=3.11
|
|
63
|
+
Description-Content-Type: text/markdown
|
|
64
|
+
License-File: LICENSE
|
|
65
|
+
Requires-Dist: requests>=2.32
|
|
66
|
+
Requires-Dist: python-dotenv>=1.0
|
|
67
|
+
Provides-Extra: sql
|
|
68
|
+
Requires-Dist: pyodbc>=5; extra == "sql"
|
|
69
|
+
Provides-Extra: mcp
|
|
70
|
+
Requires-Dist: fastmcp>=3; extra == "mcp"
|
|
71
|
+
Provides-Extra: all
|
|
72
|
+
Requires-Dist: pyodbc>=5; extra == "all"
|
|
73
|
+
Requires-Dist: fastmcp>=3; extra == "all"
|
|
74
|
+
Dynamic: license-file
|
|
75
|
+
|
|
76
|
+
# Wanda
|
|
77
|
+
|
|
78
|
+
> An AI Data Engineer for Microsoft Fabric. Hours → minutes for pipeline root-cause analysis.
|
|
79
|
+
|
|
80
|
+
Wanda is an AI Data Engineer that investigates failed Microsoft Fabric pipelines
|
|
81
|
+
and produces evidence-backed root-cause reports. It drives an LLM (Claude by
|
|
82
|
+
default) through an agentic tool-use loop, reaching Fabric directly through the
|
|
83
|
+
Fabric REST API and SQL endpoint.
|
|
84
|
+
|
|
85
|
+
A CM Labs product — born at the **GitHub Copilot SDK Hackathon (Web Summit
|
|
86
|
+
Vancouver 2026)** and since rebuilt for real-world use.
|
|
87
|
+
|
|
88
|
+
## Problem
|
|
89
|
+
|
|
90
|
+
When a Fabric pipeline fails, a data engineer typically spends 1–2 hours on:
|
|
91
|
+
- Reading raw failure logs
|
|
92
|
+
- Opening each failed notebook to read the source
|
|
93
|
+
- Querying the lakehouse to verify what tables/columns actually exist
|
|
94
|
+
- Cross-referencing all of the above to find the root cause
|
|
95
|
+
|
|
96
|
+
Most of that work is mechanical evidence-gathering, not analysis. Wanda
|
|
97
|
+
takes ownership of the routine investigation so the human data engineer
|
|
98
|
+
can focus on the fix.
|
|
99
|
+
|
|
100
|
+
## Solution
|
|
101
|
+
|
|
102
|
+
Wanda automates the evidence chain a senior data engineer would walk:
|
|
103
|
+
|
|
104
|
+
1. Pulls the failed pipeline run from the Fabric REST API
|
|
105
|
+
2. Reads the source of the failing notebook
|
|
106
|
+
3. Decides whether to query the SQL endpoint based on the error type
|
|
107
|
+
4. Writes a definitive root-cause report — no guessing
|
|
108
|
+
|
|
109
|
+
The agent makes those decisions itself. Different failures lead to different
|
|
110
|
+
investigation paths.
|
|
111
|
+
|
|
112
|
+
## Architecture
|
|
113
|
+
|
|
114
|
+
The model talks to its provider **directly**, and the Fabric tools are plain
|
|
115
|
+
Python functions called **inline** — no subprocess — which is what lets Wanda
|
|
116
|
+
run anywhere from a CLI to a Fabric notebook.
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
Wanda class / CLI (wanda.core / wanda.cli)
|
|
120
|
+
│ .investigate() · .scan() → WandaReport
|
|
121
|
+
▼
|
|
122
|
+
Agent loop (wanda.agent) bounded tool-use loop
|
|
123
|
+
├──────────────► LLM provider (wanda.llm_provider)
|
|
124
|
+
│ Claude (Anthropic / Azure) · GPT (Azure OpenAI)
|
|
125
|
+
└──────────────► 6 Fabric tools (wanda.fabric_tools) ──► Microsoft
|
|
126
|
+
inline — no subprocess Fabric
|
|
127
|
+
REST + SQL
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
- `wanda.core` — the `Wanda` class and `WandaReport`. Imports the tools and runs the loop inline.
|
|
131
|
+
- `wanda.cli` — the `wanda` command-line entry point.
|
|
132
|
+
- `wanda.agent` — provider-agnostic tool-use loop (bounded steps, result truncation, token accounting).
|
|
133
|
+
- `wanda.llm_provider` — swappable LLM backend. `WANDA_PROVIDER` selects `anthropic`, `azure-openai`, or `azure-anthropic`. The Anthropic path uses prompt caching.
|
|
134
|
+
- `wanda.fabric_tools` — the 6 Fabric tools as plain functions (REST + SQL), with retry/backoff and token refresh.
|
|
135
|
+
- `wanda.mcp_server` — a thin **MCP** wrapper over the *same* 6 tools, so any MCP-compatible client (Claude Desktop, Cursor, VS Code) can use them too — see `mcp.json`.
|
|
136
|
+
|
|
137
|
+
## Use it as a library (notebook or script)
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from wanda import Wanda
|
|
141
|
+
|
|
142
|
+
wanda = Wanda(anthropic_api_key="sk-ant-...") # or rely on .env
|
|
143
|
+
report = wanda.investigate("LoadSalesPipeline")
|
|
144
|
+
report.display() # inline HTML in a notebook
|
|
145
|
+
print(report.text) # or the raw text
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Demo scenarios
|
|
149
|
+
|
|
150
|
+
The demo Fabric workspace contains pipelines that each fail in a different way. The
|
|
151
|
+
agent takes a different investigation path for each.
|
|
152
|
+
|
|
153
|
+
**Scenario 1 — `LoadSalesPipeline`** (missing table — *verified live run*)
|
|
154
|
+
1. `get_pipeline_run` / `get_pipeline_definition` → identifies the failing activity `Write_Gold_Orders`
|
|
155
|
+
2. `get_notebook_source` (×3) → reads the notebooks and finds `Write_Gold_Orders` reads `order_enriched` (missing the **s**) instead of `orders_enriched`
|
|
156
|
+
3. `query_sql_endpoint` → confirms `orders_enriched` exists in the lakehouse but `order_enriched` does not → `TABLE_OR_VIEW_NOT_FOUND`
|
|
157
|
+
4. Reports the exact line to fix
|
|
158
|
+
|
|
159
|
+
**Scenario 2 — `TransformSalesPipeline`** (code bug)
|
|
160
|
+
1. `get_pipeline_run` → finds an `AttributeError` (e.g. a wrong DataFrame column reference)
|
|
161
|
+
2. `get_notebook_source` → reads the offending line
|
|
162
|
+
3. Skips the SQL check — code bug, not a missing table
|
|
163
|
+
|
|
164
|
+
**Scenario 3 — `DailySalesETL`** (multi-activity ETL chain)
|
|
165
|
+
A multi-activity pipeline: Copy → cleanup notebook → parallel branches (aggregate notebook + stored procedure) → summarize notebook.
|
|
166
|
+
1. `get_pipeline_definition` → walks the activity graph
|
|
167
|
+
2. `get_pipeline_run` → identifies the single failed activity in the chain
|
|
168
|
+
3. Reports which activities succeeded and which one failed, with the root cause
|
|
169
|
+
|
|
170
|
+
The divergent tool paths are the proof that the agent is genuinely agentic.
|
|
171
|
+
|
|
172
|
+
## Prerequisites
|
|
173
|
+
|
|
174
|
+
- Windows, macOS, or Linux; Python 3.11+
|
|
175
|
+
- An Azure tenant with a Microsoft Fabric trial or capacity
|
|
176
|
+
- A Fabric workspace with a Lakehouse, demo pipelines, and notebooks
|
|
177
|
+
- An Entra ID App Registration (Service Principal) with access to the workspace
|
|
178
|
+
- ODBC Driver 18 for SQL Server (for the SQL endpoint tool)
|
|
179
|
+
- An **Anthropic API key** (default), *or* an Azure OpenAI / Azure-hosted Claude deployment
|
|
180
|
+
|
|
181
|
+
## Install
|
|
182
|
+
|
|
183
|
+
Wanda is a pip-installable package (`wanda-fabric`).
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
pip install "wanda-fabric[sql]"
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The `[sql]` extra adds `pyodbc` for the SQL-endpoint tools; `[mcp]` adds `fastmcp`
|
|
190
|
+
for the standalone MCP server; `[all]` adds both. The core install stays light for
|
|
191
|
+
notebooks. The SQL tools also need the OS-level **ODBC Driver 18 for SQL Server**.
|
|
192
|
+
|
|
193
|
+
**New here?** [docs/GETTING_STARTED.md](GETTING_STARTED.md) walks through the full first-time
|
|
194
|
+
setup (Service Principal, ODBC driver, API key) in ~15 minutes.
|
|
195
|
+
|
|
196
|
+
*CM Labs internal — develop from the private repo:*
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
git clone https://github.com/cmlabs-ai/wanda.git
|
|
200
|
+
cd wanda
|
|
201
|
+
python -m venv .venv
|
|
202
|
+
.\.venv\Scripts\Activate.ps1 # Windows (macOS/Linux: source .venv/bin/activate)
|
|
203
|
+
pip install -e ".[all]" # core + sql (pyodbc) + mcp (fastmcp) extras
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Then configure credentials:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
cp .env.example .env
|
|
210
|
+
# Edit .env: Fabric Service Principal values + ANTHROPIC_API_KEY (and WANDA_PROVIDER if not "anthropic")
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
No GitHub Copilot login is required — Wanda calls the model provider directly.
|
|
214
|
+
|
|
215
|
+
## Run
|
|
216
|
+
|
|
217
|
+
Point Wanda at a pipeline that failed in **your** workspace:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
# Investigate a failed pipeline (default mode)
|
|
221
|
+
wanda "Your Failed Pipeline Name"
|
|
222
|
+
|
|
223
|
+
# Pre-run scan: audit a pipeline before it runs
|
|
224
|
+
wanda "Your Pipeline Name" --scan
|
|
225
|
+
|
|
226
|
+
# (equivalently: python -m wanda "Your Pipeline Name")
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
You'll see each tool call logged to stderr as it happens, the final root-cause
|
|
230
|
+
report printed, and a polished HTML report saved to `./reports/`.
|
|
231
|
+
|
|
232
|
+
> The demo scenarios above run against CM Labs' own demo workspace
|
|
233
|
+
> (`LoadSalesPipeline`, etc.) — substitute your own pipeline names.
|
|
234
|
+
|
|
235
|
+
## Configuration
|
|
236
|
+
|
|
237
|
+
Set in `.env` (see `.env.example`):
|
|
238
|
+
|
|
239
|
+
| Variable | Purpose |
|
|
240
|
+
|---|---|
|
|
241
|
+
| `FABRIC_TENANT_ID` / `FABRIC_CLIENT_ID` / `FABRIC_CLIENT_SECRET` / `FABRIC_WORKSPACE_ID` | Service Principal + workspace |
|
|
242
|
+
| `WANDA_PROVIDER` | `anthropic` (default) · `azure-openai` · `azure-anthropic` |
|
|
243
|
+
| `ANTHROPIC_API_KEY` | for the default `anthropic` provider |
|
|
244
|
+
| `WANDA_MODEL` | optional model override (default `claude-sonnet-4-6`) |
|
|
245
|
+
| `AZURE_OPENAI_*` / `AZURE_ANTHROPIC_*` | for the Azure providers |
|
|
246
|
+
| `WANDA_LOG_LEVEL` | logging verbosity (default `INFO`) |
|
|
247
|
+
|
|
248
|
+
## Repository layout
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
wanda/
|
|
252
|
+
├── src/wanda/ the installable package (wanda-fabric)
|
|
253
|
+
│ ├── __init__.py exports Wanda, WandaReport
|
|
254
|
+
│ ├── core.py Wanda class + WandaReport
|
|
255
|
+
│ ├── cli.py command-line entry point (the `wanda` command)
|
|
256
|
+
│ ├── __main__.py enables `python -m wanda`
|
|
257
|
+
│ ├── agent.py provider-agnostic tool-use loop
|
|
258
|
+
│ ├── llm_provider.py swappable LLM backend (Anthropic / Azure OpenAI)
|
|
259
|
+
│ ├── fabric_tools.py the 6 Fabric tools (REST + SQL), called inline
|
|
260
|
+
│ ├── mcp_server.py thin MCP wrapper over the same tools
|
|
261
|
+
│ ├── config.py typed, fail-fast configuration
|
|
262
|
+
│ ├── log_setup.py logging (stderr)
|
|
263
|
+
│ ├── render_report.py text → self-contained HTML report
|
|
264
|
+
│ └── prompts/ investigate.md, scan.md (bundled package data)
|
|
265
|
+
├── notebooks/ template notebook for Fabric users
|
|
266
|
+
├── tests/ 34 offline tests (providers, agent loop, config)
|
|
267
|
+
├── docs/ this README + architecture/business docs
|
|
268
|
+
├── presentations/ decks
|
|
269
|
+
├── reports/ generated HTML reports (gitignored)
|
|
270
|
+
├── pyproject.toml packaging + dependencies
|
|
271
|
+
├── mcp.json MCP server config (for any MCP client)
|
|
272
|
+
└── .env.example
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## Responsible AI notes
|
|
276
|
+
|
|
277
|
+
- **Read-only — enforced in code.** Wanda calls Fabric REST and SQL endpoints in read mode only; the SQL tools reject anything that isn't a `SELECT`/`WITH` query, so Wanda cannot modify pipelines, notebooks, or table data.
|
|
278
|
+
- **Secrets stay local.** Credentials live in `.env` (gitignored) and are never logged, sent to the LLM, or written into reports.
|
|
279
|
+
- **Minimal data exposure.** Notebook source, pipeline structure, and table/column names go to the LLM so it can reason. A pre-run scan may read a *small sample* of rows (e.g. `SELECT TOP 1 *`) to validate data — never bulk data.
|
|
280
|
+
- **Evidence-based.** The system prompts restrict Wanda to evidence from its tool calls. Recommendations are descriptive ("change `order_enriched` to `orders_enriched`"), never actions Wanda performs itself.
|
|
281
|
+
- **Scoped access.** Service Principal authentication scopes Wanda's access to a single workspace.
|
|
282
|
+
|
|
283
|
+
## Tech stack
|
|
284
|
+
|
|
285
|
+
- **Agent runtime:** custom tool-use loop calling the LLM provider directly (Anthropic Messages API / Azure OpenAI), dependency-light (`requests`, no vendor SDKs)
|
|
286
|
+
- **Tools:** plain Python functions, also exposed via the Model Context Protocol (FastMCP)
|
|
287
|
+
- **Cloud:** Microsoft Fabric REST API, Fabric SQL endpoint, Microsoft Entra ID
|
|
288
|
+
- **Drivers:** Microsoft ODBC Driver 18 (SQL endpoint), Service Principal auth
|
|
289
|
+
|
|
290
|
+
## Origin
|
|
291
|
+
|
|
292
|
+
Wanda began at the **GitHub Copilot SDK Hackathon** (Web Summit Vancouver 2026),
|
|
293
|
+
where the original prototype ran on the GitHub Copilot SDK with an MCP subprocess.
|
|
294
|
+
It has since been rebuilt by **CM Labs** to call its model provider directly,
|
|
295
|
+
run inline (notebook-ready), and switch LLM providers via configuration —
|
|
296
|
+
the foundation for a Microsoft Fabric beta.
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# Wanda
|
|
2
|
+
|
|
3
|
+
> An AI Data Engineer for Microsoft Fabric. Hours → minutes for pipeline root-cause analysis.
|
|
4
|
+
|
|
5
|
+
Wanda is an AI Data Engineer that investigates failed Microsoft Fabric pipelines
|
|
6
|
+
and produces evidence-backed root-cause reports. It drives an LLM (Claude by
|
|
7
|
+
default) through an agentic tool-use loop, reaching Fabric directly through the
|
|
8
|
+
Fabric REST API and SQL endpoint.
|
|
9
|
+
|
|
10
|
+
A CM Labs product — born at the **GitHub Copilot SDK Hackathon (Web Summit
|
|
11
|
+
Vancouver 2026)** and since rebuilt for real-world use.
|
|
12
|
+
|
|
13
|
+
## Problem
|
|
14
|
+
|
|
15
|
+
When a Fabric pipeline fails, a data engineer typically spends 1–2 hours on:
|
|
16
|
+
- Reading raw failure logs
|
|
17
|
+
- Opening each failed notebook to read the source
|
|
18
|
+
- Querying the lakehouse to verify what tables/columns actually exist
|
|
19
|
+
- Cross-referencing all of the above to find the root cause
|
|
20
|
+
|
|
21
|
+
Most of that work is mechanical evidence-gathering, not analysis. Wanda
|
|
22
|
+
takes ownership of the routine investigation so the human data engineer
|
|
23
|
+
can focus on the fix.
|
|
24
|
+
|
|
25
|
+
## Solution
|
|
26
|
+
|
|
27
|
+
Wanda automates the evidence chain a senior data engineer would walk:
|
|
28
|
+
|
|
29
|
+
1. Pulls the failed pipeline run from the Fabric REST API
|
|
30
|
+
2. Reads the source of the failing notebook
|
|
31
|
+
3. Decides whether to query the SQL endpoint based on the error type
|
|
32
|
+
4. Writes a definitive root-cause report — no guessing
|
|
33
|
+
|
|
34
|
+
The agent makes those decisions itself. Different failures lead to different
|
|
35
|
+
investigation paths.
|
|
36
|
+
|
|
37
|
+
## Architecture
|
|
38
|
+
|
|
39
|
+
The model talks to its provider **directly**, and the Fabric tools are plain
|
|
40
|
+
Python functions called **inline** — no subprocess — which is what lets Wanda
|
|
41
|
+
run anywhere from a CLI to a Fabric notebook.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
Wanda class / CLI (wanda.core / wanda.cli)
|
|
45
|
+
│ .investigate() · .scan() → WandaReport
|
|
46
|
+
▼
|
|
47
|
+
Agent loop (wanda.agent) bounded tool-use loop
|
|
48
|
+
├──────────────► LLM provider (wanda.llm_provider)
|
|
49
|
+
│ Claude (Anthropic / Azure) · GPT (Azure OpenAI)
|
|
50
|
+
└──────────────► 6 Fabric tools (wanda.fabric_tools) ──► Microsoft
|
|
51
|
+
inline — no subprocess Fabric
|
|
52
|
+
REST + SQL
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
- `wanda.core` — the `Wanda` class and `WandaReport`. Imports the tools and runs the loop inline.
|
|
56
|
+
- `wanda.cli` — the `wanda` command-line entry point.
|
|
57
|
+
- `wanda.agent` — provider-agnostic tool-use loop (bounded steps, result truncation, token accounting).
|
|
58
|
+
- `wanda.llm_provider` — swappable LLM backend. `WANDA_PROVIDER` selects `anthropic`, `azure-openai`, or `azure-anthropic`. The Anthropic path uses prompt caching.
|
|
59
|
+
- `wanda.fabric_tools` — the 6 Fabric tools as plain functions (REST + SQL), with retry/backoff and token refresh.
|
|
60
|
+
- `wanda.mcp_server` — a thin **MCP** wrapper over the *same* 6 tools, so any MCP-compatible client (Claude Desktop, Cursor, VS Code) can use them too — see `mcp.json`.
|
|
61
|
+
|
|
62
|
+
## Use it as a library (notebook or script)
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from wanda import Wanda
|
|
66
|
+
|
|
67
|
+
wanda = Wanda(anthropic_api_key="sk-ant-...") # or rely on .env
|
|
68
|
+
report = wanda.investigate("LoadSalesPipeline")
|
|
69
|
+
report.display() # inline HTML in a notebook
|
|
70
|
+
print(report.text) # or the raw text
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Demo scenarios
|
|
74
|
+
|
|
75
|
+
The demo Fabric workspace contains pipelines that each fail in a different way. The
|
|
76
|
+
agent takes a different investigation path for each.
|
|
77
|
+
|
|
78
|
+
**Scenario 1 — `LoadSalesPipeline`** (missing table — *verified live run*)
|
|
79
|
+
1. `get_pipeline_run` / `get_pipeline_definition` → identifies the failing activity `Write_Gold_Orders`
|
|
80
|
+
2. `get_notebook_source` (×3) → reads the notebooks and finds `Write_Gold_Orders` reads `order_enriched` (missing the **s**) instead of `orders_enriched`
|
|
81
|
+
3. `query_sql_endpoint` → confirms `orders_enriched` exists in the lakehouse but `order_enriched` does not → `TABLE_OR_VIEW_NOT_FOUND`
|
|
82
|
+
4. Reports the exact line to fix
|
|
83
|
+
|
|
84
|
+
**Scenario 2 — `TransformSalesPipeline`** (code bug)
|
|
85
|
+
1. `get_pipeline_run` → finds an `AttributeError` (e.g. a wrong DataFrame column reference)
|
|
86
|
+
2. `get_notebook_source` → reads the offending line
|
|
87
|
+
3. Skips the SQL check — code bug, not a missing table
|
|
88
|
+
|
|
89
|
+
**Scenario 3 — `DailySalesETL`** (multi-activity ETL chain)
|
|
90
|
+
A multi-activity pipeline: Copy → cleanup notebook → parallel branches (aggregate notebook + stored procedure) → summarize notebook.
|
|
91
|
+
1. `get_pipeline_definition` → walks the activity graph
|
|
92
|
+
2. `get_pipeline_run` → identifies the single failed activity in the chain
|
|
93
|
+
3. Reports which activities succeeded and which one failed, with the root cause
|
|
94
|
+
|
|
95
|
+
The divergent tool paths are the proof that the agent is genuinely agentic.
|
|
96
|
+
|
|
97
|
+
## Prerequisites
|
|
98
|
+
|
|
99
|
+
- Windows, macOS, or Linux; Python 3.11+
|
|
100
|
+
- An Azure tenant with a Microsoft Fabric trial or capacity
|
|
101
|
+
- A Fabric workspace with a Lakehouse, demo pipelines, and notebooks
|
|
102
|
+
- An Entra ID App Registration (Service Principal) with access to the workspace
|
|
103
|
+
- ODBC Driver 18 for SQL Server (for the SQL endpoint tool)
|
|
104
|
+
- An **Anthropic API key** (default), *or* an Azure OpenAI / Azure-hosted Claude deployment
|
|
105
|
+
|
|
106
|
+
## Install
|
|
107
|
+
|
|
108
|
+
Wanda is a pip-installable package (`wanda-fabric`).
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
pip install "wanda-fabric[sql]"
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The `[sql]` extra adds `pyodbc` for the SQL-endpoint tools; `[mcp]` adds `fastmcp`
|
|
115
|
+
for the standalone MCP server; `[all]` adds both. The core install stays light for
|
|
116
|
+
notebooks. The SQL tools also need the OS-level **ODBC Driver 18 for SQL Server**.
|
|
117
|
+
|
|
118
|
+
**New here?** [docs/GETTING_STARTED.md](GETTING_STARTED.md) walks through the full first-time
|
|
119
|
+
setup (Service Principal, ODBC driver, API key) in ~15 minutes.
|
|
120
|
+
|
|
121
|
+
*CM Labs internal — develop from the private repo:*
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
git clone https://github.com/cmlabs-ai/wanda.git
|
|
125
|
+
cd wanda
|
|
126
|
+
python -m venv .venv
|
|
127
|
+
.\.venv\Scripts\Activate.ps1 # Windows (macOS/Linux: source .venv/bin/activate)
|
|
128
|
+
pip install -e ".[all]" # core + sql (pyodbc) + mcp (fastmcp) extras
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Then configure credentials:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
cp .env.example .env
|
|
135
|
+
# Edit .env: Fabric Service Principal values + ANTHROPIC_API_KEY (and WANDA_PROVIDER if not "anthropic")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
No GitHub Copilot login is required — Wanda calls the model provider directly.
|
|
139
|
+
|
|
140
|
+
## Run
|
|
141
|
+
|
|
142
|
+
Point Wanda at a pipeline that failed in **your** workspace:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
# Investigate a failed pipeline (default mode)
|
|
146
|
+
wanda "Your Failed Pipeline Name"
|
|
147
|
+
|
|
148
|
+
# Pre-run scan: audit a pipeline before it runs
|
|
149
|
+
wanda "Your Pipeline Name" --scan
|
|
150
|
+
|
|
151
|
+
# (equivalently: python -m wanda "Your Pipeline Name")
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
You'll see each tool call logged to stderr as it happens, the final root-cause
|
|
155
|
+
report printed, and a polished HTML report saved to `./reports/`.
|
|
156
|
+
|
|
157
|
+
> The demo scenarios above run against CM Labs' own demo workspace
|
|
158
|
+
> (`LoadSalesPipeline`, etc.) — substitute your own pipeline names.
|
|
159
|
+
|
|
160
|
+
## Configuration
|
|
161
|
+
|
|
162
|
+
Set in `.env` (see `.env.example`):
|
|
163
|
+
|
|
164
|
+
| Variable | Purpose |
|
|
165
|
+
|---|---|
|
|
166
|
+
| `FABRIC_TENANT_ID` / `FABRIC_CLIENT_ID` / `FABRIC_CLIENT_SECRET` / `FABRIC_WORKSPACE_ID` | Service Principal + workspace |
|
|
167
|
+
| `WANDA_PROVIDER` | `anthropic` (default) · `azure-openai` · `azure-anthropic` |
|
|
168
|
+
| `ANTHROPIC_API_KEY` | for the default `anthropic` provider |
|
|
169
|
+
| `WANDA_MODEL` | optional model override (default `claude-sonnet-4-6`) |
|
|
170
|
+
| `AZURE_OPENAI_*` / `AZURE_ANTHROPIC_*` | for the Azure providers |
|
|
171
|
+
| `WANDA_LOG_LEVEL` | logging verbosity (default `INFO`) |
|
|
172
|
+
|
|
173
|
+
## Repository layout
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
wanda/
|
|
177
|
+
├── src/wanda/ the installable package (wanda-fabric)
|
|
178
|
+
│ ├── __init__.py exports Wanda, WandaReport
|
|
179
|
+
│ ├── core.py Wanda class + WandaReport
|
|
180
|
+
│ ├── cli.py command-line entry point (the `wanda` command)
|
|
181
|
+
│ ├── __main__.py enables `python -m wanda`
|
|
182
|
+
│ ├── agent.py provider-agnostic tool-use loop
|
|
183
|
+
│ ├── llm_provider.py swappable LLM backend (Anthropic / Azure OpenAI)
|
|
184
|
+
│ ├── fabric_tools.py the 6 Fabric tools (REST + SQL), called inline
|
|
185
|
+
│ ├── mcp_server.py thin MCP wrapper over the same tools
|
|
186
|
+
│ ├── config.py typed, fail-fast configuration
|
|
187
|
+
│ ├── log_setup.py logging (stderr)
|
|
188
|
+
│ ├── render_report.py text → self-contained HTML report
|
|
189
|
+
│ └── prompts/ investigate.md, scan.md (bundled package data)
|
|
190
|
+
├── notebooks/ template notebook for Fabric users
|
|
191
|
+
├── tests/ 34 offline tests (providers, agent loop, config)
|
|
192
|
+
├── docs/ this README + architecture/business docs
|
|
193
|
+
├── presentations/ decks
|
|
194
|
+
├── reports/ generated HTML reports (gitignored)
|
|
195
|
+
├── pyproject.toml packaging + dependencies
|
|
196
|
+
├── mcp.json MCP server config (for any MCP client)
|
|
197
|
+
└── .env.example
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Responsible AI notes
|
|
201
|
+
|
|
202
|
+
- **Read-only — enforced in code.** Wanda calls Fabric REST and SQL endpoints in read mode only; the SQL tools reject anything that isn't a `SELECT`/`WITH` query, so Wanda cannot modify pipelines, notebooks, or table data.
|
|
203
|
+
- **Secrets stay local.** Credentials live in `.env` (gitignored) and are never logged, sent to the LLM, or written into reports.
|
|
204
|
+
- **Minimal data exposure.** Notebook source, pipeline structure, and table/column names go to the LLM so it can reason. A pre-run scan may read a *small sample* of rows (e.g. `SELECT TOP 1 *`) to validate data — never bulk data.
|
|
205
|
+
- **Evidence-based.** The system prompts restrict Wanda to evidence from its tool calls. Recommendations are descriptive ("change `order_enriched` to `orders_enriched`"), never actions Wanda performs itself.
|
|
206
|
+
- **Scoped access.** Service Principal authentication scopes Wanda's access to a single workspace.
|
|
207
|
+
|
|
208
|
+
## Tech stack
|
|
209
|
+
|
|
210
|
+
- **Agent runtime:** custom tool-use loop calling the LLM provider directly (Anthropic Messages API / Azure OpenAI), dependency-light (`requests`, no vendor SDKs)
|
|
211
|
+
- **Tools:** plain Python functions, also exposed via the Model Context Protocol (FastMCP)
|
|
212
|
+
- **Cloud:** Microsoft Fabric REST API, Fabric SQL endpoint, Microsoft Entra ID
|
|
213
|
+
- **Drivers:** Microsoft ODBC Driver 18 (SQL endpoint), Service Principal auth
|
|
214
|
+
|
|
215
|
+
## Origin
|
|
216
|
+
|
|
217
|
+
Wanda began at the **GitHub Copilot SDK Hackathon** (Web Summit Vancouver 2026),
|
|
218
|
+
where the original prototype ran on the GitHub Copilot SDK with an MCP subprocess.
|
|
219
|
+
It has since been rebuilt by **CM Labs** to call its model provider directly,
|
|
220
|
+
run inline (notebook-ready), and switch LLM providers via configuration —
|
|
221
|
+
the foundation for a Microsoft Fabric beta.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "wanda-fabric"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "An AI Data Engineer for Microsoft Fabric — investigates failed pipelines and produces evidence-backed root-cause reports."
|
|
9
|
+
readme = "docs/README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
authors = [{ name = "CM Labs" }]
|
|
12
|
+
license = { file = "LICENSE" }
|
|
13
|
+
keywords = ["microsoft-fabric", "data-engineering", "llm", "agent", "root-cause-analysis"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
21
|
+
"Topic :: Database",
|
|
22
|
+
"License :: Other/Proprietary License",
|
|
23
|
+
"Operating System :: OS Independent",
|
|
24
|
+
]
|
|
25
|
+
# Core stays dependency-light so it installs cleanly in a Fabric notebook.
|
|
26
|
+
dependencies = [
|
|
27
|
+
"requests>=2.32",
|
|
28
|
+
"python-dotenv>=1.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
# pyodbc powers the SQL-endpoint tools; without it, those tools degrade gracefully.
|
|
33
|
+
sql = ["pyodbc>=5"]
|
|
34
|
+
# fastmcp powers the standalone MCP server (for Claude Desktop / Cursor / VS Code).
|
|
35
|
+
mcp = ["fastmcp>=3"]
|
|
36
|
+
all = ["pyodbc>=5", "fastmcp>=3"]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
wanda = "wanda.cli:main"
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://github.com/cmlabs-ai/wanda"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools]
|
|
45
|
+
package-dir = { "" = "src" }
|
|
46
|
+
|
|
47
|
+
[tool.setuptools.packages.find]
|
|
48
|
+
where = ["src"]
|
|
49
|
+
|
|
50
|
+
[tool.setuptools.package-data]
|
|
51
|
+
wanda = ["prompts/*.md"]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""
Wanda — an AI Data Engineer for Microsoft Fabric.

Wanda investigates failed Fabric pipelines, and can also audit them before
they run. It drives an LLM through an agentic tool-use loop over the Fabric
REST API and SQL endpoint and produces evidence-backed root-cause reports.

Example::

    from wanda import Wanda
    report = Wanda(anthropic_api_key="sk-ant-...").investigate("MyPipeline")
    report.display()
"""
from .core import Wanda, WandaReport

__all__ = ["Wanda", "WandaReport"]

# Start from the hard-coded fallback, then prefer the version recorded in the
# installed distribution's metadata when it can be read.
__version__ = "0.1.0"
try:
    from importlib.metadata import version

    __version__ = version("wanda-fabric")
except Exception:  # e.g. running from a source tree that isn't installed
    # Deliberate best-effort: importing the package must never fail just
    # because the version lookup did, so the fallback above stays in place.
    pass
|