stata-code 0.7.2__tar.gz → 0.8.1__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {stata_code-0.7.2 → stata_code-0.8.1}/CHANGELOG.md +41 -1
- {stata_code-0.7.2 → stata_code-0.8.1}/PKG-INFO +111 -10
- {stata_code-0.7.2 → stata_code-0.8.1}/README.md +110 -9
- stata_code-0.8.1/docs/industry-leader-roadmap.md +99 -0
- stata_code-0.8.1/examples/06-cross-stack-parity-audit.md +101 -0
- stata_code-0.8.1/examples/07-data-mcp-handoff.md +77 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/README.md +2 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/pyproject.toml +2 -1
- stata_code-0.8.1/scripts/check_github_actions.py +96 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/__init__.py +1 -1
- stata_code-0.8.1/stata_code/__main__.py +6 -0
- stata_code-0.8.1/stata_code/cli.py +73 -0
- stata_code-0.8.1/stata_code/doctor.py +285 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/server.py +458 -1
- stata_code-0.8.1/tests/test_doctor.py +148 -0
- stata_code-0.8.1/tests/test_github_actions.py +52 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_mcp.py +133 -0
- stata_code-0.8.1/tests/test_method_prompts.py +167 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_skill_package.py +10 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/.gitignore +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/LICENSE +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/LICENSE-POLICY.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/PUBLISHING.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/SCHEMA.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/docs/design/hard_timeout.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/01-basic-regression.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/02-did-card-krueger.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/03-graphs.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/04-multi-session.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/examples/05-large-matrix.md +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/schema/run_result.schema.json +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/scripts/build_skill_zip.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/scripts/check_versions.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/scripts/export_schema.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/__init__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_pool.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_refs.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_runtime.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/errors.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/log_artifacts.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/notebook.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/run_index.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/runner.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/schema.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/__init__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/__main__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-32x32.png +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-64x64.png +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-svg.svg +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/kernel.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/__init__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/__main__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/__init__.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/conftest.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/fixtures/.gitkeep +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_cancel.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_errors.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_kernel.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_log_artifacts.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_mcp_stdio.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_new_tools.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_notebook.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_notebook_phase2.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_pool.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_public_api.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_release_versions.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_run_index.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_runner.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_runtime_discovery.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_schema.py +0 -0
- {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_schema_artifact.py +0 -0
|
@@ -4,7 +4,47 @@ All notable changes to `stata-code` are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
|
|
5
5
|
to semver-major.minor for the result schema (see `SCHEMA.md` §6).
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## 0.8.1 — 2026-06-20
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- **README & metadata refresh.** Documented the VS Code extension's
|
|
12
|
+
seven-view sidebar (added the **Data** variables browser and the
|
|
13
|
+
**Outputs** table/export-artifact panel), corrected the error taxonomy
|
|
14
|
+
count to 31 kinds, and sharpened the Claude Code plugin / VS Code
|
|
15
|
+
Marketplace descriptions to lead with the empirical-economics workflow
|
|
16
|
+
(DiD/IV/RDD, publication tables, StatsPAI cross-validation).
|
|
17
|
+
- **Partner module.** Added a Stanford REAP × CoPaper.AI partner block
|
|
18
|
+
(logos, QR, links) to both the English and Chinese README, with the logo
|
|
19
|
+
assets bundled under `branding/partners/`.
|
|
20
|
+
|
|
21
|
+
## 0.8.0 — 2026-06-20
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- **Economist workflow coordination and roadmap.** Added
|
|
26
|
+
`AGENT_COORDINATION.md` for concurrent-agent lanes and
|
|
27
|
+
`docs/industry-leader-roadmap.md` for the one-month product plan: workflow
|
|
28
|
+
intelligence, parity audits, data-MCP handoff, editor/artifact polish, and
|
|
29
|
+
distribution diagnostics.
|
|
30
|
+
- **Cross-stack and data-MCP workflow references.** The `stata-code` skill now
|
|
31
|
+
includes `references/parity-audit.md` and
|
|
32
|
+
`references/data-mcp-handoff.md`, plus cookbook examples for cross-stack
|
|
33
|
+
parity audits and external-data-MCP handoff into Stata.
|
|
34
|
+
- **Modern empirical-economics package notes.** Added package references for
|
|
35
|
+
`csdid`, `drdid`, `did_imputation`, `eventstudyinteract`,
|
|
36
|
+
`did_multiplegt_dyn`, `rdrobust`, `ivreg2`, `ivreghdfe`, `boottest`, and
|
|
37
|
+
`outreg2`, and wired them into the skill routing table.
|
|
38
|
+
- **MCP prompt discoverability for economist workflows.** Added
|
|
39
|
+
`plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
|
|
40
|
+
`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, and
|
|
41
|
+
`cross_validate_did` prompts so clients can discover the new protocols and
|
|
42
|
+
turnkey empirical recipes directly through MCP.
|
|
43
|
+
- **Read-only installation diagnostics.** Added the top-level `stata-code`
|
|
44
|
+
console script with `doctor` / `verify` commands. The diagnostic reports
|
|
45
|
+
package/Python version, MCP and kernel extras, `pystata` discovery, console
|
|
46
|
+
scripts on `PATH`, client/VS Code hints, and an optional live Stata
|
|
47
|
+
version/edition probe without mutating user configuration.
|
|
8
48
|
|
|
9
49
|
## 0.7.2 — 2026-06-20
|
|
10
50
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stata-code
|
|
3
|
-
Version: 0.7.2
|
|
3
|
+
Version: 0.8.1
|
|
4
4
|
Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
|
|
5
5
|
Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
|
|
6
6
|
Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
|
|
@@ -59,6 +59,24 @@ Description-Content-Type: text/markdown
|
|
|
59
59
|
[](https://github.com/brycewang-stanford/stata-code/releases)
|
|
60
60
|
[](https://github.com/brycewang-stanford/stata-code)
|
|
61
61
|
|
|
62
|
+
<div align="center">
|
|
63
|
+
|
|
64
|
+
<table>
|
|
65
|
+
<tr>
|
|
66
|
+
<td align="center">
|
|
67
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
|
|
68
|
+
</td>
|
|
69
|
+
<td width="48"></td>
|
|
70
|
+
<td align="center">
|
|
71
|
+
<a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP — Center on China's Economy & Institutions" width="280" /></a>
|
|
72
|
+
</td>
|
|
73
|
+
</tr>
|
|
74
|
+
</table>
|
|
75
|
+
|
|
76
|
+
<sub><strong>Stanford REAP × CoPaper.AI</strong> · an academic–industrial AI toolkit for empirical research</sub>
|
|
77
|
+
|
|
78
|
+
</div>
|
|
79
|
+
|
|
62
80
|
<p align="center">
|
|
63
81
|
<img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/github-instructions.png" alt="stata-code: agent-native Stata bridge — one Python core, multiple frontends (Jupyter kernel, MCP server, VS Code extension)" width="720" />
|
|
64
82
|
</p>
|
|
@@ -67,6 +85,22 @@ Description-Content-Type: text/markdown
|
|
|
67
85
|
|
|
68
86
|
`stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
|
|
69
87
|
|
|
88
|
+
**For empirical economists.** Drive Stata in plain language: run **DiD, IV, RDD, and publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
|
|
89
|
+
|
|
90
|
+
**Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Then just ask:
|
|
97
|
+
|
|
98
|
+
> *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
|
|
99
|
+
|
|
100
|
+
`stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
|
|
101
|
+
|
|
102
|
+
**Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
|
|
103
|
+
|
|
70
104
|
```text
|
|
71
105
|
┌────────────────────────────────────────┐
|
|
72
106
|
│ stata-code core (Python) │
|
|
@@ -84,12 +118,18 @@ Description-Content-Type: text/markdown
|
|
|
84
118
|
└─────────────┘ └────────────┘ └─────────────────┘
|
|
85
119
|
```
|
|
86
120
|
|
|
87
|
-
**Status: v0.
|
|
121
|
+
**Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
|
|
88
122
|
|
|
89
|
-
|
|
123
|
+
Three workflows the current tree explicitly supports for end users and agents:
|
|
90
124
|
|
|
91
125
|
- **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
|
|
92
126
|
- **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
|
|
127
|
+
- **Economist workflow guides.** The bundled skill and cookbook now cover
|
|
128
|
+
modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
|
|
129
|
+
cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
|
|
130
|
+
Python, and official data MCPs remain separate tools with explicit handoff
|
|
131
|
+
files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
|
|
132
|
+
and [`examples/`](examples/).
|
|
93
133
|
|
|
94
134
|
---
|
|
95
135
|
|
|
@@ -138,6 +178,19 @@ pip install -e ".[mcp,kernel]"
|
|
|
138
178
|
|
|
139
179
|
Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
|
|
140
180
|
|
|
181
|
+
Verify the local setup with the read-only doctor:
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
stata-code doctor
|
|
185
|
+
stata-code doctor --json # machine-readable output
|
|
186
|
+
stata-code doctor --no-stata-probe # skip live Stata initialization
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
|
|
190
|
+
discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
|
|
191
|
+
best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
|
|
192
|
+
VS Code config.
|
|
193
|
+
|
|
141
194
|
---
|
|
142
195
|
|
|
143
196
|
## Quick Start
|
|
@@ -315,8 +368,11 @@ resources:
|
|
|
315
368
|
|
|
316
369
|
MCP prompts are available for common agent workflows:
|
|
317
370
|
`run_do_file_and_report`, `debug_stata_error`,
|
|
318
|
-
`fix_and_rerun_until_passes`, `replication_audit`,
|
|
319
|
-
`
|
|
371
|
+
`fix_and_rerun_until_passes`, `replication_audit`,
|
|
372
|
+
`plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
|
|
373
|
+
`summarize_estimation_results`, `run_notebook_cell_and_report`,
|
|
374
|
+
`fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
|
|
375
|
+
`publication_table`, and `cross_validate_did`.
|
|
320
376
|
|
|
321
377
|
### As a Jupyter Kernel
|
|
322
378
|
|
|
@@ -347,7 +403,7 @@ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata
|
|
|
347
403
|
|
|
348
404
|
### As a VS Code Extension
|
|
349
405
|
|
|
350
|
-
The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a sidebar (sessions / last result / run history / logs / graphs)
|
|
406
|
+
The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a **seven-view sidebar** (sessions / last result / **data variables** / run history / logs / graphs / **outputs**) — including an agent-native equivalent of Stata's **Variables window** and an **Outputs** panel that surfaces the `esttab` tables and `export` files each run writes to disk — status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
|
|
351
407
|
|
|
352
408
|
```bash
|
|
353
409
|
# from the VS Code CLI
|
|
@@ -358,6 +414,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
|
|
|
358
414
|
|
|
359
415
|
On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
|
|
360
416
|
|
|
417
|
+
If the extension or an MCP client cannot find the server, run
|
|
418
|
+
`stata-code doctor --no-stata-probe` in the same Python environment. It reports
|
|
419
|
+
whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
|
|
420
|
+
`python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
|
|
421
|
+
your shell.
|
|
422
|
+
|
|
361
423
|
#### Cell and section conventions
|
|
362
424
|
|
|
363
425
|
The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
|
|
@@ -437,7 +499,7 @@ stata_code/
|
|
|
437
499
|
| Jupyter kernel | ✓ | — | — | ✓ |
|
|
438
500
|
| Unified result schema | ✓ ([SCHEMA.md](SCHEMA.md)) | per-tool | per-tool | per-tool |
|
|
439
501
|
| Token-economy defaults | ✓ (log refs, graph refs) | — | — | — |
|
|
440
|
-
| Typed errors + suggestions | ✓ (
|
|
502
|
+
| Typed errors + suggestions | ✓ (31 kinds) | — | — | — |
|
|
441
503
|
| Multi-session | ✓ (Stata frames) | partial | — | — |
|
|
442
504
|
| Mature ecosystem | early | ✓ (statamcp.com, cookbook) | ✓ (11k installs) | ✓ |
|
|
443
505
|
|
|
@@ -447,7 +509,7 @@ stata_code/
|
|
|
447
509
|
|
|
448
510
|
## Roadmap
|
|
449
511
|
|
|
450
|
-
### Done (
|
|
512
|
+
### Done (current tree)
|
|
451
513
|
|
|
452
514
|
- v1.0 result schema ([SCHEMA.md](SCHEMA.md))
|
|
453
515
|
- `pystata`-based runner with native-typed `r()`, `e()`, and matrices
|
|
@@ -456,15 +518,21 @@ stata_code/
|
|
|
456
518
|
- Graph capture: `png` / `svg` / `pdf` with ref store and source-command attribution
|
|
457
519
|
- Log truncation with ref store
|
|
458
520
|
- Warning extraction: 5 categories + generic notes
|
|
459
|
-
-
|
|
521
|
+
- 31-kind error taxonomy with canonical suggestions
|
|
460
522
|
- MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
|
|
461
523
|
- Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
|
|
462
524
|
- Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
|
|
463
525
|
- Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
|
|
464
526
|
- Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
|
|
465
527
|
- Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
|
|
528
|
+
- Read-only `stata-code doctor` / `verify` diagnostics for package version,
|
|
529
|
+
extras, `pystata` discovery, console scripts, client hints, and optional live
|
|
530
|
+
Stata version probing
|
|
531
|
+
- Economist workflow layer: skill references and examples for modern DiD,
|
|
532
|
+
IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
|
|
533
|
+
audits
|
|
466
534
|
- JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
|
|
467
|
-
- VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
|
|
535
|
+
- VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, seven-view sidebar (sessions / last result / data variables / run history / logs / graphs / outputs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
|
|
468
536
|
- Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
|
|
469
537
|
|
|
470
538
|
### Next Up
|
|
@@ -510,3 +578,36 @@ Stata is a registered trademark of StataCorp LLC. This project is independent an
|
|
|
510
578
|
## Acknowledgements
|
|
511
579
|
|
|
512
580
|
The Stata tooling landscape that this project builds on and learns from is surveyed in [References-tools.md](References-tools.md). All listed projects retain their own licenses and authorship; please consult each repository before reuse.
|
|
581
|
+
|
|
582
|
+
---
|
|
583
|
+
|
|
584
|
+
<div align="center">
|
|
585
|
+
|
|
586
|
+
<table>
|
|
587
|
+
<tr>
|
|
588
|
+
<td align="center">
|
|
589
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
|
|
590
|
+
</td>
|
|
591
|
+
<td width="40"></td>
|
|
592
|
+
<td align="center">
|
|
593
|
+
<a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP" width="280" /></a>
|
|
594
|
+
</td>
|
|
595
|
+
</tr>
|
|
596
|
+
</table>
|
|
597
|
+
|
|
598
|
+
<table>
|
|
599
|
+
<tr>
|
|
600
|
+
<td align="center">
|
|
601
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-qrcode.png" alt="Visit copaper.ai" width="160" /></a><br/>
|
|
602
|
+
<strong>Visit <a href="https://copaper.ai">copaper.ai</a></strong>
|
|
603
|
+
</td>
|
|
604
|
+
<td align="center">
|
|
605
|
+
<img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-wechat.jpg" alt="CoPaper.AI WeChat" width="160" /><br/>
|
|
606
|
+
<strong>WeChat: CoPaper.AI</strong>
|
|
607
|
+
</td>
|
|
608
|
+
</tr>
|
|
609
|
+
</table>
|
|
610
|
+
|
|
611
|
+
<sub>Maintained by <a href="https://copaper.ai"><strong>CoPaper.AI</strong></a>, incubated at <a href="https://sccei.fsi.stanford.edu/reap"><strong>Stanford REAP / SCCEI</strong></a> · AI Assistant for Empirical Research</sub>
|
|
612
|
+
|
|
613
|
+
</div>
|
|
@@ -20,6 +20,24 @@
|
|
|
20
20
|
[](https://github.com/brycewang-stanford/stata-code/releases)
|
|
21
21
|
[](https://github.com/brycewang-stanford/stata-code)
|
|
22
22
|
|
|
23
|
+
<div align="center">
|
|
24
|
+
|
|
25
|
+
<table>
|
|
26
|
+
<tr>
|
|
27
|
+
<td align="center">
|
|
28
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
|
|
29
|
+
</td>
|
|
30
|
+
<td width="48"></td>
|
|
31
|
+
<td align="center">
|
|
32
|
+
<a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP — Center on China's Economy & Institutions" width="280" /></a>
|
|
33
|
+
</td>
|
|
34
|
+
</tr>
|
|
35
|
+
</table>
|
|
36
|
+
|
|
37
|
+
<sub><strong>Stanford REAP × CoPaper.AI</strong> · an academic–industrial AI toolkit for empirical research</sub>
|
|
38
|
+
|
|
39
|
+
</div>
|
|
40
|
+
|
|
23
41
|
<p align="center">
|
|
24
42
|
<img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/github-instructions.png" alt="stata-code: agent-native Stata bridge — one Python core, multiple frontends (Jupyter kernel, MCP server, VS Code extension)" width="720" />
|
|
25
43
|
</p>
|
|
@@ -28,6 +46,22 @@
|
|
|
28
46
|
|
|
29
47
|
`stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
|
|
30
48
|
|
|
49
|
+
**For empirical economists.** Drive Stata in plain language: run **DiD, IV, RDD, and publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
|
|
50
|
+
|
|
51
|
+
**Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Then just ask:
|
|
58
|
+
|
|
59
|
+
> *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
|
|
60
|
+
|
|
61
|
+
`stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
|
|
62
|
+
|
|
63
|
+
**Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
|
|
64
|
+
|
|
31
65
|
```text
|
|
32
66
|
┌────────────────────────────────────────┐
|
|
33
67
|
│ stata-code core (Python) │
|
|
@@ -45,12 +79,18 @@
|
|
|
45
79
|
└─────────────┘ └────────────┘ └─────────────────┘
|
|
46
80
|
```
|
|
47
81
|
|
|
48
|
-
**Status: v0.
|
|
82
|
+
**Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
|
|
49
83
|
|
|
50
|
-
|
|
84
|
+
Three workflows the current tree explicitly supports for end users and agents:
|
|
51
85
|
|
|
52
86
|
- **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
|
|
53
87
|
- **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
|
|
88
|
+
- **Economist workflow guides.** The bundled skill and cookbook now cover
|
|
89
|
+
modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
|
|
90
|
+
cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
|
|
91
|
+
Python, and official data MCPs remain separate tools with explicit handoff
|
|
92
|
+
files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
|
|
93
|
+
and [`examples/`](examples/).
|
|
54
94
|
|
|
55
95
|
---
|
|
56
96
|
|
|
@@ -99,6 +139,19 @@ pip install -e ".[mcp,kernel]"
|
|
|
99
139
|
|
|
100
140
|
Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
|
|
101
141
|
|
|
142
|
+
Verify the local setup with the read-only doctor:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
stata-code doctor
|
|
146
|
+
stata-code doctor --json # machine-readable output
|
|
147
|
+
stata-code doctor --no-stata-probe # skip live Stata initialization
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
|
|
151
|
+
discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
|
|
152
|
+
best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
|
|
153
|
+
VS Code config.
|
|
154
|
+
|
|
102
155
|
---
|
|
103
156
|
|
|
104
157
|
## Quick Start
|
|
@@ -276,8 +329,11 @@ resources:
|
|
|
276
329
|
|
|
277
330
|
MCP prompts are available for common agent workflows:
|
|
278
331
|
`run_do_file_and_report`, `debug_stata_error`,
|
|
279
|
-
`fix_and_rerun_until_passes`, `replication_audit`,
|
|
280
|
-
`
|
|
332
|
+
`fix_and_rerun_until_passes`, `replication_audit`,
|
|
333
|
+
`plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
|
|
334
|
+
`summarize_estimation_results`, `run_notebook_cell_and_report`,
|
|
335
|
+
`fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
|
|
336
|
+
`publication_table`, and `cross_validate_did`.
|
|
281
337
|
|
|
282
338
|
### As a Jupyter Kernel
|
|
283
339
|
|
|
@@ -308,7 +364,7 @@ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata
|
|
|
308
364
|
|
|
309
365
|
### As a VS Code Extension
|
|
310
366
|
|
|
311
|
-
The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a sidebar (sessions / last result / run history / logs / graphs)
|
|
367
|
+
The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a **seven-view sidebar** (sessions / last result / **data variables** / run history / logs / graphs / **outputs**) — including an agent-native equivalent of Stata's **Variables window** and an **Outputs** panel that surfaces the `esttab` tables and `export` files each run writes to disk — status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
|
|
312
368
|
|
|
313
369
|
```bash
|
|
314
370
|
# from the VS Code CLI
|
|
@@ -319,6 +375,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
|
|
|
319
375
|
|
|
320
376
|
On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
|
|
321
377
|
|
|
378
|
+
If the extension or an MCP client cannot find the server, run
|
|
379
|
+
`stata-code doctor --no-stata-probe` in the same Python environment. It reports
|
|
380
|
+
whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
|
|
381
|
+
`python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
|
|
382
|
+
your shell.
|
|
383
|
+
|
|
322
384
|
#### Cell and section conventions
|
|
323
385
|
|
|
324
386
|
The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
|
|
@@ -398,7 +460,7 @@ stata_code/
|
|
|
398
460
|
| Jupyter kernel | ✓ | — | — | ✓ |
|
|
399
461
|
| Unified result schema | ✓ ([SCHEMA.md](SCHEMA.md)) | per-tool | per-tool | per-tool |
|
|
400
462
|
| Token-economy defaults | ✓ (log refs, graph refs) | — | — | — |
|
|
401
|
-
| Typed errors + suggestions | ✓ (
|
|
463
|
+
| Typed errors + suggestions | ✓ (31 kinds) | — | — | — |
|
|
402
464
|
| Multi-session | ✓ (Stata frames) | partial | — | — |
|
|
403
465
|
| Mature ecosystem | early | ✓ (statamcp.com, cookbook) | ✓ (11k installs) | ✓ |
|
|
404
466
|
|
|
@@ -408,7 +470,7 @@ stata_code/
|
|
|
408
470
|
|
|
409
471
|
## Roadmap
|
|
410
472
|
|
|
411
|
-
### Done (
|
|
473
|
+
### Done (current tree)
|
|
412
474
|
|
|
413
475
|
- v1.0 result schema ([SCHEMA.md](SCHEMA.md))
|
|
414
476
|
- `pystata`-based runner with native-typed `r()`, `e()`, and matrices
|
|
@@ -417,15 +479,21 @@ stata_code/
|
|
|
417
479
|
- Graph capture: `png` / `svg` / `pdf` with ref store and source-command attribution
|
|
418
480
|
- Log truncation with ref store
|
|
419
481
|
- Warning extraction: 5 categories + generic notes
|
|
420
|
-
-
|
|
482
|
+
- 31-kind error taxonomy with canonical suggestions
|
|
421
483
|
- MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
|
|
422
484
|
- Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
|
|
423
485
|
- Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
|
|
424
486
|
- Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
|
|
425
487
|
- Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
|
|
426
488
|
- Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
|
|
489
|
+
- Read-only `stata-code doctor` / `verify` diagnostics for package version,
|
|
490
|
+
extras, `pystata` discovery, console scripts, client hints, and optional live
|
|
491
|
+
Stata version probing
|
|
492
|
+
- Economist workflow layer: skill references and examples for modern DiD,
|
|
493
|
+
IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
|
|
494
|
+
audits
|
|
427
495
|
- JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
|
|
428
|
-
- VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
|
|
496
|
+
- VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, seven-view sidebar (sessions / last result / data variables / run history / logs / graphs / outputs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
|
|
429
497
|
- Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
|
|
430
498
|
|
|
431
499
|
### Next Up
|
|
@@ -471,3 +539,36 @@ Stata is a registered trademark of StataCorp LLC. This project is independent an
|
|
|
471
539
|
## Acknowledgements
|
|
472
540
|
|
|
473
541
|
The Stata tooling landscape that this project builds on and learns from is surveyed in [References-tools.md](References-tools.md). All listed projects retain their own licenses and authorship; please consult each repository before reuse.
|
|
542
|
+
|
|
543
|
+
---
|
|
544
|
+
|
|
545
|
+
<div align="center">
|
|
546
|
+
|
|
547
|
+
<table>
|
|
548
|
+
<tr>
|
|
549
|
+
<td align="center">
|
|
550
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
|
|
551
|
+
</td>
|
|
552
|
+
<td width="40"></td>
|
|
553
|
+
<td align="center">
|
|
554
|
+
<a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP" width="280" /></a>
|
|
555
|
+
</td>
|
|
556
|
+
</tr>
|
|
557
|
+
</table>
|
|
558
|
+
|
|
559
|
+
<table>
|
|
560
|
+
<tr>
|
|
561
|
+
<td align="center">
|
|
562
|
+
<a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-qrcode.png" alt="Visit copaper.ai" width="160" /></a><br/>
|
|
563
|
+
<strong>Visit <a href="https://copaper.ai">copaper.ai</a></strong>
|
|
564
|
+
</td>
|
|
565
|
+
<td align="center">
|
|
566
|
+
<img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-wechat.jpg" alt="CoPaper.AI WeChat" width="160" /><br/>
|
|
567
|
+
<strong>WeChat: CoPaper.AI</strong>
|
|
568
|
+
</td>
|
|
569
|
+
</tr>
|
|
570
|
+
</table>
|
|
571
|
+
|
|
572
|
+
<sub>Maintained by <a href="https://copaper.ai"><strong>CoPaper.AI</strong></a>, incubated at <a href="https://sccei.fsi.stanford.edu/reap"><strong>Stanford REAP / SCCEI</strong></a> · AI Assistant for Empirical Research</sub>
|
|
573
|
+
|
|
574
|
+
</div>
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# stata-code Industry Leadership Roadmap
|
|
2
|
+
|
|
3
|
+
This roadmap translates the June 2026 empirical-research MCP landscape into
|
|
4
|
+
work that fits `stata-code`'s architecture. The project should win by being the
|
|
5
|
+
most reliable agent-native Stata execution and audit layer for empirical
|
|
6
|
+
economists, not by becoming a grab-bag data platform or a second R/Python
|
|
7
|
+
runtime.
|
|
8
|
+
|
|
9
|
+
## North Star
|
|
10
|
+
|
|
11
|
+
`stata-code` should be the default way an AI agent runs, inspects, repairs, and
|
|
12
|
+
audits Stata work:
|
|
13
|
+
|
|
14
|
+
- one execution core across Python, MCP, Jupyter, and VS Code;
|
|
15
|
+
- stable `RunResult` schema with typed errors and native `r()` / `e()` values;
|
|
16
|
+
- token-efficient logs, graphs, matrices, and run bundles;
|
|
17
|
+
- economist-facing workflows for DiD, IV, RDD, tables, data handoff, and
|
|
18
|
+
cross-package verification.
|
|
19
|
+
|
|
20
|
+
## Product Pillars
|
|
21
|
+
|
|
22
|
+
1. **Reliable execution contract.** Keep `SCHEMA.md` load-bearing. Agents
|
|
23
|
+
branch on `ok`, `error.kind`, `results.e`, refs, and run manifests instead
|
|
24
|
+
of parsing log prose.
|
|
25
|
+
2. **Econometrics workflow intelligence.** Ship concise skill references and
|
|
26
|
+
prompts that know the Stata commands economists actually use: `csdid`,
|
|
27
|
+
`did_imputation`, `eventstudyinteract`, `rdrobust`, `ivreg2`,
|
|
28
|
+
`ivreghdfe`, `boottest`, `esttab`, `collect`, and related packages.
|
|
29
|
+
3. **Cross-stack parity audits.** Treat R/Python/Stata disagreement as a
|
|
30
|
+
first-class research risk. `stata-code` should run the Stata leg and define the
|
|
31
|
+
comparison protocol without pretending to own the R or Python runtimes.
|
|
32
|
+
4. **Data-MCP handoff.** External MCP servers can discover and fetch official
|
|
33
|
+
data. `stata-code` should document and validate the handoff into Stata:
|
|
34
|
+
source metadata, stable raw files, key checks, and reproducible imports.
|
|
35
|
+
5. **Editor and artifact ergonomics.** VS Code should make sessions, graphs,
|
|
36
|
+
logs, tables, data previews, and run bundles easy to inspect without hiding
|
|
37
|
+
the underlying structured result.
|
|
38
|
+
6. **Distribution confidence.** Install and runtime checks should be easy to
|
|
39
|
+
verify without mutating user config. Prefer `doctor`/`verify` diagnostics
|
|
40
|
+
before any automatic config writer.
|
|
41
|
+
|
|
42
|
+
## Scope Boundaries
|
|
43
|
+
|
|
44
|
+
`stata-code` should not directly bundle data-provider APIs, R sessions, Python
|
|
45
|
+
causal libraries, or paid services. Those are separate tools. The durable
|
|
46
|
+
boundary is: external data/model tools produce files or results; `stata-code`
|
|
47
|
+
executes and audits the Stata side with traceable artifacts.
|
|
48
|
+
|
|
49
|
+
## One-Month Execution Plan
|
|
50
|
+
|
|
51
|
+
### Week 1: Workflow Layer
|
|
52
|
+
|
|
53
|
+
- Add cross-agent coordination and this roadmap.
|
|
54
|
+
- Expand the skill reference library for modern DiD, IV/weak-IV, RDD,
|
|
55
|
+
table-export, data-MCP handoff, and parity audits.
|
|
56
|
+
- Add examples that show how agents should use the workflows without claiming
|
|
57
|
+
unsupported automation.
|
|
58
|
+
- Add MCP prompts for parity audit planning, data-MCP-to-Stata handoff, and
|
|
59
|
+
turnkey method templates for DiD/event study, IV/2SLS, RDD, and publication
|
|
60
|
+
tables.
|
|
61
|
+
- Validate with skill packaging tests, MCP prompt tests, and markdown hygiene.
|
|
62
|
+
|
|
63
|
+
### Week 2: Diagnostics and Setup Confidence
|
|
64
|
+
|
|
65
|
+
- Ship a read-only `stata-code doctor` / `verify` command that reports Python,
|
|
66
|
+
`stata-code`, MCP extras, `pystata` discovery, Stata version/edition, PATH
|
|
67
|
+
resolution, and common client config hints.
|
|
68
|
+
- Keep config writing out of scope until backups and dry-run behavior exist.
|
|
69
|
+
- Add tests for missing `pystata`, missing MCP extra, path mismatch, and JSON
|
|
70
|
+
output.
|
|
71
|
+
|
|
72
|
+
### Week 3: VS Code and Artifacts
|
|
73
|
+
|
|
74
|
+
- Improve dataset preview from first-100 text output toward a paged/filterable
|
|
75
|
+
view or a clearly documented intermediate step.
|
|
76
|
+
- Surface table/export artifacts from run bundles more explicitly.
|
|
77
|
+
- Add tests around formatter and tree-provider behavior before broad UI work.
|
|
78
|
+
|
|
79
|
+
### Week 4: Release Quality
|
|
80
|
+
|
|
81
|
+
- Sweep README.md, README.zh.md, vscode/README.md, CHANGELOG.md, examples,
|
|
82
|
+
and skill docs for drift.
|
|
83
|
+
- Run release-relevant checks: version guard, schema export, skill zip build,
|
|
84
|
+
MCP tests, core tests that do not require Stata, and VS Code compile/tests if
|
|
85
|
+
touched.
|
|
86
|
+
- Prepare release notes that separate shipped features from roadmap items.
|
|
87
|
+
|
|
88
|
+
## Success Criteria
|
|
89
|
+
|
|
90
|
+
- Agents can find a documented path for the top empirical workflows without
|
|
91
|
+
loading the whole reference library.
|
|
92
|
+
- Parity audits preserve sample definitions, package versions, estimator
|
|
93
|
+
defaults, failure/refusal behavior, and numeric tolerances.
|
|
94
|
+
- Data pulled by external MCP servers enters Stata through a reproducible raw
|
|
95
|
+
file plus metadata handoff, not through unstated browser-copy steps.
|
|
96
|
+
- User-facing docs explain that `stata-code` runs Stata and coordinates with
|
|
97
|
+
other MCP tools; they do not imply that it directly runs R/Python or hosts
|
|
98
|
+
official data APIs.
|
|
99
|
+
- All changed surfaces have targeted validation evidence before handoff.
|