stata-code 0.7.2__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {stata_code-0.7.2 → stata_code-0.8.1}/CHANGELOG.md +41 -1
  2. {stata_code-0.7.2 → stata_code-0.8.1}/PKG-INFO +111 -10
  3. {stata_code-0.7.2 → stata_code-0.8.1}/README.md +110 -9
  4. stata_code-0.8.1/docs/industry-leader-roadmap.md +99 -0
  5. stata_code-0.8.1/examples/06-cross-stack-parity-audit.md +101 -0
  6. stata_code-0.8.1/examples/07-data-mcp-handoff.md +77 -0
  7. {stata_code-0.7.2 → stata_code-0.8.1}/examples/README.md +2 -0
  8. {stata_code-0.7.2 → stata_code-0.8.1}/pyproject.toml +2 -1
  9. stata_code-0.8.1/scripts/check_github_actions.py +96 -0
  10. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/__init__.py +1 -1
  11. stata_code-0.8.1/stata_code/__main__.py +6 -0
  12. stata_code-0.8.1/stata_code/cli.py +73 -0
  13. stata_code-0.8.1/stata_code/doctor.py +285 -0
  14. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/server.py +458 -1
  15. stata_code-0.8.1/tests/test_doctor.py +148 -0
  16. stata_code-0.8.1/tests/test_github_actions.py +52 -0
  17. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_mcp.py +133 -0
  18. stata_code-0.8.1/tests/test_method_prompts.py +167 -0
  19. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_skill_package.py +10 -0
  20. {stata_code-0.7.2 → stata_code-0.8.1}/.gitignore +0 -0
  21. {stata_code-0.7.2 → stata_code-0.8.1}/LICENSE +0 -0
  22. {stata_code-0.7.2 → stata_code-0.8.1}/LICENSE-POLICY.md +0 -0
  23. {stata_code-0.7.2 → stata_code-0.8.1}/PUBLISHING.md +0 -0
  24. {stata_code-0.7.2 → stata_code-0.8.1}/SCHEMA.md +0 -0
  25. {stata_code-0.7.2 → stata_code-0.8.1}/docs/design/hard_timeout.md +0 -0
  26. {stata_code-0.7.2 → stata_code-0.8.1}/examples/01-basic-regression.md +0 -0
  27. {stata_code-0.7.2 → stata_code-0.8.1}/examples/02-did-card-krueger.md +0 -0
  28. {stata_code-0.7.2 → stata_code-0.8.1}/examples/03-graphs.md +0 -0
  29. {stata_code-0.7.2 → stata_code-0.8.1}/examples/04-multi-session.md +0 -0
  30. {stata_code-0.7.2 → stata_code-0.8.1}/examples/05-large-matrix.md +0 -0
  31. {stata_code-0.7.2 → stata_code-0.8.1}/schema/run_result.schema.json +0 -0
  32. {stata_code-0.7.2 → stata_code-0.8.1}/scripts/build_skill_zip.py +0 -0
  33. {stata_code-0.7.2 → stata_code-0.8.1}/scripts/check_versions.py +0 -0
  34. {stata_code-0.7.2 → stata_code-0.8.1}/scripts/export_schema.py +0 -0
  35. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/__init__.py +0 -0
  36. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_pool.py +0 -0
  37. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_refs.py +0 -0
  38. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/_runtime.py +0 -0
  39. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/errors.py +0 -0
  40. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/log_artifacts.py +0 -0
  41. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/notebook.py +0 -0
  42. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/run_index.py +0 -0
  43. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/runner.py +0 -0
  44. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/core/schema.py +0 -0
  45. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/__init__.py +0 -0
  46. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/__main__.py +0 -0
  47. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-32x32.png +0 -0
  48. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-64x64.png +0 -0
  49. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/assets/logo-svg.svg +0 -0
  50. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/kernel/kernel.py +0 -0
  51. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/__init__.py +0 -0
  52. {stata_code-0.7.2 → stata_code-0.8.1}/stata_code/mcp/__main__.py +0 -0
  53. {stata_code-0.7.2 → stata_code-0.8.1}/tests/__init__.py +0 -0
  54. {stata_code-0.7.2 → stata_code-0.8.1}/tests/conftest.py +0 -0
  55. {stata_code-0.7.2 → stata_code-0.8.1}/tests/fixtures/.gitkeep +0 -0
  56. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_cancel.py +0 -0
  57. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_errors.py +0 -0
  58. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_kernel.py +0 -0
  59. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_log_artifacts.py +0 -0
  60. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_mcp_stdio.py +0 -0
  61. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_new_tools.py +0 -0
  62. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_notebook.py +0 -0
  63. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_notebook_phase2.py +0 -0
  64. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_pool.py +0 -0
  65. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_public_api.py +0 -0
  66. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_release_versions.py +0 -0
  67. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_run_index.py +0 -0
  68. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_runner.py +0 -0
  69. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_runtime_discovery.py +0 -0
  70. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_schema.py +0 -0
  71. {stata_code-0.7.2 → stata_code-0.8.1}/tests/test_schema_artifact.py +0 -0
@@ -4,7 +4,47 @@ All notable changes to `stata-code` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
5
5
  to semver-major.minor for the result schema (see `SCHEMA.md` §6).
6
6
 
7
- ## Unreleased
7
+ ## 0.8.1 — 2026-06-20
8
+
9
+ ### Changed
10
+
11
+ - **README & metadata refresh.** Documented the VS Code extension's
12
+ seven-view sidebar (added the **Data** variables browser and the
13
+ **Outputs** table/export-artifact panel), corrected the error taxonomy
14
+ count to 31 kinds, and sharpened the Claude Code plugin / VS Code
15
+ Marketplace descriptions to lead with the empirical-economics workflow
16
+ (DiD/IV/RDD, publication tables, StatsPAI cross-validation).
17
+ - **Partner module.** Added a Stanford REAP × CoPaper.AI partner block
18
+ (logos, QR, links) to both the English and Chinese README, with the logo
19
+ assets bundled under `branding/partners/`.
20
+
21
+ ## 0.8.0 — 2026-06-20
22
+
23
+ ### Added
24
+
25
+ - **Economist workflow coordination and roadmap.** Added
26
+ `AGENT_COORDINATION.md` for concurrent-agent lanes and
27
+ `docs/industry-leader-roadmap.md` for the one-month product plan: workflow
28
+ intelligence, parity audits, data-MCP handoff, editor/artifact polish, and
29
+ distribution diagnostics.
30
+ - **Cross-stack and data-MCP workflow references.** The `stata-code` skill now
31
+ includes `references/parity-audit.md` and
32
+ `references/data-mcp-handoff.md`, plus cookbook examples for cross-stack
33
+ parity audits and external-data-MCP handoff into Stata.
34
+ - **Modern empirical-economics package notes.** Added package references for
35
+ `csdid`, `drdid`, `did_imputation`, `eventstudyinteract`,
36
+ `did_multiplegt_dyn`, `rdrobust`, `ivreg2`, `ivreghdfe`, `boottest`, and
37
+ `outreg2`, and wired them into the skill routing table.
38
+ - **MCP prompt discoverability for economist workflows.** Added
39
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
40
+ `did_event_study`, `iv_2sls`, `rdd`, `publication_table`, and
41
+ `cross_validate_did` prompts so clients can discover the new protocols and
42
+ turnkey empirical recipes directly through MCP.
43
+ - **Read-only installation diagnostics.** Added the top-level `stata-code`
44
+ console script with `doctor` / `verify` commands. The diagnostic reports
45
+ package/Python version, MCP and kernel extras, `pystata` discovery, console
46
+ scripts on `PATH`, client/VS Code hints, and an optional live Stata
47
+ version/edition probe without mutating user configuration.
8
48
 
9
49
  ## 0.7.2 — 2026-06-20
10
50
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stata-code
3
- Version: 0.7.2
3
+ Version: 0.8.1
4
4
  Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
5
5
  Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
6
6
  Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
@@ -59,6 +59,24 @@ Description-Content-Type: text/markdown
59
59
  [![GitHub release](https://img.shields.io/github/v/release/brycewang-stanford/stata-code)](https://github.com/brycewang-stanford/stata-code/releases)
60
60
  [![GitHub stars](https://img.shields.io/github/stars/brycewang-stanford/stata-code?style=social)](https://github.com/brycewang-stanford/stata-code)
61
61
 
62
+ <div align="center">
63
+
64
+ <table>
65
+ <tr>
66
+ <td align="center">
67
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
68
+ </td>
69
+ <td width="48"></td>
70
+ <td align="center">
71
+ <a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP — Center on China's Economy & Institutions" width="280" /></a>
72
+ </td>
73
+ </tr>
74
+ </table>
75
+
76
+ <sub><strong>Stanford REAP × CoPaper.AI</strong> · an academic–industrial AI toolkit for empirical research</sub>
77
+
78
+ </div>
79
+
62
80
  <p align="center">
63
81
  <img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/github-instructions.png" alt="stata-code: agent-native Stata bridge — one Python core, multiple frontends (Jupyter kernel, MCP server, VS Code extension)" width="720" />
64
82
  </p>
@@ -67,6 +85,22 @@ Description-Content-Type: text/markdown
67
85
 
68
86
  `stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
69
87
 
88
+ **For empirical economists.** Drive Stata in plain language: run **DiD, IV, RDD, and publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
89
+
90
+ **Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
91
+
92
+ ```bash
93
+ claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
94
+ ```
95
+
96
+ Then just ask:
97
+
98
+ > *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
99
+
100
+ `stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
101
+
102
+ **Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
103
+
70
104
  ```text
71
105
  ┌────────────────────────────────────────┐
72
106
  │ stata-code core (Python) │
@@ -84,12 +118,18 @@ Description-Content-Type: text/markdown
84
118
  └─────────────┘ └────────────┘ └─────────────────┘
85
119
  ```
86
120
 
87
- **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
121
+ **Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
88
122
 
89
- Two workflows the current release explicitly supports for end users:
123
+ Three workflows the current release explicitly supports for end users and agents:
90
124
 
91
125
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
92
126
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
127
+ - **Economist workflow guides.** The bundled skill and cookbook now cover
128
+ modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
129
+ cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
130
+ Python, and official data MCPs remain separate tools with explicit handoff
131
+ files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
132
+ and [`examples/`](examples/).
93
133
 
94
134
  ---
95
135
 
@@ -138,6 +178,19 @@ pip install -e ".[mcp,kernel]"
138
178
 
139
179
  Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
140
180
 
181
+ Verify the local setup with the read-only doctor:
182
+
183
+ ```bash
184
+ stata-code doctor
185
+ stata-code doctor --json # machine-readable output
186
+ stata-code doctor --no-stata-probe # skip live Stata initialization
187
+ ```
188
+
189
+ The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
190
+ discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
191
+ best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
192
+ VS Code config.
193
+
141
194
  ---
142
195
 
143
196
  ## Quick Start
@@ -315,8 +368,11 @@ resources:
315
368
 
316
369
  MCP prompts are available for common agent workflows:
317
370
  `run_do_file_and_report`, `debug_stata_error`,
318
- `fix_and_rerun_until_passes`, `replication_audit`, and
319
- `summarize_estimation_results`.
371
+ `fix_and_rerun_until_passes`, `replication_audit`,
372
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
373
+ `summarize_estimation_results`, `run_notebook_cell_and_report`,
374
+ `fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
375
+ `publication_table`, and `cross_validate_did`.
320
376
 
321
377
  ### As a Jupyter Kernel
322
378
 
@@ -347,7 +403,7 @@ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata
347
403
 
348
404
  ### As a VS Code Extension
349
405
 
350
- The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a sidebar (sessions / last result / run history / logs / graphs), status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
406
+ The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a **seven-view sidebar** (sessions / last result / **data variables** / run history / logs / graphs / **outputs**) — including an agent-native equivalent of Stata's **Variables window** and an **Outputs** panel that surfaces the `esttab` tables and `export` files each run writes to disk — status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
351
407
 
352
408
  ```bash
353
409
  # from the VS Code CLI
@@ -358,6 +414,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
358
414
 
359
415
  On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
360
416
 
417
+ If the extension or an MCP client cannot find the server, run
418
+ `stata-code doctor --no-stata-probe` in the Python environment where `stata-code` is installed. It reports
419
+ whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
420
+ `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
421
+ your shell.
422
+
361
423
  #### Cell and section conventions
362
424
 
363
425
  The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
@@ -437,7 +499,7 @@ stata_code/
437
499
  | Jupyter kernel | ✓ | — | — | ✓ |
438
500
  | Unified result schema | ✓ ([SCHEMA.md](SCHEMA.md)) | per-tool | per-tool | per-tool |
439
501
  | Token-economy defaults | ✓ (log refs, graph refs) | — | — | — |
440
- | Typed errors + suggestions | ✓ (32 kinds) | — | — | — |
502
+ | Typed errors + suggestions | ✓ (31 kinds) | — | — | — |
441
503
  | Multi-session | ✓ (Stata frames) | partial | — | — |
442
504
  | Mature ecosystem | early | ✓ (statamcp.com, cookbook) | ✓ (11k installs) | ✓ |
443
505
 
@@ -447,7 +509,7 @@ stata_code/
447
509
 
448
510
  ## Roadmap
449
511
 
450
- ### Done (through v0.7 — May 2026)
512
+ ### Done (current tree)
451
513
 
452
514
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
453
515
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -456,15 +518,21 @@ stata_code/
456
518
  - Graph capture: `png` / `svg` / `pdf` with ref store and source-command attribution
457
519
  - Log truncation with ref store
458
520
  - Warning extraction: 5 categories + generic notes
459
- - 32-kind error taxonomy with canonical suggestions
521
+ - 31-kind error taxonomy with canonical suggestions
460
522
  - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
461
523
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
462
524
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
463
525
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
464
526
  - Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
465
527
  - Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
528
+ - Read-only `stata-code doctor` / `verify` diagnostics for package version,
529
+ extras, `pystata` discovery, console scripts, client hints, and optional live
530
+ Stata version probing
531
+ - Economist workflow layer: skill references and examples for modern DiD,
532
+ IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
533
+ audits
466
534
  - JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
467
- - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
535
+ - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, seven-view sidebar (sessions / last result / data variables / run history / logs / graphs / outputs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
468
536
  - Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
469
537
 
470
538
  ### Next Up
@@ -510,3 +578,36 @@ Stata is a registered trademark of StataCorp LLC. This project is independent an
510
578
  ## Acknowledgements
511
579
 
512
580
  The Stata tooling landscape that this project builds on and learns from is surveyed in [References-tools.md](References-tools.md). All listed projects retain their own licenses and authorship; please consult each repository before reuse.
581
+
582
+ ---
583
+
584
+ <div align="center">
585
+
586
+ <table>
587
+ <tr>
588
+ <td align="center">
589
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
590
+ </td>
591
+ <td width="40"></td>
592
+ <td align="center">
593
+ <a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP" width="280" /></a>
594
+ </td>
595
+ </tr>
596
+ </table>
597
+
598
+ <table>
599
+ <tr>
600
+ <td align="center">
601
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-qrcode.png" alt="Visit copaper.ai" width="160" /></a><br/>
602
+ <strong>Visit <a href="https://copaper.ai">copaper.ai</a></strong>
603
+ </td>
604
+ <td align="center">
605
+ <img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-wechat.jpg" alt="CoPaper.AI WeChat" width="160" /><br/>
606
+ <strong>WeChat: CoPaper.AI</strong>
607
+ </td>
608
+ </tr>
609
+ </table>
610
+
611
+ <sub>Maintained by <a href="https://copaper.ai"><strong>CoPaper.AI</strong></a>, incubated at <a href="https://sccei.fsi.stanford.edu/reap"><strong>Stanford REAP / SCCEI</strong></a> · AI Assistant for Empirical Research</sub>
612
+
613
+ </div>
@@ -20,6 +20,24 @@
20
20
  [![GitHub release](https://img.shields.io/github/v/release/brycewang-stanford/stata-code)](https://github.com/brycewang-stanford/stata-code/releases)
21
21
  [![GitHub stars](https://img.shields.io/github/stars/brycewang-stanford/stata-code?style=social)](https://github.com/brycewang-stanford/stata-code)
22
22
 
23
+ <div align="center">
24
+
25
+ <table>
26
+ <tr>
27
+ <td align="center">
28
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
29
+ </td>
30
+ <td width="48"></td>
31
+ <td align="center">
32
+ <a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP — Center on China's Economy & Institutions" width="280" /></a>
33
+ </td>
34
+ </tr>
35
+ </table>
36
+
37
+ <sub><strong>Stanford REAP × CoPaper.AI</strong> · an academic–industrial AI toolkit for empirical research</sub>
38
+
39
+ </div>
40
+
23
41
  <p align="center">
24
42
  <img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/github-instructions.png" alt="stata-code: agent-native Stata bridge — one Python core, multiple frontends (Jupyter kernel, MCP server, VS Code extension)" width="720" />
25
43
  </p>
@@ -28,6 +46,22 @@
28
46
 
29
47
  `stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
30
48
 
49
+ **For empirical economists.** Drive Stata in plain language: run **DiD, IV, RDD, and publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
50
+
51
+ **Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
52
+
53
+ ```bash
54
+ claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
55
+ ```
56
+
57
+ Then just ask:
58
+
59
+ > *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
60
+
61
+ `stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
62
+
63
+ **Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
64
+
31
65
  ```text
32
66
  ┌────────────────────────────────────────┐
33
67
  │ stata-code core (Python) │
@@ -45,12 +79,18 @@
45
79
  └─────────────┘ └────────────┘ └─────────────────┘
46
80
  ```
47
81
 
48
- **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
82
+ **Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
49
83
 
50
- Two workflows the current release explicitly supports for end users:
84
+ Three workflows the current release explicitly supports for end users and agents:
51
85
 
52
86
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
53
87
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
88
+ - **Economist workflow guides.** The bundled skill and cookbook now cover
89
+ modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
90
+ cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
91
+ Python, and official data MCPs remain separate tools with explicit handoff
92
+ files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
93
+ and [`examples/`](examples/).
54
94
 
55
95
  ---
56
96
 
@@ -99,6 +139,19 @@ pip install -e ".[mcp,kernel]"
99
139
 
100
140
  Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
101
141
 
142
+ Verify the local setup with the read-only doctor:
143
+
144
+ ```bash
145
+ stata-code doctor
146
+ stata-code doctor --json # machine-readable output
147
+ stata-code doctor --no-stata-probe # skip live Stata initialization
148
+ ```
149
+
150
+ The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
151
+ discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
152
+ best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
153
+ VS Code config.
154
+
102
155
  ---
103
156
 
104
157
  ## Quick Start
@@ -276,8 +329,11 @@ resources:
276
329
 
277
330
  MCP prompts are available for common agent workflows:
278
331
  `run_do_file_and_report`, `debug_stata_error`,
279
- `fix_and_rerun_until_passes`, `replication_audit`, and
280
- `summarize_estimation_results`.
332
+ `fix_and_rerun_until_passes`, `replication_audit`,
333
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
334
+ `summarize_estimation_results`, `run_notebook_cell_and_report`,
335
+ `fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
336
+ `publication_table`, and `cross_validate_did`.
281
337
 
282
338
  ### As a Jupyter Kernel
283
339
 
@@ -308,7 +364,7 @@ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata
308
364
 
309
365
  ### As a VS Code Extension
310
366
 
311
- The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a sidebar (sessions / last result / run history / logs / graphs), status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
367
+ The companion extension is on the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode). It spawns `stata-code-mcp` as a child process and adds syntax highlighting, an Outline view for `**#` sections and `program define` blocks, code-lens "Run cell" and "Run section" actions on `.do` files, a **seven-view sidebar** (sessions / last result / **data variables** / run history / logs / graphs / **outputs**) — including an agent-native equivalent of Stata's **Variables window** and an **Outputs** panel that surfaces the `esttab` tables and `export` files each run writes to disk — status-bar indicators, completions, help lookup, conservative variable rename, and inline diagnostics from the v1.0 typed errors.
312
368
 
313
369
  ```bash
314
370
  # from the VS Code CLI
@@ -319,6 +375,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
319
375
 
320
376
  On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
321
377
 
378
+ If the extension or an MCP client cannot find the server, run
379
+ `stata-code doctor --no-stata-probe` in the same Python environment. It reports
380
+ whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
381
+ `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
382
+ your shell.
383
+
322
384
  #### Cell and section conventions
323
385
 
324
386
  The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
@@ -398,7 +460,7 @@ stata_code/
398
460
  | Jupyter kernel | ✓ | — | — | ✓ |
399
461
  | Unified result schema | ✓ ([SCHEMA.md](SCHEMA.md)) | per-tool | per-tool | per-tool |
400
462
  | Token-economy defaults | ✓ (log refs, graph refs) | — | — | — |
401
- | Typed errors + suggestions | ✓ (32 kinds) | — | — | — |
463
+ | Typed errors + suggestions | ✓ (31 kinds) | — | — | — |
402
464
  | Multi-session | ✓ (Stata frames) | partial | — | — |
403
465
  | Mature ecosystem | early | ✓ (statamcp.com, cookbook) | ✓ (11k installs) | ✓ |
404
466
 
@@ -408,7 +470,7 @@ stata_code/
408
470
 
409
471
  ## Roadmap
410
472
 
411
- ### Done (through v0.7 — May 2026)
473
+ ### Done (current tree)
412
474
 
413
475
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
414
476
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -417,15 +479,21 @@ stata_code/
417
479
  - Graph capture: `png` / `svg` / `pdf` with ref store and source-command attribution
418
480
  - Log truncation with ref store
419
481
  - Warning extraction: 5 categories + generic notes
420
- - 32-kind error taxonomy with canonical suggestions
482
+ - 31-kind error taxonomy with canonical suggestions
421
483
  - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
422
484
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
423
485
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
424
486
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
425
487
  - Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
426
488
  - Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
489
+ - Read-only `stata-code doctor` / `verify` diagnostics for package version,
490
+ extras, `pystata` discovery, console scripts, client hints, and optional live
491
+ Stata version probing
492
+ - Economist workflow layer: skill references and examples for modern DiD,
493
+ IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
494
+ audits
427
495
  - JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
428
- - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
496
+ - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, seven-view sidebar (sessions / last result / data variables / run history / logs / graphs / outputs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
429
497
  - Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
430
498
 
431
499
  ### Next Up
@@ -471,3 +539,36 @@ Stata is a registered trademark of StataCorp LLC. This project is independent an
471
539
  ## Acknowledgements
472
540
 
473
541
  The Stata tooling landscape that this project builds on and learns from is surveyed in [References-tools.md](References-tools.md). All listed projects retain their own licenses and authorship; please consult each repository before reuse.
542
+
543
+ ---
544
+
545
+ <div align="center">
546
+
547
+ <table>
548
+ <tr>
549
+ <td align="center">
550
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-logo.png" alt="CoPaper.AI" width="200" /></a>
551
+ </td>
552
+ <td width="40"></td>
553
+ <td align="center">
554
+ <a href="https://sccei.fsi.stanford.edu/reap"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/stanford-reap-logo.png" alt="Stanford REAP" width="280" /></a>
555
+ </td>
556
+ </tr>
557
+ </table>
558
+
559
+ <table>
560
+ <tr>
561
+ <td align="center">
562
+ <a href="https://copaper.ai"><img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-qrcode.png" alt="Visit copaper.ai" width="160" /></a><br/>
563
+ <strong>Visit <a href="https://copaper.ai">copaper.ai</a></strong>
564
+ </td>
565
+ <td align="center">
566
+ <img src="https://raw.githubusercontent.com/brycewang-stanford/stata-code/main/branding/partners/copaper-wechat.jpg" alt="CoPaper.AI WeChat" width="160" /><br/>
567
+ <strong>WeChat: CoPaper.AI</strong>
568
+ </td>
569
+ </tr>
570
+ </table>
571
+
572
+ <sub>Maintained by <a href="https://copaper.ai"><strong>CoPaper.AI</strong></a>, incubated at <a href="https://sccei.fsi.stanford.edu/reap"><strong>Stanford REAP / SCCEI</strong></a> · AI Assistant for Empirical Research</sub>
573
+
574
+ </div>
@@ -0,0 +1,99 @@
1
+ # stata-code Industry Leadership Roadmap
2
+
3
+ This roadmap translates the June 2026 empirical-research MCP landscape into
4
+ work that fits `stata-code`'s architecture. The project should win by being the
5
+ most reliable agent-native Stata execution and audit layer for empirical
6
+ economists, not by becoming a grab-bag data platform or a second R/Python
7
+ runtime.
8
+
9
+ ## North Star
10
+
11
+ `stata-code` should be the default way an AI agent runs, inspects, repairs, and
12
+ audits Stata work:
13
+
14
+ - one execution core across Python, MCP, Jupyter, and VS Code;
15
+ - stable `RunResult` schema with typed errors and native `r()` / `e()` values;
16
+ - token-efficient logs, graphs, matrices, and run bundles;
17
+ - economist-facing workflows for DiD, IV, RDD, tables, data handoff, and
18
+ cross-package verification.
19
+
20
+ ## Product Pillars
21
+
22
+ 1. **Reliable execution contract.** Keep `SCHEMA.md` load-bearing. Agents
23
+ branch on `ok`, `error.kind`, `results.e`, refs, and run manifests instead
24
+ of parsing log prose.
25
+ 2. **Econometrics workflow intelligence.** Ship concise skill references and
26
+ prompts that know the Stata commands economists actually use: `csdid`,
27
+ `did_imputation`, `eventstudyinteract`, `rdrobust`, `ivreg2`,
28
+ `ivreghdfe`, `boottest`, `esttab`, `collect`, and related packages.
29
+ 3. **Cross-stack parity audits.** Treat R/Python/Stata disagreement as a
30
+ first-class research risk. `stata-code` should run the Stata leg and define
31
+ the comparison protocol without pretending to own the R or Python runtimes.
32
+ 4. **Data-MCP handoff.** External MCP servers can discover and fetch official
33
+ data. `stata-code` should document and validate the handoff into Stata:
34
+ source metadata, stable raw files, key checks, and reproducible imports.
35
+ 5. **Editor and artifact ergonomics.** VS Code should make sessions, graphs,
36
+ logs, tables, data previews, and run bundles easy to inspect without hiding
37
+ the underlying structured result.
38
+ 6. **Distribution confidence.** Installation and runtime setup should be easy to
39
+ verify without mutating user config. Prefer `doctor`/`verify` diagnostics
40
+ before any automatic config writer.
41
+
42
+ ## Scope Boundaries
43
+
44
+ `stata-code` should not directly bundle data-provider APIs, R sessions, Python
45
+ causal libraries, or paid services. Those are separate tools. The durable
46
+ boundary is: external data/model tools produce files or results; `stata-code`
47
+ executes and audits the Stata side with traceable artifacts.
48
+
49
+ ## One-Month Execution Plan
50
+
51
+ ### Week 1: Workflow Layer
52
+
53
+ - Add cross-agent coordination and this roadmap.
54
+ - Expand the skill reference library for modern DiD, IV/weak-IV, RDD,
55
+ table-export, data-MCP handoff, and parity audits.
56
+ - Add examples that show how agents should use the workflows without claiming
57
+ unsupported automation.
58
+ - Add MCP prompts for parity audit planning, data-MCP-to-Stata handoff, and
59
+ turnkey method templates for DiD/event study, IV/2SLS, RDD, and publication
60
+ tables.
61
+ - Validate with skill packaging tests, MCP prompt tests, and markdown hygiene.
62
+
63
+ ### Week 2: Diagnostics and Setup Confidence
64
+
65
+ - Ship a read-only `stata-code doctor` / `verify` command that reports Python,
66
+ `stata-code`, MCP extras, `pystata` discovery, Stata version/edition, PATH
67
+ resolution, and common client config hints.
68
+ - Keep config writing out of scope until backups and dry-run behavior exist.
69
+ - Add tests for missing `pystata`, missing MCP extra, path mismatch, and JSON
70
+ output.
71
+
72
+ ### Week 3: VS Code and Artifacts
73
+
74
+ - Improve dataset preview from first-100 text output toward a paged/filterable
75
+ view or a clearly documented intermediate step.
76
+ - Surface table/export artifacts from run bundles more explicitly.
77
+ - Add tests around formatter and tree-provider behavior before broad UI work.
78
+
79
+ ### Week 4: Release Quality
80
+
81
+ - Sweep README.md, README.zh.md, vscode/README.md, CHANGELOG.md, examples,
82
+ and skill docs for drift.
83
+ - Run release-relevant checks: version guard, schema export, skill zip build,
84
+ MCP tests, core tests that do not require Stata, and VS Code compile/tests if
85
+ touched.
86
+ - Prepare release notes that separate shipped features from roadmap items.
87
+
88
+ ## Success Criteria
89
+
90
+ - Agents can find a documented path for the top empirical workflows without
91
+ loading the whole reference library.
92
+ - Parity audits preserve sample definitions, package versions, estimator
93
+ defaults, failure/refusal behavior, and numeric tolerances.
94
+ - Data pulled by external MCP servers enters Stata through a reproducible raw
95
+ file plus metadata handoff, not through unstated browser-copy steps.
96
+ - User-facing docs explain that `stata-code` runs Stata and coordinates with
97
+ other MCP tools; they do not imply that it directly runs R/Python or hosts
98
+ official data APIs.
99
+ - All changed surfaces have targeted validation evidence before handoff.