stata-code 0.7.1__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {stata_code-0.7.1 → stata_code-0.8.0}/CHANGELOG.md +54 -1
  2. {stata_code-0.7.1 → stata_code-0.8.0}/PKG-INFO +63 -10
  3. {stata_code-0.7.1 → stata_code-0.8.0}/README.md +62 -9
  4. stata_code-0.8.0/docs/industry-leader-roadmap.md +99 -0
  5. stata_code-0.8.0/examples/06-cross-stack-parity-audit.md +101 -0
  6. stata_code-0.8.0/examples/07-data-mcp-handoff.md +77 -0
  7. {stata_code-0.7.1 → stata_code-0.8.0}/examples/README.md +2 -0
  8. {stata_code-0.7.1 → stata_code-0.8.0}/pyproject.toml +2 -1
  9. stata_code-0.8.0/scripts/build_skill_zip.py +105 -0
  10. stata_code-0.8.0/scripts/check_github_actions.py +96 -0
  11. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/__init__.py +1 -1
  12. stata_code-0.8.0/stata_code/__main__.py +6 -0
  13. stata_code-0.8.0/stata_code/cli.py +73 -0
  14. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/runner.py +101 -0
  15. stata_code-0.8.0/stata_code/doctor.py +285 -0
  16. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/mcp/server.py +833 -1
  17. stata_code-0.8.0/tests/test_doctor.py +148 -0
  18. stata_code-0.8.0/tests/test_github_actions.py +52 -0
  19. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_mcp.py +133 -0
  20. stata_code-0.8.0/tests/test_method_prompts.py +167 -0
  21. stata_code-0.8.0/tests/test_new_tools.py +472 -0
  22. stata_code-0.8.0/tests/test_skill_package.py +75 -0
  23. {stata_code-0.7.1 → stata_code-0.8.0}/.gitignore +0 -0
  24. {stata_code-0.7.1 → stata_code-0.8.0}/LICENSE +0 -0
  25. {stata_code-0.7.1 → stata_code-0.8.0}/LICENSE-POLICY.md +0 -0
  26. {stata_code-0.7.1 → stata_code-0.8.0}/PUBLISHING.md +0 -0
  27. {stata_code-0.7.1 → stata_code-0.8.0}/SCHEMA.md +0 -0
  28. {stata_code-0.7.1 → stata_code-0.8.0}/docs/design/hard_timeout.md +0 -0
  29. {stata_code-0.7.1 → stata_code-0.8.0}/examples/01-basic-regression.md +0 -0
  30. {stata_code-0.7.1 → stata_code-0.8.0}/examples/02-did-card-krueger.md +0 -0
  31. {stata_code-0.7.1 → stata_code-0.8.0}/examples/03-graphs.md +0 -0
  32. {stata_code-0.7.1 → stata_code-0.8.0}/examples/04-multi-session.md +0 -0
  33. {stata_code-0.7.1 → stata_code-0.8.0}/examples/05-large-matrix.md +0 -0
  34. {stata_code-0.7.1 → stata_code-0.8.0}/schema/run_result.schema.json +0 -0
  35. {stata_code-0.7.1 → stata_code-0.8.0}/scripts/check_versions.py +0 -0
  36. {stata_code-0.7.1 → stata_code-0.8.0}/scripts/export_schema.py +0 -0
  37. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/__init__.py +0 -0
  38. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/_pool.py +0 -0
  39. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/_refs.py +0 -0
  40. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/_runtime.py +0 -0
  41. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/errors.py +0 -0
  42. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/log_artifacts.py +0 -0
  43. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/notebook.py +0 -0
  44. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/run_index.py +0 -0
  45. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/core/schema.py +0 -0
  46. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/__init__.py +0 -0
  47. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/__main__.py +0 -0
  48. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/assets/logo-32x32.png +0 -0
  49. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/assets/logo-64x64.png +0 -0
  50. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/assets/logo-svg.svg +0 -0
  51. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/kernel/kernel.py +0 -0
  52. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/mcp/__init__.py +0 -0
  53. {stata_code-0.7.1 → stata_code-0.8.0}/stata_code/mcp/__main__.py +0 -0
  54. {stata_code-0.7.1 → stata_code-0.8.0}/tests/__init__.py +0 -0
  55. {stata_code-0.7.1 → stata_code-0.8.0}/tests/conftest.py +0 -0
  56. {stata_code-0.7.1 → stata_code-0.8.0}/tests/fixtures/.gitkeep +0 -0
  57. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_cancel.py +0 -0
  58. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_errors.py +0 -0
  59. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_kernel.py +0 -0
  60. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_log_artifacts.py +0 -0
  61. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_mcp_stdio.py +0 -0
  62. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_notebook.py +0 -0
  63. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_notebook_phase2.py +0 -0
  64. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_pool.py +0 -0
  65. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_public_api.py +0 -0
  66. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_release_versions.py +0 -0
  67. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_run_index.py +0 -0
  68. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_runner.py +0 -0
  69. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_runtime_discovery.py +0 -0
  70. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_schema.py +0 -0
  71. {stata_code-0.7.1 → stata_code-0.8.0}/tests/test_schema_artifact.py +0 -0
@@ -4,7 +4,60 @@ All notable changes to `stata-code` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
5
5
  to semver-major.minor for the result schema (see `SCHEMA.md` §6).
6
6
 
7
- ## Unreleased
7
+ ## 0.8.0 — 2026-06-20
8
+
9
+ ### Added
10
+
11
+ - **Economist workflow coordination and roadmap.** Added
12
+ `AGENT_COORDINATION.md` for concurrent-agent lanes and
13
+ `docs/industry-leader-roadmap.md` for the one-month product plan: workflow
14
+ intelligence, parity audits, data-MCP handoff, editor/artifact polish, and
15
+ distribution diagnostics.
16
+ - **Cross-stack and data-MCP workflow references.** The `stata-code` skill now
17
+ includes `references/parity-audit.md` and
18
+ `references/data-mcp-handoff.md`, plus cookbook examples for cross-stack
19
+ parity audits and external-data-MCP handoff into Stata.
20
+ - **Modern empirical-economics package notes.** Added package references for
21
+ `csdid`, `drdid`, `did_imputation`, `eventstudyinteract`,
22
+ `did_multiplegt_dyn`, `rdrobust`, `ivreg2`, `ivreghdfe`, `boottest`, and
23
+ `outreg2`, and wired them into the skill routing table.
24
+ - **MCP prompt discoverability for economist workflows.** Added
25
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
26
+ `did_event_study`, `iv_2sls`, `rdd`, `publication_table`, and
27
+ `cross_validate_did` prompts so clients can discover the new protocols and
28
+ turnkey empirical recipes directly through MCP.
29
+ - **Read-only installation diagnostics.** Added the top-level `stata-code`
30
+ console script with `doctor` / `verify` commands. The diagnostic reports
31
+ package/Python version, MCP and kernel extras, `pystata` discovery, console
32
+ scripts on `PATH`, client/VS Code hints, and an optional live Stata
33
+ version/edition probe without mutating user configuration.
34
+
35
+ ## 0.7.2 — 2026-06-20
36
+
37
+ ### Added
38
+
39
+ - **Three convenience MCP tools** raise the tool surface from 15 to 18:
40
+ - `install_package(name, source?, url?, replace?, session_id?)` — installs a
41
+ community package via `ssc install` / `net install` without the agent
42
+ having to remember the syntax, then verifies it resolves with `which`.
43
+ Package names and URLs are validated before they are placed in the generated
44
+ command line; failures surface the typed `error` block (e.g. `network`).
45
+ - `search_log(ref, pattern, is_regex?, ignore_case?, context?, max_matches?)`
46
+ — greps within a truncated `log://` payload and returns only the matching
47
+ lines (with optional context), so a long log can be inspected without
48
+ pulling the whole transcript back through `get_log`.
49
+ - `inspect_data(varlist?, detail?, session_id?)` — runs `describe` +
50
+ `codebook` and returns the structured `dataset` block plus the codebook
51
+ log: a one-call "what's in this dataset" summary that the agent doesn't have to spell out.
52
+ - **On-demand Stata reference library** under `skills/stata-code/references/`
53
+ (~4,200 lines): topic files for core syntax, data management, econometrics,
54
+ causal inference, panel/time series, graphics, and table export; load-bearing
55
+ `error-codes.md` (the full `rc → kind → fix` table + self-repair loop, aligned
56
+ with the typed-error taxonomy) and `defensive-coding.md`; and per-package notes
57
+ for `reghdfe`, `coefplot`, `estout`, and `gtools`. `SKILL.md` gained a routing
58
+ table (read 1–3 files on demand) and a live-vs-offline execution-mode section.
59
+ - **`scripts/build_skill_zip.py`** packages the skill into a deterministic
60
+ `build/stata-code-skill.zip` for upload as Claude.ai project knowledge.
8
61
 
9
62
  ## 0.7.1 — 2026-06-19
10
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stata-code
3
- Version: 0.7.1
3
+ Version: 0.8.0
4
4
  Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
5
5
  Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
6
6
  Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
@@ -67,6 +67,22 @@ Description-Content-Type: text/markdown
67
67
 
68
68
  `stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
69
69
 
70
+ **For empirical economists.** Drive Stata in plain language: run **DiD, IV, and RDD analyses and build publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
71
+
72
+ **Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
73
+
74
+ ```bash
75
+ claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
76
+ ```
77
+
78
+ Then just ask:
79
+
80
+ > *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
81
+
82
+ `stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
83
+
84
+ **Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
85
+
70
86
  ```text
71
87
  ┌────────────────────────────────────────┐
72
88
  │ stata-code core (Python) │
@@ -84,12 +100,18 @@ Description-Content-Type: text/markdown
84
100
  └─────────────┘ └────────────┘ └─────────────────┘
85
101
  ```
86
102
 
87
- **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
103
+ **Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
88
104
 
89
- Two workflows the current release explicitly supports for end users:
105
+ Three workflows the current tree explicitly supports for end users and agents:
90
106
 
91
107
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
92
108
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
109
+ - **Economist workflow guides.** The bundled skill and cookbook now cover
110
+ modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
111
+ cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
112
+ Python, and official data MCPs remain separate tools with explicit handoff
113
+ files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
114
+ and [`examples/`](examples/).
93
115
 
94
116
  ---
95
117
 
@@ -138,6 +160,19 @@ pip install -e ".[mcp,kernel]"
138
160
 
139
161
  Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
140
162
 
163
+ Verify the local setup with the read-only doctor:
164
+
165
+ ```bash
166
+ stata-code doctor
167
+ stata-code doctor --json # machine-readable output
168
+ stata-code doctor --no-stata-probe # skip live Stata initialization
169
+ ```
170
+
171
+ The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
172
+ discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
173
+ best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
174
+ VS Code config.
175
+
141
176
  ---
142
177
 
143
178
  ## Quick Start
@@ -188,7 +223,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
188
223
  claude mcp add stata-code --scope project -- stata-code-mcp
189
224
  ```
190
225
 
191
- Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 15 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
226
+ Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
192
227
 
193
228
  #### Error Recovery in Agent Workflows
194
229
 
@@ -276,15 +311,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
276
311
  upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
277
312
  processes keep advertising the stale schema until they are restarted.
278
313
 
279
- The MCP server registers 15 tools:
314
+ The MCP server registers 18 tools:
280
315
 
281
316
  | Tool | Purpose |
282
317
  | --- | --- |
283
318
  | `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
284
319
  | `stata_info` | Report Stata edition, version, and capabilities |
285
320
  | `get_log` | Fetch the full log behind a `log://` ref |
321
+ | `search_log` | Search matching lines inside a stored `log://` payload |
286
322
  | `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
287
323
  | `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
324
+ | `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
325
+ | `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
288
326
  | `list_sessions` | Enumerate live sessions |
289
327
  | `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
290
328
  | `reset_session` | Drop a session's data |
@@ -312,8 +350,11 @@ resources:
312
350
 
313
351
  MCP prompts are available for common agent workflows:
314
352
  `run_do_file_and_report`, `debug_stata_error`,
315
- `fix_and_rerun_until_passes`, `replication_audit`, and
316
- `summarize_estimation_results`.
353
+ `fix_and_rerun_until_passes`, `replication_audit`,
354
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
355
+ `summarize_estimation_results`, `run_notebook_cell_and_report`,
356
+ `fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
357
+ `publication_table`, and `cross_validate_did`.
317
358
 
318
359
  ### As a Jupyter Kernel
319
360
 
@@ -355,6 +396,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
355
396
 
356
397
  On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
357
398
 
399
+ If the extension or an MCP client cannot find the server, run
400
+ `stata-code doctor --no-stata-probe` in the same Python environment. It reports
401
+ whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
402
+ `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
403
+ your shell.
404
+
358
405
  #### Cell and section conventions
359
406
 
360
407
  The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
@@ -416,7 +463,7 @@ stata_code/
416
463
  │ ├── runner.py # in-process execute(); collects everything via sfi
417
464
  │ └── _pool.py # subprocess workers for public API / MCP hard timeouts
418
465
  ├── mcp/
419
- │ └── server.py # MCP server (15 tools)
466
+ │ └── server.py # MCP server (18 tools)
420
467
  └── kernel/
421
468
  └── kernel.py # Jupyter kernel
422
469
  ```
@@ -444,7 +491,7 @@ stata_code/
444
491
 
445
492
  ## Roadmap
446
493
 
447
- ### Done (through v0.7 — May 2026)
494
+ ### Done (current tree)
448
495
 
449
496
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
450
497
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -454,12 +501,18 @@ stata_code/
454
501
  - Log truncation with ref store
455
502
  - Warning extraction: 5 categories + generic notes
456
503
  - 32-kind error taxonomy with canonical suggestions
457
- - MCP server: 15 tools, including notebook navigation / search / atomic edits and the run-bundle index (`list_runs`)
504
+ - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
458
505
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
459
506
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
460
507
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
461
508
  - Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
462
509
  - Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
510
+ - Read-only `stata-code doctor` / `verify` diagnostics for package version,
511
+ extras, `pystata` discovery, console scripts, client hints, and optional live
512
+ Stata version probing
513
+ - Economist workflow layer: skill references and examples for modern DiD,
514
+ IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
515
+ audits
463
516
  - JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
464
517
  - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
465
518
  - Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
@@ -28,6 +28,22 @@
28
28
 
29
29
  `stata-code` lets you drive Stata from modern environments: an LLM agent (Claude Code, Cursor, Claude Desktop), a Jupyter notebook, or a VS Code editor session. All frontends share one Python core and return a stable, structured, **agent-friendly** result schema.
30
30
 
31
+ **For empirical economists.** Drive Stata in plain language: run **DiD, IV, and RDD analyses and build publication-ready `esttab` tables in one conversation** — then cross-check each estimate across Stata and Python so you only trust results that *agree* (the Cunningham cross-package robustness check).
32
+
33
+ **Try it in 60 seconds** with [Claude Code](https://github.com/anthropics/claude-code) — no global install needed:
34
+
35
+ ```bash
36
+ claude mcp add stata-code --scope user -- uvx --from "stata-code[mcp]" stata-code-mcp
37
+ ```
38
+
39
+ Then just ask:
40
+
41
+ > *"Using `data/cfps_panel.dta`, run a two-way fixed-effects regression of monthly wage on the treatment (controls: `age age2 edu industry`), then test heterogeneous effects with Callaway-Sant'Anna, and export an `esttab` table."*
42
+
43
+ `stata-code` writes the do-file, runs it, returns the table, and interprets the result — and can re-estimate the same ATT with [StatsPAI](https://github.com/brycewang-stanford/StatsPAI) to confirm the two stacks agree. These workflows ship as one-call MCP prompts (`did_event_study`, `iv_2sls`, `rdd`, `publication_table`, `cross_validate_did`) backed by an on-demand [recipe library](skills/stata-code/references/recipes/).
44
+
45
+ **Why `stata-code`:** MIT-licensed · ships as an MCP server, a bundled agent skill, a Jupyter kernel, **and** a VS Code extension · one structured, token-economy result schema (typed errors, native `r()` / `e()`) · cross-stack validation with StatsPAI for the Cunningham check.
46
+
31
47
  ```text
32
48
  ┌────────────────────────────────────────┐
33
49
  │ stata-code core (Python) │
@@ -45,12 +61,18 @@
45
61
  └─────────────┘ └────────────┘ └─────────────────┘
46
62
  ```
47
63
 
48
- **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
64
+ **Status: v0.8 (June 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
49
65
 
50
- Two workflows the current release explicitly supports for end users:
66
+ Three workflows the current tree explicitly supports for end users and agents:
51
67
 
52
68
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
53
69
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
70
+ - **Economist workflow guides.** The bundled skill and cookbook now cover
71
+ modern DiD, IV/weak-IV, RDD, table export, data-MCP handoff, and
72
+ cross-stack parity audits. `stata-code` runs and audits the Stata leg; R,
73
+ Python, and official data MCPs remain separate tools with explicit handoff
74
+ files and source metadata. See [`skills/stata-code/references/`](skills/stata-code/references/)
75
+ and [`examples/`](examples/).
54
76
 
55
77
  ---
56
78
 
@@ -99,6 +121,19 @@ pip install -e ".[mcp,kernel]"
99
121
 
100
122
  Note: `pystata` is **not** on PyPI; it ships with Stata. `stata-code` auto-discovers it on macOS at `/Applications/Stata/utilities/pystata` and at equivalent Linux / Windows paths. If your install is elsewhere, add it to `PYTHONPATH` before importing.
101
123
 
124
+ Verify the local setup with the read-only doctor:
125
+
126
+ ```bash
127
+ stata-code doctor
128
+ stata-code doctor --json # machine-readable output
129
+ stata-code doctor --no-stata-probe # skip live Stata initialization
130
+ ```
131
+
132
+ The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
133
+ discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
134
+ best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
135
+ VS Code config.
136
+
102
137
  ---
103
138
 
104
139
  ## Quick Start
@@ -149,7 +184,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
149
184
  claude mcp add stata-code --scope project -- stata-code-mcp
150
185
  ```
151
186
 
152
- Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 15 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
187
+ Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
153
188
 
154
189
  #### Error Recovery in Agent Workflows
155
190
 
@@ -237,15 +272,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
237
272
  upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
238
273
  processes keep advertising the stale schema until they are restarted.
239
274
 
240
- The MCP server registers 15 tools:
275
+ The MCP server registers 18 tools:
241
276
 
242
277
  | Tool | Purpose |
243
278
  | --- | --- |
244
279
  | `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
245
280
  | `stata_info` | Report Stata edition, version, and capabilities |
246
281
  | `get_log` | Fetch the full log behind a `log://` ref |
282
+ | `search_log` | Search matching lines inside a stored `log://` payload |
247
283
  | `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
248
284
  | `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
285
+ | `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
286
+ | `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
249
287
  | `list_sessions` | Enumerate live sessions |
250
288
  | `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
251
289
  | `reset_session` | Drop a session's data |
@@ -273,8 +311,11 @@ resources:
273
311
 
274
312
  MCP prompts are available for common agent workflows:
275
313
  `run_do_file_and_report`, `debug_stata_error`,
276
- `fix_and_rerun_until_passes`, `replication_audit`, and
277
- `summarize_estimation_results`.
314
+ `fix_and_rerun_until_passes`, `replication_audit`,
315
+ `plan_cross_stack_parity_audit`, `data_mcp_to_stata_handoff`,
316
+ `summarize_estimation_results`, `run_notebook_cell_and_report`,
317
+ `fix_and_rerun_notebook_cell`, `did_event_study`, `iv_2sls`, `rdd`,
318
+ `publication_table`, and `cross_validate_did`.
278
319
 
279
320
  ### As a Jupyter Kernel
280
321
 
@@ -316,6 +357,12 @@ Or open the **Extensions** sidebar in VS Code and search `stata-code`. The exten
316
357
 
317
358
  On first activation the extension probes for `stata-code-mcp` on `PATH` (and in any workspace `.venv` / `venv`). If nothing resolves, it shows a one-time install hint with the exact `pip install "stata-code[mcp]"` command — choose **Don't show again** to silence it for the installed extension version.
318
359
 
360
+ If the extension or an MCP client cannot find the server, run
361
+ `stata-code doctor --no-stata-probe` in the same Python environment. It reports
362
+ whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
363
+ `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
364
+ your shell.
365
+
319
366
  #### Cell and section conventions
320
367
 
321
368
  The extension recognizes two complementary structural markers inside `.do` files. Either can be mixed in the same file; they do not conflict.
@@ -377,7 +424,7 @@ stata_code/
377
424
  │ ├── runner.py # in-process execute(); collects everything via sfi
378
425
  │ └── _pool.py # subprocess workers for public API / MCP hard timeouts
379
426
  ├── mcp/
380
- │ └── server.py # MCP server (15 tools)
427
+ │ └── server.py # MCP server (18 tools)
381
428
  └── kernel/
382
429
  └── kernel.py # Jupyter kernel
383
430
  ```
@@ -405,7 +452,7 @@ stata_code/
405
452
 
406
453
  ## Roadmap
407
454
 
408
- ### Done (through v0.7 — May 2026)
455
+ ### Done (current tree)
409
456
 
410
457
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
411
458
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -415,12 +462,18 @@ stata_code/
415
462
  - Log truncation with ref store
416
463
  - Warning extraction: 5 categories + generic notes
417
464
  - 32-kind error taxonomy with canonical suggestions
418
- - MCP server: 15 tools, including notebook navigation / search / atomic edits and the run-bundle index (`list_runs`)
465
+ - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
419
466
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
420
467
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
421
468
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
422
469
  - Per-cell repair loop on `.ipynb` via `notebook_outline` / `notebook_get_cell` / `notebook_edit_cell` with optimistic-concurrency `expected_source` guards and `origin_cell_id` echo on `RunResult`
423
470
  - Persistent run bundles + `list_runs` query over `manifest.json` files (filter by cell / origin / session / since / ok; page with limit / offset)
471
+ - Read-only `stata-code doctor` / `verify` diagnostics for package version,
472
+ extras, `pystata` discovery, console scripts, client hints, and optional live
473
+ Stata version probing
474
+ - Economist workflow layer: skill references and examples for modern DiD,
475
+ IV/weak-IV, RDD, table export, data-MCP handoff, and cross-stack parity
476
+ audits
424
477
  - JSON Schema artifact auto-generated from `schema.py`: [`schema/run_result.schema.json`](schema/run_result.schema.json)
425
478
  - VS Code extension published to the Marketplace as [`brycewang-stanford.stata-code-vscode`](https://marketplace.visualstudio.com/items?itemName=brycewang-stanford.stata-code-vscode): syntax highlighting, section outline/navigation, code-lens cell and section runners, sidebar (sessions / last result / run history / logs / graphs), status bar, completions, conservative variable rename, diagnostics, MCP child-process spawn
426
479
  - Clean-room license policy ([LICENSE-POLICY.md](LICENSE-POLICY.md))
@@ -0,0 +1,99 @@
1
+ # stata-code Industry Leadership Roadmap
2
+
3
+ This roadmap translates the June 2026 empirical-research MCP landscape into
4
+ work that fits `stata-code`'s architecture. The project should win by being the
5
+ most reliable agent-native Stata execution and audit layer for empirical
6
+ economists, not by becoming a grab-bag data platform or a second R/Python
7
+ runtime.
8
+
9
+ ## North Star
10
+
11
+ `stata-code` should be the default way an AI agent runs, inspects, repairs, and
12
+ audits Stata work:
13
+
14
+ - one execution core across Python, MCP, Jupyter, and VS Code;
15
+ - stable `RunResult` schema with typed errors and native `r()` / `e()` values;
16
+ - token-efficient logs, graphs, matrices, and run bundles;
17
+ - economist-facing workflows for DiD, IV, RDD, tables, data handoff, and
18
+ cross-package verification.
19
+
20
+ ## Product Pillars
21
+
22
+ 1. **Reliable execution contract.** Keep `SCHEMA.md` load-bearing. Agents
23
+ branch on `ok`, `error.kind`, `results.e`, refs, and run manifests instead
24
+ of parsing log prose.
25
+ 2. **Econometrics workflow intelligence.** Ship concise skill references and
26
+ prompts that know the Stata commands economists actually use: `csdid`,
27
+ `did_imputation`, `eventstudyinteract`, `rdrobust`, `ivreg2`,
28
+ `ivreghdfe`, `boottest`, `esttab`, `collect`, and related packages.
29
+ 3. **Cross-stack parity audits.** Treat R/Python/Stata disagreement as a first
30
+ class research risk. `stata-code` should run the Stata leg and define the
31
+ comparison protocol without pretending to own the R or Python runtimes.
32
+ 4. **Data-MCP handoff.** External MCP servers can discover and fetch official
33
+ data. `stata-code` should document and validate the handoff into Stata:
34
+ source metadata, stable raw files, key checks, and reproducible imports.
35
+ 5. **Editor and artifact ergonomics.** VS Code should make sessions, graphs,
36
+ logs, tables, data previews, and run bundles easy to inspect without hiding
37
+ the underlying structured result.
38
+ 6. **Distribution confidence.** Install and runtime checks should be easy to
39
+ verify without mutating user config. Prefer `doctor`/`verify` diagnostics
40
+ before any automatic config writer.
41
+
42
+ ## Scope Boundaries
43
+
44
+ `stata-code` should not directly bundle data-provider APIs, R sessions, Python
45
+ causal libraries, or paid services. Those are separate tools. The durable
46
+ boundary is: external data/model tools produce files or results; `stata-code`
47
+ executes and audits the Stata side with traceable artifacts.
48
+
49
+ ## One-Month Execution Plan
50
+
51
+ ### Week 1: Workflow Layer
52
+
53
+ - Add cross-agent coordination and this roadmap.
54
+ - Expand the skill reference library for modern DiD, IV/weak-IV, RDD,
55
+ table-export, data-MCP handoff, and parity audits.
56
+ - Add examples that show how agents should use the workflows without claiming
57
+ unsupported automation.
58
+ - Add MCP prompts for parity audit planning, data-MCP-to-Stata handoff, and
59
+ turnkey method templates for DiD/event study, IV/2SLS, RDD, and publication
60
+ tables.
61
+ - Validate with skill packaging tests, MCP prompt tests, and markdown hygiene checks.
62
+
63
+ ### Week 2: Diagnostics and Setup Confidence
64
+
65
+ - Ship a read-only `stata-code doctor` / `verify` command that reports Python,
66
+ `stata-code`, MCP extras, `pystata` discovery, Stata version/edition, PATH
67
+ resolution, and common client config hints.
68
+ - Keep config writing out of scope until backups and dry-run behavior exist.
69
+ - Add tests for missing `pystata`, missing MCP extra, path mismatch, and JSON
70
+ output.
71
+
72
+ ### Week 3: VS Code and Artifacts
73
+
74
+ - Improve dataset preview from first-100 text output toward a paged/filterable
75
+ view or a clearly documented intermediate step.
76
+ - Surface table/export artifacts from run bundles more explicitly.
77
+ - Add tests around formatter and tree-provider behavior before broad UI work.
78
+
79
+ ### Week 4: Release Quality
80
+
81
+ - Sweep README.md, README.zh.md, vscode/README.md, CHANGELOG.md, examples,
82
+ and skill docs for drift.
83
+ - Run release-relevant checks: version guard, schema export, skill zip build,
84
+ MCP tests, core tests that do not require Stata, and VS Code compile/tests if
85
+ touched.
86
+ - Prepare release notes that separate shipped features from roadmap items.
87
+
88
+ ## Success Criteria
89
+
90
+ - Agents can find a documented path for the top empirical workflows without
91
+ loading the whole reference library.
92
+ - Parity audits preserve sample definitions, package versions, estimator
93
+ defaults, failure/refusal behavior, and numeric tolerances.
94
+ - Data pulled by external MCP servers enters Stata through a reproducible raw
95
+ file plus metadata handoff, not through unstated browser-copy steps.
96
+ - User-facing docs explain that `stata-code` runs Stata and coordinates with
97
+ other MCP tools; they do not imply that it directly runs R/Python or hosts
98
+ official data APIs.
99
+ - All changed surfaces have targeted validation evidence before handoff.
@@ -0,0 +1,101 @@
1
+ # 06 — Cross-stack parity audit
2
+
3
+ > **Goal:** show how an agent should use `stata-code` for the Stata leg of a
4
+ > Stata/R/Python robustness audit without pretending that one tool owns every
5
+ > runtime.
6
+
7
+ This example is intentionally protocol-first. The exact R/Python calls depend
8
+ on which external MCP servers or local runtimes the user has installed. The
9
+ Stata leg is concrete and traceable through `stata_run`.
10
+
11
+ ## Step 1: freeze the common sample
12
+
13
+ **Agent calls:**
14
+
15
+ ```json
16
+ {
17
+ "tool": "stata_run",
18
+ "arguments": {
19
+ "code": "use data/panel.dta, clear\negen unit_id = group(firm_id), label\negen time_id = group(year), label\ngen byte audit_sample = !missing(y, first_treat, unit_id, time_id, x1, x2)\nkeep if audit_sample\nisid unit_id time_id\ncompress\ndatasignature set, reset\nsave data/derived/parity_sample.dta, replace\nexport delimited using data/derived/parity_sample.csv, replace",
20
+ "origin_path": "/abs/project/analysis/00_freeze_parity_sample.do",
21
+ "origin_kind": "file",
22
+ "persist_log_files": true
23
+ }
24
+ }
25
+ ```
26
+
27
+ **Agent reads:**
28
+
29
+ - `ok`, `rc`, and any typed error.
30
+ - `dataset.n_obs` and `dataset.n_vars`.
31
+ - `log.files.directory` for the run bundle.
32
+ - generated files copied into `outputs/` when persistence is enabled.
33
+
34
+ The CSV is the handoff file for R/Python tools. The DTA is the Stata source for
35
+ the Stata estimators. Do not let each package apply its own missing-value
36
+ handling and silently estimate on a different sample.
37
+
38
+ ## Step 2: run the Stata estimator
39
+
40
+ **Agent calls:**
41
+
42
+ ```json
43
+ {
44
+ "tool": "stata_run",
45
+ "arguments": {
46
+ "code": "use data/derived/parity_sample.dta, clear\ncsdid y x1 x2, ivar(unit_id) time(time_id) gvar(first_treat) method(dripw)\nestat simple\nestat event\ncsdid_plot",
47
+ "session_id": "stata_csdid",
48
+ "origin_path": "/abs/project/analysis/01_stata_csdid.do",
49
+ "origin_kind": "file",
50
+ "persist_log_files": true
51
+ }
52
+ }
53
+ ```
54
+
55
+ **Agent reads:**
56
+
57
+ - `results.e.scalars` for `N` and available fit/ATT scalars.
58
+ - `results.e.matrices` for coefficient and VCE payloads.
59
+ - `graphs[0].ref` for the event-study plot.
60
+ - `warnings` and `log.error_window` for dropped cohorts or estimator refusal.
61
+
62
+ If `csdid` is missing, the repair loop may call:
63
+
64
+ ```json
65
+ {"tool": "install_package", "arguments": {"name": "csdid"}}
66
+ ```
67
+
68
+ and, if needed:
69
+
70
+ ```json
71
+ {"tool": "install_package", "arguments": {"name": "drdid"}}
72
+ ```
73
+
74
+ ## Step 3: run external legs with their own tools
75
+
76
+ The agent should hand `data/derived/parity_sample.csv` plus the written parity
77
+ contract to the R/Python tools that are actually available. `stata-code` should
78
+ not claim those estimates. The agent should record their package versions, options,
79
+ sample `N`, warnings/refusals, and output files in the comparison table.
80
+
81
+ ## Step 4: compare only like with like
82
+
83
+ | Stack | Package | Target | N | Estimate | SE | Warning/refusal |
84
+ | --- | --- | --- | ---: | ---: | ---: | --- |
85
+ | Stata | `csdid` | overall ATT from `estat simple` | from `results.e` | from `e(b)`/scalar | from `e(V)` | from `warnings` |
86
+ | R | external | same target | external | external | external | external |
87
+ | Python | external | same target | external | external | external | external |
88
+
89
+ Do not compare an overall ATT to an event-time coefficient. Do not hide package
90
+ refusals. If sample `N` differs, stop and fix the sample before interpreting
91
+ coefficient differences.
92
+
93
+ ## Step 5: report conservatively
94
+
95
+ Use language like:
96
+
97
+ - "The Stata `csdid` leg ran on the frozen sample and produced ..."
98
+ - "The R/Python legs were run by external tools; stata-code only coordinated the
99
+ handoff and Stata audit trail."
100
+ - "The estimates agree within the predeclared tolerance" or "they diverge, with
101
+ the likely source being sample/default/failure differences."