stata-code 0.3.1__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {stata_code-0.3.1 → stata_code-0.5.0}/.gitignore +14 -0
  2. {stata_code-0.3.1 → stata_code-0.5.0}/CHANGELOG.md +100 -1
  3. {stata_code-0.3.1 → stata_code-0.5.0}/PKG-INFO +69 -15
  4. {stata_code-0.3.1 → stata_code-0.5.0}/README.md +68 -14
  5. {stata_code-0.3.1 → stata_code-0.5.0}/SCHEMA.md +41 -1
  6. {stata_code-0.3.1 → stata_code-0.5.0}/pyproject.toml +1 -1
  7. {stata_code-0.3.1 → stata_code-0.5.0}/schema/run_result.schema.json +126 -0
  8. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/__init__.py +3 -1
  9. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/__init__.py +2 -0
  10. stata_code-0.5.0/stata_code/core/log_artifacts.py +431 -0
  11. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/runner.py +145 -4
  12. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/schema.py +19 -0
  13. stata_code-0.5.0/stata_code/kernel/assets/logo-32x32.png +0 -0
  14. stata_code-0.5.0/stata_code/kernel/assets/logo-64x64.png +0 -0
  15. stata_code-0.5.0/stata_code/kernel/assets/logo-svg.svg +41 -0
  16. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/kernel/kernel.py +15 -1
  17. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/mcp/server.py +62 -2
  18. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_kernel.py +32 -1
  19. stata_code-0.5.0/tests/test_log_artifacts.py +154 -0
  20. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_mcp.py +9 -0
  21. {stata_code-0.3.1 → stata_code-0.5.0}/LICENSE +0 -0
  22. {stata_code-0.3.1 → stata_code-0.5.0}/LICENSE-POLICY.md +0 -0
  23. {stata_code-0.3.1 → stata_code-0.5.0}/PUBLISHING.md +0 -0
  24. {stata_code-0.3.1 → stata_code-0.5.0}/docs/design/hard_timeout.md +0 -0
  25. {stata_code-0.3.1 → stata_code-0.5.0}/examples/01-basic-regression.md +0 -0
  26. {stata_code-0.3.1 → stata_code-0.5.0}/examples/02-did-card-krueger.md +0 -0
  27. {stata_code-0.3.1 → stata_code-0.5.0}/examples/03-graphs.md +0 -0
  28. {stata_code-0.3.1 → stata_code-0.5.0}/examples/04-multi-session.md +0 -0
  29. {stata_code-0.3.1 → stata_code-0.5.0}/examples/05-large-matrix.md +0 -0
  30. {stata_code-0.3.1 → stata_code-0.5.0}/examples/README.md +0 -0
  31. {stata_code-0.3.1 → stata_code-0.5.0}/scripts/export_schema.py +0 -0
  32. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/_pool.py +0 -0
  33. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/_refs.py +0 -0
  34. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/_runtime.py +0 -0
  35. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/core/errors.py +0 -0
  36. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/kernel/__init__.py +0 -0
  37. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/kernel/__main__.py +0 -0
  38. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/mcp/__init__.py +0 -0
  39. {stata_code-0.3.1 → stata_code-0.5.0}/stata_code/mcp/__main__.py +0 -0
  40. {stata_code-0.3.1 → stata_code-0.5.0}/tests/__init__.py +0 -0
  41. {stata_code-0.3.1 → stata_code-0.5.0}/tests/fixtures/.gitkeep +0 -0
  42. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_cancel.py +0 -0
  43. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_errors.py +0 -0
  44. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_pool.py +0 -0
  45. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_runner.py +0 -0
  46. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_schema.py +0 -0
  47. {stata_code-0.3.1 → stata_code-0.5.0}/tests/test_schema_artifact.py +0 -0
@@ -218,7 +218,21 @@ __marimo__/
218
218
  .streamlit/secrets.toml
219
219
 
220
220
  # Stata-specific
221
+ log-files/
221
222
  *.gph
222
223
  *.smcl
223
224
  *.dta
224
225
  !tests/fixtures/*.dta
226
+
227
+ # macOS
228
+ .DS_Store
229
+ **/.DS_Store
230
+
231
+ # VS Code workspace settings (contain user-machine absolute paths)
232
+ .vscode/
233
+
234
+ # Claude Code scratch (worktrees, transcripts)
235
+ .claude/
236
+
237
+ # Demo / scratch notebooks (real tests live under tests/)
238
+ demo-tests/*.ipynb
@@ -4,7 +4,106 @@ All notable changes to `stata-code` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
5
5
  to semver-major.minor for the result schema (see `SCHEMA.md` §6).
6
6
 
7
- ## [Unreleased]
7
+ ## [0.5.0] — 2026-05-08
8
+
9
+ ### Added
10
+
11
+ - **Bundled Jupyter kernel logos.** `stata-code-kernel install --user`
12
+ now copies `stata_code/kernel/assets/{logo-32x32.png,logo-64x64.png,
13
+ logo-svg.svg}` into the kernelspec source dir before
14
+ `KernelSpecManager().install_kernel_spec` runs. VS Code's Jupyter
15
+ extension filters out kernelspecs that lack logo files, so prior
16
+ releases were invisible in its kernel picker; v0.5 fixes that without
17
+ affecting JupyterLab or classic Jupyter (which both already worked).
18
+ - **TestPyPI publishing step in `release.yml`.** Tag `v*` now publishes
19
+ to TestPyPI (via OIDC trusted publishing, environment `testpypi`)
20
+ before publishing to PyPI proper. `continue-on-error: true` keeps
21
+ PyPI + GitHub Release on the happy path even when TestPyPI is
22
+ misconfigured. Setup mirrors the PyPI trusted publisher and is
23
+ documented in [CLAUDE.md](CLAUDE.md).
24
+
25
+ ### Changed
26
+
27
+ - **`stata_run` tool description and README** clarify the boundary
28
+ between non-mutating execution and the optional agent "fix and
29
+ rerun" repair loop. The tool itself never rewrites your `.do` file
30
+ — but the submitted Stata code can still produce logs, graphs, and
31
+ output files as usual. Repair loops require explicit user opt-in;
32
+ failed runs are diagnostics first, not automatic rewrite permission.
33
+ - **VSCode MCP-client handshake version aligned to 0.5.0** (was a
34
+ stale 0.3.2 since the v0.3.2 release).
35
+
36
+ ### Fixed
37
+
38
+ - **`install_kernel` no longer `.resolve()`s `sys.executable`.** On
39
+ macOS Homebrew venvs (and other layouts that use a `python` symlink
40
+ outside the venv's `bin/` to a Cellar-style real interpreter),
41
+ resolving the symlink pointed Jupyter at an interpreter that
42
+ couldn't import `stata_code`. The kernelspec now keeps the
43
+ unresolved `sys.executable`, so the venv's `python` (with
44
+ `stata_code` on its `sys.path`) launches the kernel.
45
+
46
+ ## [0.4.0] — 2026-05-07
47
+
48
+ ### Added
49
+
50
+ - **Persistent per-run log bundles.** When a `.do` file path is supplied as
51
+ `origin_path`, the runner writes an immutable `log-files/<run>/` directory
52
+ next to the source file containing:
53
+ - `<run>.log` and `<run>.smcl` — Stata's textual and SMCL logs
54
+ - `manifest.json` — run metadata (elapsed_ms, rc, session, Stata edition)
55
+ - `submitted.do` — a snapshot of the code that was executed
56
+ - `graphs/` — captured graph files materialized from graph refs
57
+ - `outputs/` — newly created or modified table/export files copied from
58
+ the run's working directory
59
+
60
+ The directory name encodes UTC timestamp, session, and request IDs so
61
+ parallel runs and reruns are never ambiguous.
62
+
63
+ - **Working-directory defaults from `origin_path`.** Before running,
64
+ Stata `cd`s to the `.do` file's parent so relative `graph export`,
65
+ `putexcel`, `esttab using`, `collect export`, etc. output next to the
66
+ source. Toggle with `use_origin_workdir` / `useDoFileDirectory` setting.
67
+ Explicit `working_dir` overrides this.
68
+
69
+ - **Schema extensions.** `LogInfo.files` (`LogFileInfo`) carries the
70
+ bundle paths and derived `graphs_dir`/`outputs_dir`; `GraphInfo.file_path`
71
+ records where a graph was materialized; two new capabilities
72
+ `log_files` and `run_artifacts` signal support.
73
+
74
+ - **MCP tool options.** `stata_run` gains `persist_log_files`,
75
+ `persist_generated_files`, `origin_path`, `origin_kind`,
76
+ `origin_label`, `use_origin_workdir`, `working_dir`.
77
+
78
+ - **VS Code settings.** Three new configuration options:
79
+ `stataCode.persistLogFiles` (default `true`),
80
+ `stataCode.persistGeneratedFiles` (default `true`),
81
+ `stataCode.useDoFileDirectory` (default `true`).
82
+
83
+ - **VS Code tree views.** The Last Result tree now shows "saved" and
84
+ "N outputs" badges on the log node when artifacts are present; the
85
+ output log header prints `working_dir:`, `log_file:`, `smcl_file:`,
86
+ `graphs_dir:`, `outputs_dir:` for each run.
87
+
88
+ ### Changed
89
+
90
+ - **VSCode MCP startup.** The extension now expands common macOS Python
91
+ script directories before spawning `stata-code-mcp`, tries workspace
92
+ `.venv` and `python -m stata_code.mcp` fallbacks for the default command,
93
+ and writes child-process stderr to the `stata-code` output channel so
94
+ missing PATH / missing dependency failures are actionable.
95
+ - **VSCode toolbar ordering.** Run-all and run-selection now share the same
96
+ ordinary `editor/title` toolbar sequence, with ordering moved later in the
97
+ `navigation` group to reduce interleaving from other extensions.
98
+
99
+ ## [0.3.2] — 2026-05-08
100
+
101
+ ### Changed
102
+
103
+ - **VSCode toolbar ordering.** Editor title-bar actions now live in one
104
+ contiguous `navigation` group so `stata-code` buttons stay together. The
105
+ order prioritizes run commands first, then data/output views, session
106
+ controls, cancellation/reset, and working-directory actions.
8
107
 
9
108
  ## [0.3.1] — 2026-05-07
10
109
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stata-code
3
- Version: 0.3.1
3
+ Version: 0.5.0
4
4
  Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
5
5
  Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
6
6
  Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
@@ -59,7 +59,12 @@ Description-Content-Type: text/markdown
59
59
  └─────────────┘ └────────────┘ └─────────────────┘
60
60
  ```
61
61
 
62
- **Status: v0.2 (May 2026)** — the core, MCP server, and Jupyter kernel work end-to-end against Stata 18 MP. Current test suite: 144 passing tests (88 no-Stata unit tests + 56 real-Stata integration tests). License: **MIT**.
62
+ **Status: v0.5 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. Current test suite: 218 passing tests across schema, runner, MCP, kernel, and ref-store modules. License: **MIT**.
63
+
64
+ Two workflows v0.5 explicitly supports for end users:
65
+
66
+ - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled in v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
67
+ - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
63
68
 
64
69
  ---
65
70
 
@@ -135,7 +140,7 @@ else:
135
140
 
136
141
  ### As an MCP Server
137
142
 
138
- After `pip install stata-code`, the `stata-code-mcp` binary is on your `PATH`. You can wire it into Claude Code, Cursor, Claude Desktop, or any other MCP-compatible client.
143
+ After `pip install "stata-code[mcp]"`, the `stata-code-mcp` binary is on your `PATH`. You can wire it into Claude Code, Cursor, Claude Desktop, or any other MCP-compatible client.
139
144
 
140
145
  #### Claude Code via `claude mcp add` (recommended)
141
146
 
@@ -156,6 +161,15 @@ claude mcp add stata-code --scope project -- stata-code-mcp
156
161
 
157
162
  Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 8 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`).
158
163
 
164
+ #### Error Recovery in Agent Workflows
165
+
166
+ `stata_run` does not rewrite the source `.do` file or change code on its own. It executes the submitted Stata code, so that code may still create logs, graphs, tables, or other outputs as usual. When Stata fails, `stata_run` returns typed diagnostics (`error.kind`, `error.message`, `error.line`, `error.context`) plus best-effort `suggestions`. That supports two distinct Claude Code workflows:
167
+
168
+ - For "run this do-file" or "verify this code", Claude can report the failure and suggested next steps without changing source files.
169
+ - For "fix this and rerun until it passes", Claude can use the same structured error fields to edit the `.do` file, call `stata_run` again, and iterate.
170
+
171
+ If you want the repair loop, say so explicitly. Otherwise, treat failed runs as diagnostics first, not as automatic permission to rewrite code.
172
+
159
173
  #### `uvx` (no global pip install)
160
174
 
161
175
  If you prefer not to `pip install stata-code` globally, run it ephemerally through [`uv`](https://github.com/astral-sh/uv):
@@ -201,17 +215,30 @@ The MCP server registers 8 tools:
201
215
 
202
216
  ### As a Jupyter Kernel
203
217
 
218
+ `stata-code` ships a Jupyter kernel as part of the Python package — there is no separate "Jupyter plugin" in the JupyterLab extension marketplace. Installation is two steps: `pip install` the package with the `kernel` extra, then register the kernelspec with Jupyter.
219
+
220
+ **Prerequisites**: Stata 17+ installed locally with a valid license (the kernel calls Stata via `pystata`), and Python 3.10+ with `jupyter`/`jupyterlab` already installed in the same environment.
221
+
204
222
  ```bash
223
+ # 1. Install stata-code with the kernel extra (pulls in ipykernel)
224
+ pip install "stata-code[kernel]"
225
+
226
+ # 2. Register the kernelspec into Jupyter's user data dir
205
227
  stata-code-kernel install --user
228
+ # Or, equivalently:
229
+ # python -m stata_code.kernel install --user
206
230
  ```
207
231
 
208
- Or install it as a module:
232
+ Verify the kernel is registered:
209
233
 
210
234
  ```bash
211
- python -m stata_code.kernel install --user
235
+ jupyter kernelspec list
236
+ # should include an entry named `stata`
212
237
  ```
213
238
 
214
- Then open a notebook and select the **Stata** kernel. Stata commands run in cells; logs, graphs, and warnings render inline.
239
+ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata** in the kernel selector, and run Stata commands in cells. Logs, graphs, and warnings render inline.
240
+
241
+ > JupyterLab's Extension Manager only installs front-end JS extensions, so it cannot install a kernel — `pip install` plus the `install --user` step above is the only supported path.
215
242
 
216
243
  ### As a VS Code Extension
217
244
 
@@ -224,7 +251,7 @@ code --install-extension brycewang-stanford.stata-code-vscode
224
251
 
225
252
  Or open the **Extensions** sidebar in VS Code and search `stata-code`.
226
253
 
227
- The extension still requires `stata-code` itself to be importable on your system Python (`pip install stata-code`), so that `stata-code-mcp` resolves on `PATH`. Stata 17+ and a valid Stata license are required as for any other frontend.
254
+ The extension still requires the MCP extra on your system Python (`pip install "stata-code[mcp]"`), so that `stata-code-mcp` resolves on `PATH` and can import the MCP SDK. Stata 17+ and a valid Stata license are required as for any other frontend.
228
255
 
229
256
  ---
230
257
 
@@ -301,7 +328,7 @@ stata_code/
301
328
 
302
329
  ## Roadmap
303
330
 
304
- ### Done (v0.2 — May 2026)
331
+ ### Done (through v0.5 — May 2026)
305
332
 
306
333
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
307
334
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -334,7 +361,7 @@ See [SCHEMA.md §7](SCHEMA.md) for explicitly out-of-scope items.
334
361
 
335
362
  ```bash
336
363
  pip install -e ".[dev,mcp,kernel]"
337
- pytest # full suite (144 tests)
364
+ pytest # full suite (218 tests)
338
365
  pytest -m "not stata_required" # CI subset; no Stata needed
339
366
  pytest -m "stata_required" -v # Stata-only integration tests
340
367
  ```
@@ -389,7 +416,12 @@ The Stata tooling landscape that this project builds on and learns from is surve
389
416
  └─────────────┘ └────────────┘ └─────────────────┘
390
417
  ```
391
418
 
392
- **当前状态:v0.2(2026 年 5 月)** —— core、MCP server 和 Jupyter kernel 已经可以在 Stata 18 MP 上端到端运行。当前测试:144 passing(88 个不需要 Stata 的单元测试 + 56 个真实 Stata 集成测试)。许可证:**MIT**。
419
+ **当前状态:v0.5(2026 年 5 月)** —— core、MCP server、Jupyter kernel、VS Code 扩展都已经在 Stata 18 MP 上端到端跑通。测试套件:218 passing tests,覆盖 schema、runner、MCP、kernel 和 ref-store。许可证:**MIT**。
420
+
421
+ v0.5 明确支持的两种用户工作流:
422
+
423
+ - **在 Jupyter notebook 里跑 Stata 代码。** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` 会注册一个名为 **Stata** 的 kernel,Jupyter Notebook、JupyterLab、以及 VS Code 的 Jupyter 扩展都能在 kernel 选择器里看到它。Cell 里直接写 Stata 命令,日志、图形和警告会内联渲染(v0.5 把 kernel logo 一起打包进 PyPI wheel,VS Code 的 Jupyter kernel picker 也能正常显示)。详见下文 [作为 Jupyter kernel](#作为-jupyter-kernel)。
424
+ - **可选的 agent「修复并重跑」循环。** `stata_run` 在每次失败时都会返回结构化的 `error.kind/line/context` 和 `suggestions`。默认情况下 Claude Code 只把它当作诊断信息上报;但如果你明确说「帮我修到跑通」「修复并反复运行直到成功」,agent 就会用同一组字段去改 `.do` 文件、再调 `stata_run`,直到代码通过。这个修复循环是 **opt-in** 的:默认失败 = 诊断,不是自动改写授权。详见下文 [Agent 工作流里的报错恢复](#agent-工作流里的报错恢复)。
393
425
 
394
426
  ---
395
427
 
@@ -464,7 +496,7 @@ else:
464
496
 
465
497
  ### 作为 MCP server
466
498
 
467
- `pip install stata-code` 之后,`stata-code-mcp` 会出现在你的 `PATH` 中。可以接到 Claude Code、Cursor、Claude Desktop 等任何兼容 MCP 的客户端里。
499
+ `pip install "stata-code[mcp]"` 之后,`stata-code-mcp` 会出现在你的 `PATH` 中。可以接到 Claude Code、Cursor、Claude Desktop 等任何兼容 MCP 的客户端里。
468
500
 
469
501
  #### 用 `claude mcp add` 接入 Claude Code(推荐)
470
502
 
@@ -485,6 +517,15 @@ claude mcp add stata-code --scope project -- stata-code-mcp
485
517
 
486
518
  接着运行 `claude`,输入 `/mcp` 确认 `stata-code` 出现并带有 8 个工具(`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`)。
487
519
 
520
+ #### Agent 工作流里的报错恢复
521
+
522
+ `stata_run` 不会自行改写源 `.do` 文件或替你改代码。它执行提交的 Stata 代码,所以代码本身仍可能照常生成日志、图形、表格或其他输出。Stata 报错时,`stata_run` 返回结构化诊断(`error.kind`, `error.message`, `error.line`, `error.context`)和尽力生成的 `suggestions`。这支持两种不同的 Claude Code 工作流:
523
+
524
+ - 如果你说的是「运行这个 do-file」或「验证这段代码」,Claude 可以只报告失败原因和建议的下一步,不修改源文件。
525
+ - 如果你明确说「帮我修到跑通」或「修复并反复运行直到成功」,Claude 可以基于同一组结构化错误字段修改 `.do` 文件,再调用 `stata_run` 继续迭代。
526
+
527
+ 如果需要自动修复循环,请明确说出来。否则,失败的运行应先被视为诊断结果,而不是自动改写代码的授权。
528
+
488
529
  #### 用 `uvx`(不必全局 pip install)
489
530
 
490
531
  如果不想全局 `pip install stata-code`,可以用 [`uv`](https://github.com/astral-sh/uv) 临时运行:
@@ -530,17 +571,30 @@ MCP server 注册了 8 个工具:
530
571
 
531
572
  ### 作为 Jupyter kernel
532
573
 
574
+ `stata-code` 的 Jupyter 支持是以 **kernel** 形式打包在 Python 包里的 —— JupyterLab 插件市场里**没有**独立的 "stata-code 插件"。安装分两步:先 `pip install` 安装带 `kernel` extra 的包,再把 kernelspec 注册到 Jupyter。
575
+
576
+ **前置条件**:本机已经安装 Stata 17+ 且持有合法许可证(kernel 通过 `pystata` 调用本地 Stata),同一个 Python 环境里已经装好 `jupyter`/`jupyterlab`,Python 版本 ≥ 3.10。
577
+
533
578
  ```bash
579
+ # 1. 安装带 kernel extra 的 stata-code(会同时装上 ipykernel)
580
+ pip install "stata-code[kernel]"
581
+
582
+ # 2. 把 kernelspec 注册到当前用户的 Jupyter data dir
534
583
  stata-code-kernel install --user
584
+ # 等价命令:
585
+ # python -m stata_code.kernel install --user
535
586
  ```
536
587
 
537
- 也可以直接以 module 方式安装:
588
+ 检查 kernel 是否注册成功:
538
589
 
539
590
  ```bash
540
- python -m stata_code.kernel install --user
591
+ jupyter kernelspec list
592
+ # 输出里应该能看到名为 `stata` 的条目
541
593
  ```
542
594
 
543
- 然后打开 notebook,选择 **Stata** kernel。Stata 命令会在 cell 中运行,日志、图形和 warnings 会以内联方式显示。
595
+ 然后打开 Jupyter Notebook / JupyterLab(或 VS Code 中的 `.ipynb`),在 kernel 选择器里挑 **Stata**,cell 里直接写 Stata 命令即可,日志、graphs 和 warnings 会以内联方式显示。
596
+
597
+ > JupyterLab 的 Extension Manager 只能安装前端 JS 扩展,**装不了 kernel**。所以上面的 `pip install` + `install --user` 是唯一支持的安装路径。
544
598
 
545
599
  ### 作为 VS Code 扩展
546
600
 
@@ -553,7 +607,7 @@ code --install-extension brycewang-stanford.stata-code-vscode
553
607
 
554
608
  或者打开 VS Code 的 **Extensions** 侧栏,搜索 `stata-code`。
555
609
 
556
- 扩展仍然依赖系统 Python 上能导入 `stata-code`(`pip install stata-code`),从而保证 `stata-code-mcp` 在 `PATH` 上可用。和其它前端一样,需要 Stata 17+ 和有效的 Stata 许可证。
610
+ 扩展仍然依赖系统 Python 上安装了 MCP extra(`pip install "stata-code[mcp]"`),从而保证 `stata-code-mcp` 在 `PATH` 上可用,并且能导入 MCP SDK。和其它前端一样,需要 Stata 17+ 和有效的 Stata 许可证。
557
611
 
558
612
  ---
559
613
 
@@ -21,7 +21,12 @@
21
21
  └─────────────┘ └────────────┘ └─────────────────┘
22
22
  ```
23
23
 
24
- **Status: v0.2 (May 2026)** — the core, MCP server, and Jupyter kernel work end-to-end against Stata 18 MP. Current test suite: 144 passing tests (88 no-Stata unit tests + 56 real-Stata integration tests). License: **MIT**.
24
+ **Status: v0.5 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. Current test suite: 218 passing tests across schema, runner, MCP, kernel, and ref-store modules. License: **MIT**.
25
+
26
+ Two workflows v0.5 explicitly supports for end users:
27
+
28
+ - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled in v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
29
+ - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
25
30
 
26
31
  ---
27
32
 
@@ -97,7 +102,7 @@ else:
97
102
 
98
103
  ### As an MCP Server
99
104
 
100
- After `pip install stata-code`, the `stata-code-mcp` binary is on your `PATH`. You can wire it into Claude Code, Cursor, Claude Desktop, or any other MCP-compatible client.
105
+ After `pip install "stata-code[mcp]"`, the `stata-code-mcp` binary is on your `PATH`. You can wire it into Claude Code, Cursor, Claude Desktop, or any other MCP-compatible client.
101
106
 
102
107
  #### Claude Code via `claude mcp add` (recommended)
103
108
 
@@ -118,6 +123,15 @@ claude mcp add stata-code --scope project -- stata-code-mcp
118
123
 
119
124
  Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 8 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`).
120
125
 
126
+ #### Error Recovery in Agent Workflows
127
+
128
+ `stata_run` does not rewrite the source `.do` file or change code on its own. It executes the submitted Stata code, so that code may still create logs, graphs, tables, or other outputs as usual. When Stata fails, `stata_run` returns typed diagnostics (`error.kind`, `error.message`, `error.line`, `error.context`) plus best-effort `suggestions`. That supports two distinct Claude Code workflows:
129
+
130
+ - For "run this do-file" or "verify this code", Claude can report the failure and suggested next steps without changing source files.
131
+ - For "fix this and rerun until it passes", Claude can use the same structured error fields to edit the `.do` file, call `stata_run` again, and iterate.
132
+
133
+ If you want the repair loop, say so explicitly. Otherwise, treat failed runs as diagnostics first, not as automatic permission to rewrite code.
134
+
121
135
  #### `uvx` (no global pip install)
122
136
 
123
137
  If you prefer not to `pip install stata-code` globally, run it ephemerally through [`uv`](https://github.com/astral-sh/uv):
@@ -163,17 +177,30 @@ The MCP server registers 8 tools:
163
177
 
164
178
  ### As a Jupyter Kernel
165
179
 
180
+ `stata-code` ships a Jupyter kernel as part of the Python package — there is no separate "Jupyter plugin" in the JupyterLab extension marketplace. Installation is two steps: `pip install` the package with the `kernel` extra, then register the kernelspec with Jupyter.
181
+
182
+ **Prerequisites**: Stata 17+ installed locally with a valid license (the kernel calls Stata via `pystata`), and Python 3.10+ with `jupyter`/`jupyterlab` already installed in the same environment.
183
+
166
184
  ```bash
185
+ # 1. Install stata-code with the kernel extra (pulls in ipykernel)
186
+ pip install "stata-code[kernel]"
187
+
188
+ # 2. Register the kernelspec into Jupyter's user data dir
167
189
  stata-code-kernel install --user
190
+ # Or, equivalently:
191
+ # python -m stata_code.kernel install --user
168
192
  ```
169
193
 
170
- Or install it as a module:
194
+ Verify the kernel is registered:
171
195
 
172
196
  ```bash
173
- python -m stata_code.kernel install --user
197
+ jupyter kernelspec list
198
+ # should include an entry named `stata`
174
199
  ```
175
200
 
176
- Then open a notebook and select the **Stata** kernel. Stata commands run in cells; logs, graphs, and warnings render inline.
201
+ Then open Jupyter Notebook / JupyterLab (or a `.ipynb` in VS Code), pick **Stata** in the kernel selector, and run Stata commands in cells. Logs, graphs, and warnings render inline.
202
+
203
+ > JupyterLab's Extension Manager only installs front-end JS extensions, so it cannot install a kernel — `pip install` plus the `install --user` step above is the only supported path.
177
204
 
178
205
  ### As a VS Code Extension
179
206
 
@@ -186,7 +213,7 @@ code --install-extension brycewang-stanford.stata-code-vscode
186
213
 
187
214
  Or open the **Extensions** sidebar in VS Code and search `stata-code`.
188
215
 
189
- The extension still requires `stata-code` itself to be importable on your system Python (`pip install stata-code`), so that `stata-code-mcp` resolves on `PATH`. Stata 17+ and a valid Stata license are required as for any other frontend.
216
+ The extension still requires the MCP extra on your system Python (`pip install "stata-code[mcp]"`), so that `stata-code-mcp` resolves on `PATH` and can import the MCP SDK. Stata 17+ and a valid Stata license are required as for any other frontend.
190
217
 
191
218
  ---
192
219
 
@@ -263,7 +290,7 @@ stata_code/
263
290
 
264
291
  ## Roadmap
265
292
 
266
- ### Done (v0.2 — May 2026)
293
+ ### Done (through v0.5 — May 2026)
267
294
 
268
295
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
269
296
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -296,7 +323,7 @@ See [SCHEMA.md §7](SCHEMA.md) for explicitly out-of-scope items.
296
323
 
297
324
  ```bash
298
325
  pip install -e ".[dev,mcp,kernel]"
299
- pytest # full suite (144 tests)
326
+ pytest # full suite (218 tests)
300
327
  pytest -m "not stata_required" # CI subset; no Stata needed
301
328
  pytest -m "stata_required" -v # Stata-only integration tests
302
329
  ```
@@ -351,7 +378,12 @@ The Stata tooling landscape that this project builds on and learns from is surve
351
378
  └─────────────┘ └────────────┘ └─────────────────┘
352
379
  ```
353
380
 
354
- **当前状态:v0.2(2026 年 5 月)** —— core、MCP server 和 Jupyter kernel 已经可以在 Stata 18 MP 上端到端运行。当前测试:144 passing(88 个不需要 Stata 的单元测试 + 56 个真实 Stata 集成测试)。许可证:**MIT**。
381
+ **当前状态:v0.5(2026 年 5 月)** —— core、MCP server、Jupyter kernel、VS Code 扩展都已经在 Stata 18 MP 上端到端跑通。测试套件:218 passing tests,覆盖 schema、runner、MCP、kernel 和 ref-store。许可证:**MIT**。
382
+
383
+ v0.5 明确支持的两种用户工作流:
384
+
385
+ - **在 Jupyter notebook 里跑 Stata 代码。** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` 会注册一个名为 **Stata** 的 kernel,Jupyter Notebook、JupyterLab、以及 VS Code 的 Jupyter 扩展都能在 kernel 选择器里看到它。Cell 里直接写 Stata 命令,日志、图形和警告会内联渲染(v0.5 把 kernel logo 一起打包进 PyPI wheel,VS Code 的 Jupyter kernel picker 也能正常显示)。详见下文 [作为 Jupyter kernel](#作为-jupyter-kernel)。
386
+ - **可选的 agent「修复并重跑」循环。** `stata_run` 在每次失败时都会返回结构化的 `error.kind/line/context` 和 `suggestions`。默认情况下 Claude Code 只把它当作诊断信息上报;但如果你明确说「帮我修到跑通」「修复并反复运行直到成功」,agent 就会用同一组字段去改 `.do` 文件、再调 `stata_run`,直到代码通过。这个修复循环是 **opt-in** 的:默认失败 = 诊断,不是自动改写授权。详见下文 [Agent 工作流里的报错恢复](#agent-工作流里的报错恢复)。
355
387
 
356
388
  ---
357
389
 
@@ -426,7 +458,7 @@ else:
426
458
 
427
459
  ### 作为 MCP server
428
460
 
429
- `pip install stata-code` 之后,`stata-code-mcp` 会出现在你的 `PATH` 中。可以接到 Claude Code、Cursor、Claude Desktop 等任何兼容 MCP 的客户端里。
461
+ `pip install "stata-code[mcp]"` 之后,`stata-code-mcp` 会出现在你的 `PATH` 中。可以接到 Claude Code、Cursor、Claude Desktop 等任何兼容 MCP 的客户端里。
430
462
 
431
463
  #### 用 `claude mcp add` 接入 Claude Code(推荐)
432
464
 
@@ -447,6 +479,15 @@ claude mcp add stata-code --scope project -- stata-code-mcp
447
479
 
448
480
  接着运行 `claude`,输入 `/mcp` 确认 `stata-code` 出现并带有 8 个工具(`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`)。
449
481
 
482
+ #### Agent 工作流里的报错恢复
483
+
484
+ `stata_run` 不会自行改写源 `.do` 文件或替你改代码。它执行提交的 Stata 代码,所以代码本身仍可能照常生成日志、图形、表格或其他输出。Stata 报错时,`stata_run` 返回结构化诊断(`error.kind`, `error.message`, `error.line`, `error.context`)和尽力生成的 `suggestions`。这支持两种不同的 Claude Code 工作流:
485
+
486
+ - 如果你说的是「运行这个 do-file」或「验证这段代码」,Claude 可以只报告失败原因和建议的下一步,不修改源文件。
487
+ - 如果你明确说「帮我修到跑通」或「修复并反复运行直到成功」,Claude 可以基于同一组结构化错误字段修改 `.do` 文件,再调用 `stata_run` 继续迭代。
488
+
489
+ 如果需要自动修复循环,请明确说出来。否则,失败的运行应先被视为诊断结果,而不是自动改写代码的授权。
490
+
450
491
  #### 用 `uvx`(不必全局 pip install)
451
492
 
452
493
  如果不想全局 `pip install stata-code`,可以用 [`uv`](https://github.com/astral-sh/uv) 临时运行:
@@ -492,17 +533,30 @@ MCP server 注册了 8 个工具:
492
533
 
493
534
  ### 作为 Jupyter kernel
494
535
 
536
+ `stata-code` 的 Jupyter 支持是以 **kernel** 形式打包在 Python 包里的 —— JupyterLab 插件市场里**没有**独立的 "stata-code 插件"。安装分两步:先 `pip install` 安装带 `kernel` extra 的包,再把 kernelspec 注册到 Jupyter。
537
+
538
+ **前置条件**:本机已经安装 Stata 17+ 且持有合法许可证(kernel 通过 `pystata` 调用本地 Stata),同一个 Python 环境里已经装好 `jupyter`/`jupyterlab`,Python 版本 ≥ 3.10。
539
+
495
540
  ```bash
541
+ # 1. 安装带 kernel extra 的 stata-code(会同时装上 ipykernel)
542
+ pip install "stata-code[kernel]"
543
+
544
+ # 2. 把 kernelspec 注册到当前用户的 Jupyter data dir
496
545
  stata-code-kernel install --user
546
+ # 等价命令:
547
+ # python -m stata_code.kernel install --user
497
548
  ```
498
549
 
499
- 也可以直接以 module 方式安装:
550
+ 检查 kernel 是否注册成功:
500
551
 
501
552
  ```bash
502
- python -m stata_code.kernel install --user
553
+ jupyter kernelspec list
554
+ # 输出里应该能看到名为 `stata` 的条目
503
555
  ```
504
556
 
505
- 然后打开 notebook,选择 **Stata** kernel。Stata 命令会在 cell 中运行,日志、图形和 warnings 会以内联方式显示。
557
+ 然后打开 Jupyter Notebook / JupyterLab(或 VS Code 中的 `.ipynb`),在 kernel 选择器里挑 **Stata**,cell 里直接写 Stata 命令即可,日志、graphs 和 warnings 会以内联方式显示。
558
+
559
+ > JupyterLab 的 Extension Manager 只能安装前端 JS 扩展,**装不了 kernel**。所以上面的 `pip install` + `install --user` 是唯一支持的安装路径。
506
560
 
507
561
  ### 作为 VS Code 扩展
508
562
 
@@ -515,7 +569,7 @@ code --install-extension brycewang-stanford.stata-code-vscode
515
569
 
516
570
  或者打开 VS Code 的 **Extensions** 侧栏,搜索 `stata-code`。
517
571
 
518
- 扩展仍然依赖系统 Python 上能导入 `stata-code`(`pip install stata-code`),从而保证 `stata-code-mcp` 在 `PATH` 上可用。和其它前端一样,需要 Stata 17+ 和有效的 Stata 许可证。
572
+ 扩展仍然依赖系统 Python 上安装了 MCP extra(`pip install "stata-code[mcp]"`),从而保证 `stata-code-mcp` 在 `PATH` 上可用,并且能导入 MCP SDK。和其它前端一样,需要 Stata 17+ 和有效的 Stata 许可证。
519
573
 
520
574
  ---
521
575
 
@@ -228,6 +228,7 @@ The single biggest token-economy decision in the schema. Default response carrie
228
228
  | `complete` | `bool` | Reserved for v2 streaming. Always `true` in v1. v2 may emit interim results with `complete: false`. |
229
229
  | `error_window` | `string \| null` | When `error` is non-null, the ~10 log lines immediately surrounding the failing emission (regardless of `head`/`tail` window). Cheap for the producer to compute; saves agents from bumping `log_lines` or fetching the full log just to see "what did Stata say right when it broke." `null` on success or when not computable. |
230
230
  | `ref` | `string \| null` | Opaque reference for `get_log`. Required when `truncated: true`; may be set when `truncated: false` for caller convenience; `null` is allowed when full log is in `head`. |
231
+ | `files` | `object \| null` | Persistent `.log` / `.smcl` artifacts written for file-backed runs when requested. `null` when no files were written. See "Persistent log files" below. |
231
232
 
232
233
  **ANSI handling.** All log views (`head`, `tail`, `error_window`, the payload returned by `get_log(ref)`) are ANSI-escape-stripped, consistently.
233
234
 
@@ -237,6 +238,37 @@ The single biggest token-economy decision in the schema. Default response carrie
237
238
 
238
239
  **Defaults.** `head=20`, `tail=20`. Configurable per call via `log_lines_head` / `log_lines_tail` (see §4). If `lines_total ≤ head+tail`, the producer MUST set `truncated: false`, place the full log in `head`, set `tail: ""`, and set `ref: null`.
239
240
 
241
+ **Persistent log files.** When a frontend passes a source `.do` path (`origin_path`) and sets `persist_log_files: true`, producers write immutable run artifacts under:
242
+
243
+ ```text
244
+ <do-file-dir>/log-files/<do-stem>__<UTC timestamp>__<session_id>__<request_id>/
245
+ ```
246
+
247
+ `log.files` then has:
248
+
249
+ ```json
250
+ {
251
+ "directory": "/abs/path/log-files/test1__20260508T012233123Z__main__abc123",
252
+ "log_path": "/abs/path/.../test1__20260508T012233123Z__main__abc123.log",
253
+ "smcl_path": "/abs/path/.../test1__20260508T012233123Z__main__abc123.smcl",
254
+ "manifest_path": "/abs/path/.../manifest.json",
255
+ "code_path": "/abs/path/.../submitted.do",
256
+ "working_dir": "/abs/path",
257
+ "graphs_dir": "/abs/path/.../graphs",
258
+ "outputs_dir": "/abs/path/.../outputs",
259
+ "graph_paths": ["/abs/path/.../graphs/01-Graph.png"],
260
+ "output_paths": ["/abs/path/.../outputs/table.xlsx"],
261
+ "policy": "per_run_directory",
262
+ "append": false
263
+ }
264
+ ```
265
+
266
+ The stable folder name is `log-files`; timestamps belong on child run directories, not on the root. Producers SHOULD NOT append different executions into one log file, because parallel sessions, reruns after a pause, and selection/cell executions become ambiguous. Each run directory SHOULD include a manifest and submitted-code snapshot so the log is attributable without relying on editor history.
267
+
268
+ When `origin_path` is supplied, producers SHOULD default Stata's working directory to the source `.do` file's directory before running. This mirrors how users organize project-relative `graph export`, `putexcel`, `esttab using`, `collect export`, and similar output commands. Frontends may disable this with `use_origin_workdir: false` or override it with `working_dir`.
269
+
270
+ When `persist_generated_files` is true, producers SHOULD copy newly created or modified common output files from the run working directory into `outputs/`, preserving relative paths where practical. Captured graph refs SHOULD also be materialized into `graphs/`, with the corresponding `GraphInfo.file_path` set.
271
+
240
272
  ### 3.4 `results`
241
273
 
242
274
  Stata's `r()` and `e()` return dictionaries, structurally separated. Each follows the same shape:
@@ -317,6 +349,7 @@ Each entry describes one captured graph. By default the bytes are **not** inline
317
349
  | `source_command` | `string \| null` | The user-submitted command line that produced this graph, when isolatable. |
318
350
  | `source_line` | `int \| null` | 1-indexed line within the submitted code that produced this graph. |
319
351
  | `inline` | `string \| null` | Base64-encoded bytes when the caller explicitly asked for inline (`include_graphs: "inline"`); else `null`. |
352
+ | `file_path` | `string \| null` | Persistent graph file path when the run bundle materialized captured graphs under `log.files.graphs_dir`; else `null`. |
320
353
 
321
354
  ### 3.7 `error`
322
355
 
@@ -356,7 +389,7 @@ Populated iff `ok: false`. The schema's most important contribution to agent UX:
356
389
  }
357
390
  ```
358
391
 
359
- Suggestions are best-effort; agents should treat them as hints, not directives. The `kind` enum below documents what suggestions are typically populated.
392
+ Suggestions are best-effort; agents should treat them as hints, not directives. A suggestion is not consent to mutate source files or silently retry changed code; consumers should apply fixes automatically only in workflows where the user requested repair or approved iteration. The `kind` enum below documents which suggestions are typically populated.
360
393
 
361
394
  **`kind` enum (v1.0):**
362
395
 
@@ -426,6 +459,11 @@ The schema also dictates what callers may *ask for*. Every frontend exposes the
426
459
  | `graph_format` | `"png" \| "svg" \| "pdf"` | `"png"` | Render format. |
427
460
  | `include_dataset_variables` | `bool` | `true` | Set `false` to omit `dataset.variables`. |
428
461
  | `timeout_ms` | `int \| null` | `600000` (10 min) | Hard timeout. `null` disables. On expiry, returns `ok: false`, `error.kind: "timeout"`, `rc: -2`. Frontends MAY override the default if their use case demands. |
462
+ | `persist_log_files` | `bool` | `false` | With `origin_path`, writes immutable `.log` / `.smcl` / manifest files under the source `.do` file's `log-files/` directory. |
463
+ | `persist_generated_files` | `bool` | `true` | When log files are persisted, also copies newly created or modified table/export files into `outputs/` and captured graphs into `graphs/`. |
464
+ | `origin_path` | `string \| null` | `null` | Absolute source `.do` path used for working-directory defaults and run-bundle placement. |
465
+ | `use_origin_workdir` | `bool` | `true` | With `origin_path`, `cd` Stata to the source `.do` directory before running. |
466
+ | `working_dir` | `string \| null` | `null` | Explicit Stata working directory; overrides the source `.do` directory. |
429
467
 
430
468
  Frontends translate their native idiom (MCP `inputSchema`, Jupyter kernel options, VSCode commands) into these names without renaming.
431
469
 
@@ -481,6 +519,8 @@ These are *additions* to `run()`. A minimal client only needs `run()` plus which
481
519
  | `matrix_ref` | Producer can emit large matrices as refs and supports `get_matrix`. |
482
520
  | `multi_session` | Producer supports `session_id != "main"` and `list_sessions`. |
483
521
  | `inline_graphs` | Producer supports `include_graphs: "inline"`. |
522
+ | `log_files` | Producer can persist immutable per-run `.log` / `.smcl` bundles. |
523
+ | `run_artifacts` | Producer can materialize captured graphs and copied table/export outputs into the run bundle. |
484
524
 
485
525
  Consumers detect optional features via `capabilities`, not by parsing `schema_version`. Producers may add entries; agents MUST treat unknown capability names as opaque.
486
526
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "stata-code"
7
- version = "0.3.1"
7
+ version = "0.5.0"
8
8
  description = "Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)"
9
9
  readme = "README.md"
10
10
  license = "MIT"