stata-code 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {stata_code-0.7.0 → stata_code-0.7.2}/.gitignore +2 -0
  2. {stata_code-0.7.0 → stata_code-0.7.2}/CHANGELOG.md +60 -0
  3. {stata_code-0.7.0 → stata_code-0.7.2}/PKG-INFO +11 -8
  4. {stata_code-0.7.0 → stata_code-0.7.2}/README.md +10 -7
  5. {stata_code-0.7.0 → stata_code-0.7.2}/pyproject.toml +1 -1
  6. stata_code-0.7.2/scripts/build_skill_zip.py +105 -0
  7. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/__init__.py +1 -1
  8. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/runner.py +141 -10
  9. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/kernel.py +32 -2
  10. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/mcp/server.py +376 -1
  11. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_kernel.py +113 -0
  12. stata_code-0.7.2/tests/test_new_tools.py +472 -0
  13. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_runner.py +61 -0
  14. stata_code-0.7.2/tests/test_skill_package.py +65 -0
  15. {stata_code-0.7.0 → stata_code-0.7.2}/LICENSE +0 -0
  16. {stata_code-0.7.0 → stata_code-0.7.2}/LICENSE-POLICY.md +0 -0
  17. {stata_code-0.7.0 → stata_code-0.7.2}/PUBLISHING.md +0 -0
  18. {stata_code-0.7.0 → stata_code-0.7.2}/SCHEMA.md +0 -0
  19. {stata_code-0.7.0 → stata_code-0.7.2}/docs/design/hard_timeout.md +0 -0
  20. {stata_code-0.7.0 → stata_code-0.7.2}/examples/01-basic-regression.md +0 -0
  21. {stata_code-0.7.0 → stata_code-0.7.2}/examples/02-did-card-krueger.md +0 -0
  22. {stata_code-0.7.0 → stata_code-0.7.2}/examples/03-graphs.md +0 -0
  23. {stata_code-0.7.0 → stata_code-0.7.2}/examples/04-multi-session.md +0 -0
  24. {stata_code-0.7.0 → stata_code-0.7.2}/examples/05-large-matrix.md +0 -0
  25. {stata_code-0.7.0 → stata_code-0.7.2}/examples/README.md +0 -0
  26. {stata_code-0.7.0 → stata_code-0.7.2}/schema/run_result.schema.json +0 -0
  27. {stata_code-0.7.0 → stata_code-0.7.2}/scripts/check_versions.py +0 -0
  28. {stata_code-0.7.0 → stata_code-0.7.2}/scripts/export_schema.py +0 -0
  29. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/__init__.py +0 -0
  30. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/_pool.py +0 -0
  31. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/_refs.py +0 -0
  32. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/_runtime.py +0 -0
  33. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/errors.py +0 -0
  34. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/log_artifacts.py +0 -0
  35. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/notebook.py +0 -0
  36. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/run_index.py +0 -0
  37. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/core/schema.py +0 -0
  38. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/__init__.py +0 -0
  39. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/__main__.py +0 -0
  40. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/assets/logo-32x32.png +0 -0
  41. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/assets/logo-64x64.png +0 -0
  42. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/kernel/assets/logo-svg.svg +0 -0
  43. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/mcp/__init__.py +0 -0
  44. {stata_code-0.7.0 → stata_code-0.7.2}/stata_code/mcp/__main__.py +0 -0
  45. {stata_code-0.7.0 → stata_code-0.7.2}/tests/__init__.py +0 -0
  46. {stata_code-0.7.0 → stata_code-0.7.2}/tests/conftest.py +0 -0
  47. {stata_code-0.7.0 → stata_code-0.7.2}/tests/fixtures/.gitkeep +0 -0
  48. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_cancel.py +0 -0
  49. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_errors.py +0 -0
  50. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_log_artifacts.py +0 -0
  51. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_mcp.py +0 -0
  52. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_mcp_stdio.py +0 -0
  53. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_notebook.py +0 -0
  54. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_notebook_phase2.py +0 -0
  55. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_pool.py +0 -0
  56. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_public_api.py +0 -0
  57. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_release_versions.py +0 -0
  58. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_run_index.py +0 -0
  59. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_runtime_discovery.py +0 -0
  60. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_schema.py +0 -0
  61. {stata_code-0.7.0 → stata_code-0.7.2}/tests/test_schema_artifact.py +0 -0
@@ -223,6 +223,8 @@ log-files/
223
223
  *.smcl
224
224
  *.dta
225
225
  !tests/fixtures/*.dta
226
+ # Graph-export artifact written by the runner graph-capture tests
227
+ stata_code_test_export.png
226
228
 
227
229
  # macOS
228
230
  .DS_Store
@@ -6,6 +6,66 @@ to semver-major.minor for the result schema (see `SCHEMA.md` §6).
6
6
 
7
7
  ## Unreleased
8
8
 
9
+ ## 0.7.2 — 2026-06-20
10
+
11
+ ### Added
12
+
13
+ - **Three convenience MCP tools** raise the tool surface from 15 to 18:
14
+ - `install_package(name, source?, url?, replace?, session_id?)` — installs a
15
+ community package via `ssc install` / `net install` without the agent
16
+ having to remember the syntax, then verifies it resolves with `which`.
17
+ Package names and URLs are validated to keep them out of the generated
18
+ command line; failures surface the typed `error` block (e.g. `network`).
19
+ - `search_log(ref, pattern, is_regex?, ignore_case?, context?, max_matches?)`
20
+ — greps within a truncated `log://` payload and returns only the matching
21
+ lines (with optional context), so a long log can be inspected without
22
+ pulling the whole transcript back through `get_log`.
23
+ - `inspect_data(varlist?, detail?, session_id?)` — runs `describe` +
24
+ `codebook` and returns the structured `dataset` block plus the codebook
25
+ log: a one-call "what's in this dataset" the agent doesn't have to spell out.
26
+ - **On-demand Stata reference library** under `skills/stata-code/references/`
27
+ (~4,200 lines): topic files for core syntax, data management, econometrics,
28
+ causal inference, panel/time series, graphics, and table export; load-bearing
29
+ `error-codes.md` (the full `rc → kind → fix` table + self-repair loop, aligned
30
+ with the typed-error taxonomy) and `defensive-coding.md`; and per-package notes
31
+ for `reghdfe`, `coefplot`, `estout`, and `gtools`. `SKILL.md` gained a routing
32
+ table (read 1–3 files on demand) and a live-vs-offline execution-mode section.
33
+ - **`scripts/build_skill_zip.py`** packages the skill into a deterministic
34
+ `build/stata-code-skill.zip` for upload as Claude.ai project knowledge.
35
+
36
+ ## 0.7.1 — 2026-06-19
37
+
38
+ ### Fixed
39
+
40
+ - **Jupyter kernel: graphs after the first cell now display.** Graph capture
41
+ detected new graphs by diffing in-memory graph names before/after a run.
42
+ Because Stata keeps only one graph per name and every unnamed graph command
43
+ overwrites the default `Graph` in place, the second and later cells of a
44
+ persistent session produced no name delta and their graphs were silently
45
+ dropped — only the first cell's graph ever rendered. Capture now also
46
+ re-exports any graph the cell's own source shows it (re)drew (every
47
+ `name(...)` target, plus the default `Graph` for any unnamed graph command),
48
+ so in-place redraws surface every time. The same fix covers repeated MCP
49
+ `stata_run` calls in one session. The graph-command detector was tightened
50
+ to distinguish drawing commands from `graph` utility subcommands (`export`,
51
+ `display`, `dir`, `drop`, …) so a utility-only cell no longer re-surfaces a
52
+ stale graph.
53
+ - **Jupyter kernel: no more duplicated code echo in cell output.** pystata
54
+ runs a multi-line cell as a temporary do-file, and Stata echoes every
55
+ submitted command (`. cmd` / `> continuation`) regardless of `echo=False`
56
+ (which only suppresses echo for a single inline command). For a cell with no
57
+ textual output (e.g. a graph) that echo was the *only* thing shown — a
58
+ useless repeat of the source already visible in the input cell. The kernel
59
+ now strips command-echo lines before streaming, keeping genuine command
60
+ output. The full log (with echo) is unchanged in `RunResult.log` for MCP /
61
+ agent consumers.
62
+
63
+ ### Changed
64
+
65
+ - **VS Code extension now ships a Marketplace icon** (coef-plot mark, Anthropic
66
+ palette on white) so the listing and Extensions sidebar render branded
67
+ artwork instead of the default placeholder.
68
+
9
69
  ## 0.7.0 — 2026-05-30
10
70
 
11
71
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stata-code
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
5
5
  Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
6
6
  Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
@@ -84,9 +84,9 @@ Description-Content-Type: text/markdown
84
84
  └─────────────┘ └────────────┘ └─────────────────┘
85
85
  ```
86
86
 
87
- **Status: v0.6 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
87
+ **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
88
88
 
89
- Two workflows v0.6 explicitly supports for end users:
89
+ Two workflows the current release explicitly supports for end users:
90
90
 
91
91
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
92
92
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
@@ -188,7 +188,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
188
188
  claude mcp add stata-code --scope project -- stata-code-mcp
189
189
  ```
190
190
 
191
- Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 15 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
191
+ Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
192
192
 
193
193
  #### Error Recovery in Agent Workflows
194
194
 
@@ -276,15 +276,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
276
276
  upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
277
277
  processes keep advertising the stale schema until they are restarted.
278
278
 
279
- The MCP server registers 15 tools:
279
+ The MCP server registers 18 tools:
280
280
 
281
281
  | Tool | Purpose |
282
282
  | --- | --- |
283
283
  | `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
284
284
  | `stata_info` | Report Stata edition, version, and capabilities |
285
285
  | `get_log` | Fetch the full log behind a `log://` ref |
286
+ | `search_log` | Search matching lines inside a stored `log://` payload |
286
287
  | `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
287
288
  | `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
289
+ | `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
290
+ | `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
288
291
  | `list_sessions` | Enumerate live sessions |
289
292
  | `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
290
293
  | `reset_session` | Drop a session's data |
@@ -416,7 +419,7 @@ stata_code/
416
419
  │ ├── runner.py # in-process execute(); collects everything via sfi
417
420
  │ └── _pool.py # subprocess workers for public API / MCP hard timeouts
418
421
  ├── mcp/
419
- │ └── server.py # MCP server (15 tools)
422
+ │ └── server.py # MCP server (18 tools)
420
423
  └── kernel/
421
424
  └── kernel.py # Jupyter kernel
422
425
  ```
@@ -444,7 +447,7 @@ stata_code/
444
447
 
445
448
  ## Roadmap
446
449
 
447
- ### Done (through v0.6 — May 2026)
450
+ ### Done (through v0.7 — May 2026)
448
451
 
449
452
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
450
453
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -454,7 +457,7 @@ stata_code/
454
457
  - Log truncation with ref store
455
458
  - Warning extraction: 5 categories + generic notes
456
459
  - 32-kind error taxonomy with canonical suggestions
457
- - MCP server: 15 tools, including notebook navigation / search / atomic edits and the run-bundle index (`list_runs`)
460
+ - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
458
461
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
459
462
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
460
463
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
@@ -45,9 +45,9 @@
45
45
  └─────────────┘ └────────────┘ └─────────────────┘
46
46
  ```
47
47
 
48
- **Status: v0.6 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
48
+ **Status: v0.7 (May 2026)** — the core, MCP server, Jupyter kernel, and VS Code extension work end-to-end against Stata 18 MP. The test suite covers schema, runner, MCP, kernel, notebook, run-index, subprocess-pool, and VS Code modules; CI also checks linting, type safety, schema generation, package metadata, and VSIX packaging. License: **MIT**.
49
49
 
50
- Two workflows v0.6 explicitly supports for end users:
50
+ Two workflows the current release explicitly supports for end users:
51
51
 
52
52
  - **Run Stata code from a Jupyter notebook.** `pip install "stata-code[kernel]"` + `stata-code-kernel install --user` registers a **Stata** kernel that the Jupyter Notebook UI, JupyterLab, and the VS Code Jupyter extension all pick up by name. Cells render Stata logs, graphs, and warnings inline (the kernel logo bundled since v0.5 makes it appear in VS Code's kernel picker too). See [As a Jupyter Kernel](#as-a-jupyter-kernel).
53
53
  - **Optional agent "fix and rerun" loop.** `stata_run` returns typed `error.kind/line/context` plus `suggestions` on every failure. By default Claude Code only reports diagnostics — but if you explicitly say "fix this and rerun until it passes", the agent uses the same fields to edit your `.do` file and re-call `stata_run` until the run is green. The repair loop is **opt-in**: failed runs are diagnostics first, not automatic rewrite permission. See [Error Recovery in Agent Workflows](#error-recovery-in-agent-workflows).
@@ -149,7 +149,7 @@ claude mcp add stata-code --scope local -- stata-code-mcp
149
149
  claude mcp add stata-code --scope project -- stata-code-mcp
150
150
  ```
151
151
 
152
- Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 15 tools (`stata_run`, `stata_info`, `get_log`, `get_graph`, `get_matrix`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
152
+ Then launch `claude` and type `/mcp` to confirm `stata-code` shows up with its 18 tools (`stata_run`, `stata_info`, `get_log`, `search_log`, `get_graph`, `get_matrix`, `inspect_data`, `install_package`, `list_sessions`, `cancel_session`, `reset_session`, `notebook_outline`, `notebook_get_cell`, `notebook_locate`, `notebook_edit_cell`, `notebook_insert_cell`, `notebook_delete_cell`, `list_runs`).
153
153
 
154
154
  #### Error Recovery in Agent Workflows
155
155
 
@@ -237,15 +237,18 @@ If an OpenAI-backed client reports `API Error: 400 Invalid schema for function
237
237
  upgrade to `stata-code>=0.6.5`, then restart the MCP client. Older server
238
238
  processes keep advertising the stale schema until they are restarted.
239
239
 
240
- The MCP server registers 15 tools:
240
+ The MCP server registers 18 tools:
241
241
 
242
242
  | Tool | Purpose |
243
243
  | --- | --- |
244
244
  | `stata_run` | Execute Stata code and return a v1.0 RunResult JSON |
245
245
  | `stata_info` | Report Stata edition, version, and capabilities |
246
246
  | `get_log` | Fetch the full log behind a `log://` ref |
247
+ | `search_log` | Search matching lines inside a stored `log://` payload |
247
248
  | `get_graph` | Fetch graph bytes behind a `graph://` ref (`ImageContent`) |
248
249
  | `get_matrix` | Fetch matrix payloads behind a `matrix://` ref |
250
+ | `inspect_data` | Run `describe` + `codebook` and return compact dataset metadata |
251
+ | `install_package` | Install an SSC or explicit `net install` package and verify it resolves |
249
252
  | `list_sessions` | Enumerate live sessions |
250
253
  | `cancel_session` | Cancel a session; the subprocess-backed path terminates in-flight runs and short-circuits pending ones |
251
254
  | `reset_session` | Drop a session's data |
@@ -377,7 +380,7 @@ stata_code/
377
380
  │ ├── runner.py # in-process execute(); collects everything via sfi
378
381
  │ └── _pool.py # subprocess workers for public API / MCP hard timeouts
379
382
  ├── mcp/
380
- │ └── server.py # MCP server (15 tools)
383
+ │ └── server.py # MCP server (18 tools)
381
384
  └── kernel/
382
385
  └── kernel.py # Jupyter kernel
383
386
  ```
@@ -405,7 +408,7 @@ stata_code/
405
408
 
406
409
  ## Roadmap
407
410
 
408
- ### Done (through v0.6 — May 2026)
411
+ ### Done (through v0.7 — May 2026)
409
412
 
410
413
  - v1.0 result schema ([SCHEMA.md](SCHEMA.md))
411
414
  - `pystata`-based runner with native-typed `r()`, `e()`, and matrices
@@ -415,7 +418,7 @@ stata_code/
415
418
  - Log truncation with ref store
416
419
  - Warning extraction: 5 categories + generic notes
417
420
  - 32-kind error taxonomy with canonical suggestions
418
- - MCP server: 15 tools, including notebook navigation / search / atomic edits and the run-bundle index (`list_runs`)
421
+ - MCP server: 18 tools, including notebook navigation / search / atomic edits, the run-bundle index (`list_runs`), log grep (`search_log`), dataset inspection (`inspect_data`), and package installation (`install_package`)
419
422
  - Jupyter kernel: rewired to the v1.0 pipeline, kernel logos bundled
420
423
  - Matrix size cap + `get_matrix(ref)` for large matrices (>10k cells)
421
424
  - Subprocess-backed hard timeout and cancellation for the public Python API and MCP server: `timeout_ms`, `cancel(session_id)`, and MCP `cancel_session`
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "stata-code"
7
- version = "0.7.0"
7
+ version = "0.7.2"
8
8
  description = "Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -0,0 +1,105 @@
1
+ """Package the ``stata-code`` skill into a single uploadable ``.zip``.
2
+
3
+ The skill (``skills/stata-code/SKILL.md`` + the ``references/`` library) is
4
+ consumed two ways:
5
+
6
+ * In-repo / Claude Code — read straight from ``skills/stata-code/``.
7
+ * Claude.ai project knowledge — uploaded as a ``.zip``. This script builds
8
+ that archive.
9
+
10
+ The archive contains a single top-level ``stata-code/`` folder so it extracts
11
+ cleanly::
12
+
13
+ stata-code/SKILL.md
14
+ stata-code/references/econometrics.md
15
+ stata-code/references/packages/reghdfe.md
16
+ ...
17
+
18
+ Run::
19
+
20
+ python scripts/build_skill_zip.py # -> build/stata-code-skill.zip
21
+ python scripts/build_skill_zip.py -o /tmp/out.zip # custom destination
22
+
23
+ The build is deterministic (sorted entries, fixed timestamps) so re-running it
24
+ on unchanged inputs produces a byte-identical archive.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import argparse
30
+ import sys
31
+ import zipfile
32
+ from pathlib import Path
33
+
34
+ REPO_ROOT = Path(__file__).resolve().parent.parent
35
+ SKILL_DIR = REPO_ROOT / "skills" / "stata-code"
36
+ DEFAULT_OUTPUT = REPO_ROOT / "build" / "stata-code-skill.zip"
37
+ ARCHIVE_PREFIX = "stata-code"
38
+
39
+ # Fixed timestamp for reproducible archives (zip epoch starts at 1980).
40
+ _FIXED_DATE_TIME = (1980, 1, 1, 0, 0, 0)
41
+
42
+
43
def collect_files(skill_dir: Path = SKILL_DIR) -> list[Path]:
    """Return every shippable skill file under ``skill_dir`` in a stable order.

    Parameters
    ----------
    skill_dir : Path
        Root of the skill tree to package.

    Returns
    -------
    list[Path]
        Files found recursively under ``skill_dir``, excluding editor/OS
        cruft (``.DS_Store``) and anything inside a ``__pycache__`` directory.

    Raises
    ------
    FileNotFoundError
        If ``skill_dir`` is not an existing directory.
    """
    if not skill_dir.is_dir():
        raise FileNotFoundError(f"skill directory not found: {skill_dir}")
    skip = {".DS_Store"}

    def _rel_parts(path: Path) -> tuple[str, ...]:
        # Parts relative to the skill root: directories *above* the root must
        # not influence filtering or ordering.
        return path.relative_to(skill_dir).parts

    files = [
        p
        for p in skill_dir.rglob("*")
        if p.is_file()
        and p.name not in skip
        and "__pycache__" not in _rel_parts(p)
    ]
    # Sort on plain string tuples rather than Path objects: Path comparison is
    # case-insensitive on Windows, which would make the entry order (and so
    # the archive bytes) platform-dependent. On POSIX the order is unchanged.
    return sorted(files, key=_rel_parts)
57
+
58
+
59
def build_zip(
    dest: Path = DEFAULT_OUTPUT,
    skill_dir: Path = SKILL_DIR,
) -> list[str]:
    """Write the skill archive to ``dest``; return the arcnames included.

    Parameters
    ----------
    dest : Path
        Destination ``.zip``. Parent directories are created as needed and an
        existing file is overwritten.
    skill_dir : Path
        Root of the skill tree to package.

    Returns
    -------
    list[str]
        Archive member names, in the order they were written. Each is
        ``ARCHIVE_PREFIX`` joined with the file's POSIX path relative to
        ``skill_dir``.

    Raises
    ------
    FileNotFoundError
        If ``skill_dir`` is missing or contains no shippable files.
    """
    # Never pack the archive into itself: if ``dest`` lives under
    # ``skill_dir`` a previous build would otherwise be collected and then
    # read back while it is being rewritten.
    dest_resolved = dest.resolve()
    files = [p for p in collect_files(skill_dir) if p.resolve() != dest_resolved]
    if not files:
        raise FileNotFoundError(f"no skill files under {skill_dir}")

    dest.parent.mkdir(parents=True, exist_ok=True)
    arcnames: list[str] = []
    with zipfile.ZipFile(dest, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for path in files:
            rel = path.relative_to(skill_dir).as_posix()
            arcname = f"{ARCHIVE_PREFIX}/{rel}"
            # Fixed timestamp so unchanged inputs give identical entries.
            info = zipfile.ZipInfo(arcname, date_time=_FIXED_DATE_TIME)
            info.compress_type = zipfile.ZIP_DEFLATED
            # ZipInfo defaults create_system to 0 on Windows and 3 elsewhere.
            # Pin it to 3 (Unix) so the header bytes do not depend on the
            # build host and the mode bits below are interpreted as intended.
            info.create_system = 3
            info.external_attr = 0o644 << 16  # permission bits rw-r--r--
            zf.writestr(info, path.read_bytes())
            arcnames.append(arcname)
    return arcnames
80
+
81
+
82
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build the skill archive and report its size.

    Parameters
    ----------
    argv : list[str] | None
        Argument vector to parse. ``None`` (the default) makes argparse read
        ``sys.argv[1:]``, which is the behavior when run as a script.

    Returns
    -------
    int
        Process exit status: ``0`` on success, ``1`` if the archive could not
        be built or inspected (the reason is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a literal file-tree and usage
        # examples; the default formatter would reflow them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Destination .zip (default: {DEFAULT_OUTPUT.relative_to(REPO_ROOT)}).",
    )
    args = parser.parse_args(argv)

    try:
        arcnames = build_zip(args.output)
        size = args.output.stat().st_size
    except OSError as exc:
        # OSError covers the missing-skill-directory case (FileNotFoundError)
        # as well as an unwritable destination, so neither ends in a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1

    print(f"wrote: {args.output} ({len(arcnames)} files, {size:,} bytes)")
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -174,7 +174,7 @@ def is_available() -> bool:
174
174
  return True
175
175
 
176
176
 
177
- __version__ = "0.7.0"
177
+ __version__ = "0.7.2"
178
178
 
179
179
  __all__ = [
180
180
  # Primary entry points
@@ -218,6 +218,107 @@ def get_log(ref: str) -> dict[str, Any]:
218
218
  }
219
219
 
220
220
 
221
def search_log(
    ref: str,
    pattern: str,
    *,
    is_regex: bool = False,
    ignore_case: bool = True,
    context: int = 0,
    max_matches: int = 50,
) -> dict[str, Any]:
    """Auxiliary tool: grep within a stored ``log://`` payload.

    Pairs with the token-economy default of returning long logs by
    reference: instead of pulling the whole log back with
    :func:`get_log`, the agent can find just the lines it cares about.

    Parameters
    ----------
    ref : str
        A ``log://<request_id>`` ref produced by a truncated ``stata_run``.
    pattern : str
        Substring (default) or regular expression (``is_regex=True``) to
        match against each line. Must be a non-empty string.
    is_regex : bool
        Treat ``pattern`` as a Python regular expression. A malformed
        regex raises :class:`ValueError` (surfaced as ``invalid_request``).
    ignore_case : bool
        Case-insensitive matching (default ``True``).
    context : int
        Lines of surrounding context to include on each side of a match
        (clamped to 0..10). ``before`` / ``after`` are omitted when empty.
    max_matches : int
        Stop after this many matches; ``truncated`` reports whether more
        existed (clamped to 1..1000).

    Returns
    -------
    dict
        ``{ref, pattern, is_regex, lines_total, match_count, truncated,
        matches: [{line_no, text, before?, after?}]}``. ``line_no`` is
        1-based.

    Raises
    ------
    RefNotFound
        If ``ref`` does not resolve to a stored log payload.
    ValueError
        If ``pattern`` is empty or not a string, if ``context`` /
        ``max_matches`` cannot be read as integers, or if ``is_regex`` is set
        and ``pattern`` does not compile.
    """
    payload = _refs.get(ref)
    if (
        not isinstance(payload, dict)
        or not isinstance(payload.get("text"), str)
        or "lines_total" not in payload
    ):
        raise RefNotFound(ref, kind="unknown_log_ref")
    # Arguments arrive from tool calls, so reject wrong types here with the
    # documented ValueError instead of failing later with AttributeError.
    if not isinstance(pattern, str) or not pattern:
        raise ValueError("pattern must be a non-empty string")

    try:
        context = max(0, min(int(context), 10))
        max_matches = max(1, min(int(max_matches), 1000))
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError("context and max_matches must be integers") from exc

    # Build the per-line predicate once, outside the scan loop.
    if is_regex:
        try:
            matcher = re.compile(pattern, re.IGNORECASE if ignore_case else 0)
        except re.error as exc:
            raise ValueError(f"invalid regex: {exc}") from exc

        def _hit(line: str) -> bool:
            return matcher.search(line) is not None
    elif ignore_case:
        needle = pattern.lower()

        def _hit(line: str) -> bool:
            return needle in line.lower()
    else:

        def _hit(line: str) -> bool:
            return pattern in line

    text: str = payload["text"]
    lines = text.split("\n")
    matches: list[dict[str, Any]] = []
    truncated = False
    for idx, line in enumerate(lines):
        if not _hit(line):
            continue
        if len(matches) >= max_matches:
            # One more hit exists beyond the cap: report it and stop scanning.
            truncated = True
            break
        entry: dict[str, Any] = {"line_no": idx + 1, "text": line}
        if context:
            before = lines[max(0, idx - context):idx]
            after = lines[idx + 1:idx + 1 + context]
            if before:
                entry["before"] = before
            if after:
                entry["after"] = after
        matches.append(entry)

    return {
        "ref": ref,
        "pattern": pattern,
        "is_regex": is_regex,
        "lines_total": payload["lines_total"],
        "match_count": len(matches),
        "truncated": truncated,
        "matches": matches,
    }
320
+
321
+
221
322
  def cancel(session_id: str = "main") -> bool:
222
323
  """Request cancellation of the next ``execute()`` call for ``session_id``.
223
324
 
@@ -1195,11 +1296,20 @@ def _extract_warnings(log: str) -> list: # list[StataWarning]
1195
1296
 
1196
1297
 
1197
1298
  _GRAPH_NAME_RE = re.compile(r"\bname\(\s*([A-Za-z_][A-Za-z0-9_]*)", re.IGNORECASE)
1299
+ # Stata's default in-memory graph name, (re)used by any graph command that
1300
+ # omits an explicit `name(...)` option. Capture/redraw detection keys off this.
1301
+ _DEFAULT_GRAPH_NAME = "Graph"
1302
+ # Commands that actually *draw* a graph (and thus create/overwrite an
1303
+ # in-memory graph). Deliberately excludes the `graph` utility subcommands
1304
+ # (export, display, dir, drop, describe, save, use, rename, set, copy, query,
1305
+ # replay) — those operate on existing graphs and must not be mistaken for a
1306
+ # redraw, or a bare `graph export` cell would spuriously re-surface a stale
1307
+ # graph.
1198
1308
  _GRAPH_COMMAND_RE = re.compile(
1199
1309
  r"^\s*(?:"
1200
- r"graph\s+\w+|"
1201
- r"twoway|scatter|line|connected|histogram|kdensity|lowess|lfit|qfit|"
1202
- r"coefplot|binscatter"
1310
+ r"graph\s+(?:bar|hbar|box|hbox|dot|pie|twoway|matrix|combine)\b|"
1311
+ r"twoway|scatter|line|connected|histogram|hist|kdensity|lpoly|lowess|"
1312
+ r"lfit|qfit|coefplot|binscatter|marginsplot"
1203
1313
  r")\b",
1204
1314
  re.IGNORECASE,
1205
1315
  )
@@ -1262,19 +1372,40 @@ def _collect_graphs(
1262
1372
  source_hints: dict[str, tuple[str, int]] | None = None,
1263
1373
  unnamed_source_hints: list[tuple[str, int]] | None = None,
1264
1374
  ) -> list[GraphInfo]:
1265
- """Capture graphs that user code newly created.
1375
+ """Capture graphs that user code newly created or redrew.
1266
1376
 
1267
1377
  Strategy: snapshot graph names before user code (`pre_existing`), call
1268
- after to find the post-existing list, take the set difference. For each
1269
- new graph: `graph display <name>` (makes it active), `graph export` to a
1270
- tmpfile, read bytes, store under a ref. Tmpfile is deleted after.
1378
+ after to find the post-existing list. Capture a graph when its name is
1379
+ genuinely new *or* when this cell's source shows it (re)drew that name.
1380
+
1381
+ The redraw case matters because Stata keeps only one in-memory graph per
1382
+ name, so a command that overwrites an existing name (most commonly the
1383
+ default ``Graph``, produced by any unnamed graph command) leaves the
1384
+ ``graph dir`` name set unchanged. A pure set-difference against
1385
+ `pre_existing` therefore misses it — which is why, in a persistent session
1386
+ (Jupyter cell 2+, repeated MCP runs), only the first graph ever surfaced.
1387
+
1388
+ For each captured graph: `graph display <name>` (makes it active),
1389
+ `graph export` to a tmpfile, read bytes, store under a ref. Tmpfile is
1390
+ deleted after.
1271
1391
  """
1272
1392
  after_names = _list_graph_names(rt)
1273
- new_names = [n for n in after_names if n not in pre_existing]
1274
- if not new_names:
1275
- return []
1276
1393
  source_hints = source_hints or {}
1277
1394
  unnamed_source_hints = unnamed_source_hints or []
1395
+
1396
+ # Names this cell explicitly drew, inferred from its source: every
1397
+ # `name(...)` option, plus the default graph when any unnamed graph
1398
+ # command ran. These are re-captured even if they already existed, so an
1399
+ # in-place redraw is not dropped.
1400
+ redrawn = set(source_hints)
1401
+ if unnamed_source_hints:
1402
+ redrawn.add(_DEFAULT_GRAPH_NAME)
1403
+
1404
+ new_names = [
1405
+ n for n in after_names if n not in pre_existing or n in redrawn
1406
+ ]
1407
+ if not new_names:
1408
+ return []
1278
1409
  unattributed_names = [n for n in new_names if n not in source_hints]
1279
1410
  unnamed_by_graph: dict[str, tuple[str, int]] = {}
1280
1411
  if len(unattributed_names) == len(unnamed_source_hints):
@@ -102,6 +102,35 @@ def _word_at_cursor(code: str, cursor_pos: int) -> tuple[str, int, int]:
102
102
  return code[start:end], start, end
103
103
 
104
104
 
105
+ def _strip_command_echo(log_text: str) -> str:
106
+ """Drop Stata's do-file command echo from a captured cell log.
107
+
108
+ pystata runs a multi-line cell as a temporary do-file, and Stata echoes
109
+ every submitted command — ``. cmd`` for the first line of each command and
110
+ ``> ...`` for wrapped/continued lines — regardless of the ``echo=False``
111
+ flag (which only suppresses echo for a single inline command). In a
112
+ notebook the input cell already shows the source, so the echo is pure
113
+ duplication; for a cell with no textual output (e.g. a graph) the echo is
114
+ the *only* thing shown, which reads as a useless repeat of the code.
115
+
116
+ Strip the echoed command/continuation lines, keep genuine command output,
117
+ and collapse the blank-line runs the removal leaves behind. Echoed lines
118
+ always start at column 0 with ``. `` (dot-space) or ``> `` (continuation);
119
+ real Stata output never begins that way, so this is safe.
120
+ """
121
+ kept: list[str] = []
122
+ for line in log_text.split("\n"):
123
+ if line.startswith(". ") or line.startswith("> "):
124
+ continue
125
+ # Collapse leading and consecutive blank lines left by removed echoes.
126
+ if not line.strip() and (not kept or not kept[-1].strip()):
127
+ continue
128
+ kept.append(line)
129
+ while kept and not kept[-1].strip():
130
+ kept.pop()
131
+ return "\n".join(kept)
132
+
133
+
105
134
  # ─────────────────────────────────────────────────────────────────────────────
106
135
  # Kernel
107
136
  # ─────────────────────────────────────────────────────────────────────────────
@@ -155,8 +184,9 @@ class StataKernel(_KernelBase):
155
184
  self._last_result = result
156
185
 
157
186
  if not silent:
158
- if result.log.head:
159
- self._stream("stdout", result.log.head + "\n")
187
+ log_text = _strip_command_echo(result.log.head) if result.log.head else ""
188
+ if log_text:
189
+ self._stream("stdout", log_text + "\n")
160
190
  if result.warnings:
161
191
  for w in result.warnings:
162
192
  self._stream("stderr", f"[{w.kind}] {w.message}\n")