stata-code 0.8.1__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {stata_code-0.8.1 → stata_code-0.9.0}/CHANGELOG.md +66 -0
  2. {stata_code-0.8.1 → stata_code-0.9.0}/PKG-INFO +9 -5
  3. {stata_code-0.8.1 → stata_code-0.9.0}/README.md +8 -4
  4. {stata_code-0.8.1 → stata_code-0.9.0}/SCHEMA.md +101 -20
  5. stata_code-0.9.0/docs/competitive-landscape.md +163 -0
  6. {stata_code-0.8.1 → stata_code-0.9.0}/docs/industry-leader-roadmap.md +15 -0
  7. {stata_code-0.8.1 → stata_code-0.9.0}/pyproject.toml +1 -1
  8. {stata_code-0.8.1 → stata_code-0.9.0}/schema/run_result.schema.json +284 -0
  9. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/__init__.py +41 -2
  10. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/cli.py +12 -0
  11. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/__init__.py +40 -1
  12. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/_pool.py +22 -19
  13. stata_code-0.9.0/stata_code/core/errors.py +802 -0
  14. stata_code-0.9.0/stata_code/core/estimation.py +377 -0
  15. stata_code-0.9.0/stata_code/core/handoff.py +96 -0
  16. stata_code-0.9.0/stata_code/core/provenance.py +250 -0
  17. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/runner.py +46 -51
  18. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/schema.py +96 -14
  19. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/doctor.py +170 -13
  20. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/mcp/server.py +1 -1
  21. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_doctor.py +99 -4
  22. stata_code-0.9.0/tests/test_errors.py +462 -0
  23. stata_code-0.9.0/tests/test_estimation.py +273 -0
  24. stata_code-0.9.0/tests/test_handoff.py +85 -0
  25. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_pool.py +22 -1
  26. stata_code-0.9.0/tests/test_provenance.py +193 -0
  27. stata_code-0.9.0/tests/test_real_stata.py +221 -0
  28. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_runner.py +16 -0
  29. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_schema.py +39 -2
  30. stata_code-0.8.1/stata_code/core/errors.py +0 -456
  31. stata_code-0.8.1/tests/test_errors.py +0 -231
  32. {stata_code-0.8.1 → stata_code-0.9.0}/.gitignore +0 -0
  33. {stata_code-0.8.1 → stata_code-0.9.0}/LICENSE +0 -0
  34. {stata_code-0.8.1 → stata_code-0.9.0}/LICENSE-POLICY.md +0 -0
  35. {stata_code-0.8.1 → stata_code-0.9.0}/PUBLISHING.md +0 -0
  36. {stata_code-0.8.1 → stata_code-0.9.0}/docs/design/hard_timeout.md +0 -0
  37. {stata_code-0.8.1 → stata_code-0.9.0}/examples/01-basic-regression.md +0 -0
  38. {stata_code-0.8.1 → stata_code-0.9.0}/examples/02-did-card-krueger.md +0 -0
  39. {stata_code-0.8.1 → stata_code-0.9.0}/examples/03-graphs.md +0 -0
  40. {stata_code-0.8.1 → stata_code-0.9.0}/examples/04-multi-session.md +0 -0
  41. {stata_code-0.8.1 → stata_code-0.9.0}/examples/05-large-matrix.md +0 -0
  42. {stata_code-0.8.1 → stata_code-0.9.0}/examples/06-cross-stack-parity-audit.md +0 -0
  43. {stata_code-0.8.1 → stata_code-0.9.0}/examples/07-data-mcp-handoff.md +0 -0
  44. {stata_code-0.8.1 → stata_code-0.9.0}/examples/README.md +0 -0
  45. {stata_code-0.8.1 → stata_code-0.9.0}/scripts/build_skill_zip.py +0 -0
  46. {stata_code-0.8.1 → stata_code-0.9.0}/scripts/check_github_actions.py +0 -0
  47. {stata_code-0.8.1 → stata_code-0.9.0}/scripts/check_versions.py +0 -0
  48. {stata_code-0.8.1 → stata_code-0.9.0}/scripts/export_schema.py +0 -0
  49. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/__main__.py +0 -0
  50. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/_refs.py +0 -0
  51. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/_runtime.py +0 -0
  52. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/log_artifacts.py +0 -0
  53. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/notebook.py +0 -0
  54. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/core/run_index.py +0 -0
  55. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/__init__.py +0 -0
  56. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/__main__.py +0 -0
  57. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/assets/logo-32x32.png +0 -0
  58. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/assets/logo-64x64.png +0 -0
  59. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/assets/logo-svg.svg +0 -0
  60. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/kernel/kernel.py +0 -0
  61. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/mcp/__init__.py +0 -0
  62. {stata_code-0.8.1 → stata_code-0.9.0}/stata_code/mcp/__main__.py +0 -0
  63. {stata_code-0.8.1 → stata_code-0.9.0}/tests/__init__.py +0 -0
  64. {stata_code-0.8.1 → stata_code-0.9.0}/tests/conftest.py +0 -0
  65. {stata_code-0.8.1 → stata_code-0.9.0}/tests/fixtures/.gitkeep +0 -0
  66. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_cancel.py +0 -0
  67. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_github_actions.py +0 -0
  68. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_kernel.py +0 -0
  69. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_log_artifacts.py +0 -0
  70. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_mcp.py +0 -0
  71. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_mcp_stdio.py +0 -0
  72. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_method_prompts.py +0 -0
  73. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_new_tools.py +0 -0
  74. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_notebook.py +0 -0
  75. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_notebook_phase2.py +0 -0
  76. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_public_api.py +0 -0
  77. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_release_versions.py +0 -0
  78. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_run_index.py +0 -0
  79. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_runtime_discovery.py +0 -0
  80. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_schema_artifact.py +0 -0
  81. {stata_code-0.8.1 → stata_code-0.9.0}/tests/test_skill_package.py +0 -0
@@ -4,6 +4,72 @@ All notable changes to `stata-code` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
5
5
  to semver-major.minor for the result schema (see `SCHEMA.md` §6).
6
6
 
7
+ ## 0.9.0 — 2026-06-23
8
+
9
+ ### Fixed
10
+
11
+ - **Error-taxonomy correctness.** Audited the `_rc` → `ErrorKind` table against
12
+ StataCorp's `[P] error` manual (Stata 19) and corrected several
13
+ misclassifications: `not_sorted` is now `r(5)` (was the unrelated `r(119)`
14
+ "statement out of context" / `r(459)` "data is not…"); numlist errors
15
+ `r(122)`/`r(123)` are now `syntax` (were `invalid_name`); `r(322)` and
16
+ `r(1400)` map to `estimation_failure` (were `file_not_found` /
17
+ `estimation_sample_empty`); `r(480)` maps to `infeasible` (was
18
+ `out_of_memory`); local I/O `r(691)`–`r(693)` map to `file_io` (were
19
+ `network`). Misleading mappings for `r(9)`/`r(604)`/`r(615)`/`r(616)` were
20
+ removed (they fall through to `unknown` rather than assert a wrong kind).
21
+ - **Command "did you mean?" now fires.** The `command_not_found` (rc 199) name
22
+ extractor expected `"<X> unrecognized command"`, but Stata's actual message is
23
+ `"command <X> is unrecognized"` — so the fuzzy suggestion never matched in
24
+ practice (synthetic unit tests passed the name in directly and hid it). Fixed
25
+ the regex and added a real-Stata integration test so a typo like `regresss`
26
+ now surfaces "Did you mean `regress`?".
27
+
28
+ ### Added
29
+
30
+ - **Typed estimation contract.** `RunResult.results.estimation` now exposes a
31
+ frontend-neutral coefficient table derived from verified `r(table)` when
32
+ possible, or from inline `e(b)` / `e(V)` as a clearly marked fallback. New
33
+ public helpers `build_estimation_result()` and
34
+ `build_estimation_from_returns()` keep the contract unit-testable without
35
+ Stata. The contract also carries a coarse `command_family`
36
+ (ols/iv/gmm/panel/count/did/…) and command-aware `diagnostics` — identification
37
+ and specification tests, surfaced from `e()`, that economists must
38
+ report (`ivreg2`/`ivreghdfe` weak-ID F and Hansen J, `xtabond2` AR(2)/Hansen,
39
+ `reghdfe` within-R²/absorbed FE, `xtreg` rho). Only scalars actually present in
40
+ `e()` are surfaced — never fabricated.
41
+ - **Machine-readable recovery contract.** `error.recovery` now classifies each
42
+ `ErrorKind` by failure domain and tells agents whether an unchanged retry,
43
+ code edit, or user/out-of-band action is likely needed. Synthetic timeout,
44
+ cancellation, and adapter-crash errors carry the same recovery metadata as
45
+ ordinary Stata errors.
46
+ - **Reproducibility provenance helpers.** New `Provenance`,
47
+ `build_provenance()`, and `build_reproducible_do()` helpers turn a completed
48
+ `RunResult` plus original code into a runtime provenance envelope and a
49
+ re-runnable `.do` script preamble with Stata `version`, `set more off`, and an
50
+ optional `set seed`. Provenance now also records **per-package dependencies**
51
+ parsed from the script (`extract_package_installs()` →
52
+ `Provenance.packages`: `ssc`/`net install` name, source, and `from()` URL),
53
+ and `build_submission_package()` assembles a self-contained
54
+ replication/journal-submission bundle (`analysis.do` + `PROVENANCE.json` +
55
+ a `README.md` manifest listing runtime, seed, and required community packages).
56
+ - **Data-MCP handoff verifier.** New `verify_dataset()` and `DatasetCheck`
57
+ helpers validate imported datasets against provider metadata such as expected
58
+ row count, variable count, observation bounds, and required variables.
59
+ - **`error.rc_label` is now populated for real Stata errors.** New
60
+ `RC_LABEL` table and `label_for_rc()` (public API) supply Stata's canonical
61
+ short message (e.g. `r(111)` → "variable not found") so agents have a stable,
62
+ transcript-independent descriptor to branch and group on. Unverified codes
63
+ yield an empty label rather than a guess.
64
+ - **More return codes classified** (shrinking `unknown`): real network codes
65
+ `r(2)`/`r(631)`/`r(672)`/`r(677)` → `network`; `r(688)` → `file_corrupt`;
66
+ `r(907)` → `stata_limit`; `r(950)` → `out_of_memory`; numlist `r(124)`–`r(127)`
67
+ → `syntax`.
68
+ - **Remediation suggestions for more error kinds.** `suggestions_for()` now
69
+ emits actionable hints for `network`, `infeasible`, `type_mismatch`,
70
+ `file_io`, `file_corrupt`, `permission`, `estimation_failure`, and
71
+ `matrix_missing`, so nearly every common failure ships a recovery hint.
72
+
7
73
  ## 0.8.1 — 2026-06-20
8
74
 
9
75
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stata-code
3
- Version: 0.8.1
3
+ Version: 0.9.0
4
4
  Summary: Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)
5
5
  Project-URL: Homepage, https://github.com/brycewang-stanford/stata-code
6
6
  Project-URL: Repository, https://github.com/brycewang-stanford/stata-code
@@ -184,12 +184,14 @@ Verify the local setup with the read-only doctor:
184
184
  stata-code doctor
185
185
  stata-code doctor --json # machine-readable output
186
186
  stata-code doctor --no-stata-probe # skip live Stata initialization
187
+ stata-code doctor --workspace /path/to/project --no-user-config-scan
187
188
  ```
188
189
 
189
190
  The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
190
- discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
191
- best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
192
- VS Code config.
191
+ discovery, console scripts on `PATH`, common project/user MCP client config
192
+ files, client/VS Code configuration hints, and a best-effort Stata
193
+ version/edition probe. It never edits shell, Stata, Claude, Cursor, or VS Code
194
+ config.
193
195
 
194
196
  ---
195
197
 
@@ -418,7 +420,9 @@ If the extension or an MCP client cannot find the server, run
418
420
  `stata-code doctor --no-stata-probe` in the same Python environment. It reports
419
421
  whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
420
422
  `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
421
- your shell.
423
+ your shell. It also reads common MCP config files in the current workspace and
424
+ user config directories so you can see whether a client is already wired to
425
+ `stata-code`.
422
426
 
423
427
  #### Cell and section conventions
424
428
 
@@ -145,12 +145,14 @@ Verify the local setup with the read-only doctor:
145
145
  stata-code doctor
146
146
  stata-code doctor --json # machine-readable output
147
147
  stata-code doctor --no-stata-probe # skip live Stata initialization
148
+ stata-code doctor --workspace /path/to/project --no-user-config-scan
148
149
  ```
149
150
 
150
151
  The doctor reports the package/Python version, MCP and Jupyter extras, `pystata`
151
- discovery, console scripts on `PATH`, client/VS Code configuration hints, and a
152
- best-effort Stata version/edition probe. It never edits shell, Stata, Claude, or
153
- VS Code config.
152
+ discovery, console scripts on `PATH`, common project/user MCP client config
153
+ files, client/VS Code configuration hints, and a best-effort Stata
154
+ version/edition probe. It never edits shell, Stata, Claude, Cursor, or VS Code
155
+ config.
154
156
 
155
157
  ---
156
158
 
@@ -379,7 +381,9 @@ If the extension or an MCP client cannot find the server, run
379
381
  `stata-code doctor --no-stata-probe` in the same Python environment. It reports
380
382
  whether `stata-code-mcp` is on `PATH` and suggests absolute-path or
381
383
  `python -m stata_code.mcp` fallbacks for GUI clients whose `PATH` differs from
382
- your shell.
384
+ your shell. It also reads common MCP config files in the current workspace and
385
+ user config directories so you can see whether a client is already wired to
386
+ `stata-code`.
383
387
 
384
388
  #### Cell and section conventions
385
389
 
@@ -77,7 +77,38 @@ Every successful or failed Stata execution returns one result object:
77
77
  }
78
78
  }
79
79
  },
80
- "last_estimation_cmd": "regress"
80
+ "last_estimation_cmd": "regress",
81
+ "estimation": {
82
+ "command": "regress",
83
+ "depvar": "mpg",
84
+ "n_obs": 74,
85
+ "df_model": 1,
86
+ "df_resid": null,
87
+ "statistic_kind": "z",
88
+ "source": "e_b_v",
89
+ "ci_level": 95.0,
90
+ "coefficients": [
91
+ {
92
+ "term": "weight",
93
+ "b": -0.006,
94
+ "se": null,
95
+ "statistic": null,
96
+ "p_value": null,
97
+ "ci_low": null,
98
+ "ci_high": null
99
+ },
100
+ {
101
+ "term": "_cons",
102
+ "b": 39.44,
103
+ "se": null,
104
+ "statistic": null,
105
+ "p_value": null,
106
+ "ci_low": null,
107
+ "ci_high": null
108
+ }
109
+ ],
110
+ "model_stats": {"N": 74, "df_m": 1, "r2": 0.219}
111
+ }
81
112
  },
82
113
 
83
114
  "dataset": {
@@ -140,7 +171,8 @@ A failed execution sets `ok: false`, `rc != 0`, and populates `error`:
140
171
 
141
172
  "results": { "r": {"scalars": {}, "macros": {}, "matrices": {}},
142
173
  "e": {"scalars": {}, "macros": {}, "matrices": {}},
143
- "last_estimation_cmd": null },
174
+ "last_estimation_cmd": null,
175
+ "estimation": null },
144
176
 
145
177
  "dataset": { "frame": "default", "n_obs": 74, "n_vars": 12, "changed": false,
146
178
  "filename": "auto.dta", "variables": null },
@@ -167,7 +199,13 @@ A failed execution sets `ok: false`, `rc != 0`, and populates `error`:
167
199
  "suggestions": [
168
200
  {"action": "Check the variable name. Did you mean `mpg`?",
169
201
  "command": "describe"}
170
- ]
202
+ ],
203
+ "recovery": {
204
+ "category": "user_code",
205
+ "retriable": false,
206
+ "needs_code_change": true,
207
+ "needs_user_input": false
208
+ }
171
209
  },
172
210
 
173
211
  "schema_version": "1.0",
@@ -315,6 +353,34 @@ Stata's `r()` and `e()` return dictionaries, structurally separated. Each follow
315
353
  | Field | Type | Notes |
316
354
  | --- | --- | --- |
317
355
  | `last_estimation_cmd` | `string \| null` | Mirrors `e(cmd)` for callers who don't want to dig into `e.macros`. After multi-command code, this reflects the *last* command that wrote to `e()`. `null` if no estimation has been performed. |
356
+ | `estimation` | `EstimationResult \| null` | Typed coefficient table derived from `r(table)` or `e(b)` / `e(V)`. `null` when no inline `e(b)` is available. |
357
+
358
+ **`EstimationResult` shape:**
359
+
360
+ | Field | Type | Notes |
361
+ | --- | --- | --- |
362
+ | `command` | `string \| null` | Mirrors `e(cmd)` when available; falls back to `last_estimation_cmd`. |
363
+ | `depvar` | `string \| null` | Mirrors `e(depvar)`. |
364
+ | `n_obs` | `int \| null` | Integer form of `e(N)` when available. |
365
+ | `df_model` | `number \| null` | Mirrors `e(df_m)`. |
366
+ | `df_resid` | `number \| null` | Mirrors `e(df_r)`. |
367
+ | `statistic_kind` | `"t" \| "z"` | Which statistic fills each coefficient's `statistic` field. |
368
+ | `source` | `"r_table" \| "e_b_v"` | `r_table` means values were copied from Stata's displayed `r(table)` after verifying its columns and `b` row match `e(b)`; `e_b_v` means point estimates come from `e(b)` and inference, when present, is computed from `e(V)` with a normal approximation. |
369
+ | `ci_level` | `number` | Confidence level used for `ci_low` / `ci_high`; currently `95.0`. |
370
+ | `coefficients` | `array<Coefficient>` | One row per term in `e(b)`. |
371
+ | `model_stats` | `dict<str, number \| null>` | High-signal subset of `e()` scalars such as `N`, `df_m`, `df_r`, `r2`, `F`, `chi2`, `ll`, and `rmse`. Full scalars remain under `results.e.scalars`. |
372
+
373
+ **`Coefficient` shape:**
374
+
375
+ | Field | Type | Notes |
376
+ | --- | --- | --- |
377
+ | `term` | `string` | Term / coefficient column name. |
378
+ | `b` | `number \| null` | Point estimate. |
379
+ | `se` | `number \| null` | Standard error when available. |
380
+ | `statistic` | `number \| null` | `t` or `z`, per `EstimationResult.statistic_kind`. |
381
+ | `p_value` | `number \| null` | Two-sided p-value when available. |
382
+ | `ci_low` | `number \| null` | Lower confidence interval bound when available. |
383
+ | `ci_high` | `number \| null` | Upper confidence interval bound when available. |
318
384
 
319
385
  **Empty is empty.** Sub-dicts are `{}` when Stata returned nothing — never absent, never `null`.
320
386
 
@@ -377,6 +443,7 @@ Populated iff `ok: false`. The schema's most important contribution to agent UX:
377
443
  | `varname` | `string \| null` | For `varname_not_found` and related, the variable name at issue. |
378
444
  | `name` | `string \| null` | For `name_conflict` and `invalid_name`, the conflicting/invalid name. |
379
445
  | `suggestions` | `array<Suggestion>` | Producer-supplied remediation hints. Empty when none apply. See below. |
446
+ | `recovery` | `Recovery \| null` | Machine-readable recovery contract for agents. Present on current producers; old or third-party producers may omit it, so consumers should handle a missing or `null` value. |
380
447
 
381
448
  **`context` shape:**
382
449
 
@@ -399,36 +466,47 @@ Populated iff `ok: false`. The schema's most important contribution to agent UX:
399
466
 
400
467
  Suggestions are best-effort; agents should treat them as hints, not directives. A suggestion is not consent to mutate source files or silently retry changed code; consumers should apply fixes automatically only in workflows where the user requested repair or approved iteration. The `kind` enum below documents what suggestions are typically populated.
401
468
 
469
+ **`Recovery` shape:**
470
+
471
+ | Field | Type | Notes |
472
+ | --- | --- | --- |
473
+ | `category` | `"user_code" \| "data" \| "model" \| "resource" \| "environment" \| "internal" \| "unknown"` | Broad failure domain for routing. |
474
+ | `retriable` | `bool` | Whether re-running the exact same code may succeed. True mainly for transient environment or producer-side failures. |
475
+ | `needs_code_change` | `bool` | Whether the submitted Stata code must change to succeed. |
476
+ | `needs_user_input` | `bool` | Whether resolution likely requires a human or out-of-band action such as permissions, license/edition limits, or re-acquiring a corrupt file. |
477
+
402
478
  **`kind` enum (v1.0):**
403
479
 
480
+ The rc(s) below follow StataCorp's `[P] error` manual (Stata 19, 2025). The code is authoritative; this table is a readable mirror.
481
+
404
482
  | `kind` | Typical rc(s) | Notes / suggestion seed |
405
483
  | --- | --- | --- |
406
- | `syntax` | 9, 100, 101, 102, 103, 121, 130, 132, 197, 198 | Generic parser failure. No automatic suggestion. |
484
+ | `syntax` | 100, 101, 102, 103, 121–127, 130, 132, 197, 198 | Generic parser failure (incl. numlist errors 121–127). No automatic suggestion. |
407
485
  | `command_not_found` | 199 | Often resolved by `ssc install` or `net install`; suggestions populated when Stata reports a likely package name. |
408
486
  | `varname_not_found` | 111 | `varname` populated. Suggestions may include similar varnames from `dataset.variables`. |
409
- | `invalid_name` | 122, 123 | `name` populated. |
410
- | `type_mismatch` | 109, 408 | |
487
+ | `invalid_name` | (no dedicated rc) | Stata folds "invalid name" into r(198). `name` populated when constructed by a producer. |
488
+ | `type_mismatch` | 109, 408 | Suggestion: `destring`/`tostring`. |
411
489
  | `name_conflict` | 110 | `name` populated. Suggestion typically: `replace`. |
412
- | `not_sorted` | 119, 459 | Suggestion: `sort <varlist>`. |
490
+ | `not_sorted` | 5 | Suggestion: `sort <varlist>`. |
413
491
  | `convergence` | 430 | |
414
- | `infeasible` | 491 | Distinct from convergence: starting values not feasible. |
415
- | `estimation_sample_empty` | 1400, 2000 (in estimation context) | |
416
- | `estimation_failure` | 1401, 1402 | |
492
+ | `infeasible` | 480, 491 | Distinct from convergence: starting values not feasible (e.g. `nl`, `ml`). |
493
+ | `estimation_sample_empty` | (no dedicated rc) | Empty estimation samples surface as r(2000); producer-set otherwise. |
494
+ | `estimation_failure` | 322, 1400, 1401, 1402 | Postestimation/prefix saw an unexpected result, or numerical overflow. |
417
495
  | `no_estimation_results` | 301 | Common when calling `predict`/`margins` without prior estimation. |
418
496
  | `no_observations` | 2000, 2001 | |
419
497
  | `data_in_memory` | 4 | Suggestion: `clear`. |
420
498
  | `matrix_singular` | 506, 508 | Matrix not positive definite / not invertible. |
421
- | `matrix_conformability` | 503, 507 | Dimension mismatch. |
499
+ | `matrix_conformability` | 503, 507 | Dimension mismatch; 507 is a `matrix post` row/col name conflict kept in the matrix bucket. |
422
500
  | `matrix_missing` | 504 | Matrix has missing values. |
423
- | `file_not_found` | 322, 601 | `path` populated. |
501
+ | `file_not_found` | 601 | `path` populated. |
424
502
  | `file_exists` | 602 | `path` populated. Suggestion: pass `replace` option. |
425
- | `file_corrupt` | 604, 610 | `path` populated. Often "not a Stata file." |
426
- | `file_io` | 603, 691 (local) | `path` populated. Catch-all for open/read/write failures not otherwise classified. |
427
- | `network` | 691 (network), 692, 693 | URL fetches, network reads. |
428
- | `permission` | 608 | `path` populated. Includes Stata-license-limit errors (615/616 family that surface as permission denials). |
429
- | `encoding` | 615, 616 | Unicode / encoding-conversion failures. |
430
- | `stata_limit` | 901, 902, 903 | Edition / matsize / similar Stata-imposed caps. Distinct from OS OOM. Suggestion: `set maxvar` or upgrade edition. |
431
- | `out_of_memory` | 480, 909 | OS-level memory exhaustion. |
503
+ | `file_corrupt` | 610, 688 | `path` populated. "Not a Stata file" (610) or genuinely corrupt (688). |
504
+ | `file_io` | 603, 691, 692, 693 | `path` populated. Catch-all for open/read/write failures (691–693 are local filesystem I/O). |
505
+ | `network` | 2, 631, 672, 677 | Connection timed out / host not found / server refused / remote connection failed. |
506
+ | `permission` | 608 | `path` populated. File is read-only / not writable. |
507
+ | `encoding` | (no dedicated rc) | Unicode / encoding-conversion failures; producer-set. |
508
+ | `stata_limit` | 901, 902, 903, 907 | Edition / maxvar / width caps. Distinct from OS OOM. Suggestion: `set maxvar` or upgrade edition. |
509
+ | `out_of_memory` | 909, 950 | OS-level memory exhaustion. Suggestion: `compress`. |
432
510
  | `interrupt` | 1 | User Break / Ctrl-C from a frontend. |
433
511
  | `cancelled` | (synthetic `rc: -3`) | Cancellation was requested. Subprocess-backed producers may terminate an in-flight worker; the direct in-process runner only short-circuits before Stata receives code. |
434
512
  | `timeout` | (synthetic `rc: -2`) | Adapter-imposed time limit exceeded. |
@@ -607,6 +685,8 @@ This section tracks how much of the schema is wired up in code. Not normative
607
685
  emit a `matrix://<request_id>/<r|e>/<name>` ref instead, retrievable
608
686
  via `get_matrix(ref)`.
609
687
  - `results.last_estimation_cmd` (mirrors `e(cmd)`).
688
+ - `results.estimation` typed coefficient table, copied from verified
689
+ `r(table)` when possible and otherwise derived from inline `e(b)` / `e(V)`.
610
690
  - `dataset` block — `n_obs`, `n_vars`, `frame`, `changed`, `filename`,
611
691
  and `variables` (capped at 200 entries).
612
692
  - `graphs[]` with `ref` + on-disk capture pipeline; format restricted to
@@ -617,7 +697,8 @@ This section tracks how much of the schema is wired up in code. Not normative
617
697
  extracted from Stata's English error text by regex, structured
618
698
  `context` (`{before, failing, after}`), `commands_executed` parsed
619
699
  from pystata's multi-line transcript, `suggestions` generated by
620
- `core.errors.suggestions_for`.
700
+ `core.errors.suggestions_for`, and `recovery` generated by
701
+ `core.errors.recovery_for`.
621
702
  - `request_id` (uuid4 hex), `started_at` (ISO 8601 UTC ms),
622
703
  `stata_elapsed_ms`, `capabilities`.
623
704
  - Multi-session via Stata frames — `session_id="main"` ↔ `default`
@@ -0,0 +1,163 @@
1
+ # Competitive Landscape & Long-Term Goals
2
+
3
+ Last updated: 2026-06-23
4
+
5
+ This document is the **evidence base** behind
6
+ [industry-leader-roadmap.md](industry-leader-roadmap.md). The roadmap says *what
7
+ we will build*; this file says *who else is in the market, where the open lane
8
+ is, and which long-term bets follow from that*. Keep the two in sync: when the
9
+ landscape shifts, update this file first, then re-derive the roadmap.
10
+
11
+ Star counts, install counts, and versions below are point-in-time reads from
12
+ June 2026 and will drift. Treat them as relative signal, not live data.
13
+
14
+ ## North-star positioning
15
+
16
+ > `stata-code` should be the most **reliable, agent-native, typed** way to run,
17
+ > inspect, repair, and audit Stata — winning on *fidelity to the authoritative
18
+ > Stata runtime* and *referee-grade reproducibility*, not on method count or
19
+ > raw breadth.
20
+
21
+ The single fact that defines our lane: **no competitor ships all three of**
22
+
23
+ 1. a typed **error taxonomy** (stable `error.kind` values an agent branches on,
24
+ not return codes + red text it must string-match);
25
+ 2. **typed `r()` / `e()` result contracts** (not a generic results dump); and
26
+ 3. **token-efficient by-reference artifacts** for logs, graphs, *and* matrices.
27
+
28
+ Everything else is either AGPL/GPL-licensed, editor-bound, raw-log-only, or a
29
+ Python/R reimplementation that does not touch the real Stata runtime.
30
+
31
+ ## The field (June 2026)
32
+
33
+ | Tool | License | Exec | Structured out | Typed error kinds | Frontends | Maturity |
34
+ | --- | --- | --- | --- | --- | --- | --- |
35
+ | **stata-code** (this) | MIT | pystata | typed `RunResult` + `r()/e()` | **yes** | MCP + kernel + VS Code | pre-1.0 |
36
+ | tmonk/mcp-stata + workbench | AGPL-3.0 | pystata | JSON + `r()/e()/s()` | no (rc + red text) | MCP + ext | ~69★, very active |
37
+ | hanlulong/stata-mcp | MIT | pystata | no (filtered raw log) | no | VS Code + MCP | ~440★, ~15.6k installs |
38
+ | SepineTam/mcp-for-stata | AGPL-3.0 | do-file subprocess | no (raw SMCL) | no | MCP + CLI (7+ agents) | ~204★, very active |
39
+ | haoyu-haoyu/stata-ai-fusion | MIT | pexpect | partial (`r()/e()/c()`) | no (text-flagged) | MCP + Skill + ext | ~32★, new |
40
+ | stata_kernel (kylebarron) | GPL-3.0 | automation/console | no (raw log) | no | Jupyter | ~278★ |
41
+ | nbstata (hugetim) | GPL-3.0 | pystata | no (raw log + widgets) | no | Jupyter | ~59★ |
42
+ | pystata (StataCorp) | proprietary | native | `r()/e()`→dict, mat→NumPy | no (Py exceptions) | IPython magics | ships w/ Stata |
43
+ | StatsPAI (sibling, Python) | MIT | Python reimpl. | yes (agent cards) | partial (validation) | MCP | ~244★, daily |
44
+
45
+ ### Reading the table
46
+
47
+ - **The genuine head-to-head is `tmonk/mcp-stata`** — pystata-backed, MCP-native,
48
+ returns `r()/e()`, ships a skills catalog, and has StataCorp-newsletter
49
+ visibility. Its two gaps are our two wedges: **(a) no typed error taxonomy**
50
+ (agents still parse return codes + preserved red text) and **(b) AGPL-3.0**,
51
+ a hard blocker for commercial/embeddable adoption. MIT + typed kinds is the
52
+ clean answer to both.
53
+ - **The adoption leader is `hanlulong/stata-mcp`** (~15.6k installs). Despite
54
+ using pystata — which *could* expose stored results — it surfaces **filtered
55
+ raw log text**, not a schema. Distribution, not schema quality, is its moat;
56
+ we must not assume a better contract auto-wins installs.
57
+ - **Jupyter kernels and pystata itself are human-facing**, not agent-native:
58
+ raw log + images, Python exceptions, no MCP, no by-reference economy.
59
+ - **The real *category* threat is the Python/R reimplementation wave**
60
+ (StatsPAI, rmcp): they took the "first agent-native econometrics" framing.
61
+ We do not beat them on method count — we beat them by being the authoritative
62
+ Stata leg they themselves reach for when cross-validating.
63
+
64
+ ## Why the error taxonomy is the defensible identity
65
+
66
+ A typed error taxonomy is the one capability **no competitor has** and the
67
+ **hardest to retrofit** onto a raw-log design. It is also cheap for us to lead
68
+ on because we already have the architecture (`error.kind`, `error.suggestions`,
69
+ `error.rc_label`, pinpoint context). The 2026-06-23 core pass made this concrete:
70
+
71
+ - audited `RC_TO_KIND` against StataCorp `[P] error` (Stata 19) and corrected
72
+ multiple misclassifications (e.g. `not_sorted` is `r(5)`, not the unrelated
73
+ `r(119)`/`r(459)`);
74
+ - populated `error.rc_label` with Stata's canonical short message via
75
+ `label_for_rc()` (it was silently empty for every real error before);
76
+ - expanded `suggestions_for()` so nearly every common failure ships an
77
+ actionable recovery hint.
78
+
79
+ The moat is not "we have error kinds" — it is "**our error kinds are correct,
80
+ labeled, and paired with a recovery action**, verified against the manual." That
81
+ is the kind of trust an empirical economist and a referee both need.
82
+
83
+ ## Long-term goals (6–12 months), by leverage
84
+
85
+ Ranked. Each ties back to a roadmap pillar and is phrased as a durable outcome,
86
+ not a feature list. Status legend: ✅ shipped · 🟡 foundation shipped, stretch
87
+ remaining · ⬜ not started.
88
+
89
+ 1. ✅ **Own "agent-native typed Stata errors" as the headline.** *Shipped
90
+ 2026-06-23.* `error.kind` is a stable, manual-verified contract (audited
91
+ against `[P] error`); every classified rc ships a canonical `rc_label`
92
+ (`label_for_rc`) and, where actionable, a remediation `suggestion`. The
93
+ **agent recovery contract** (`error.recovery` / `recovery_for`) gives a
94
+ defined next action per kind: retry-as-is, change-code, or escalate.
95
+ → Roadmap pillar 1 (reliable execution contract).
96
+ 2. ✅ **Per-command typed `r()/e()` result contracts for mandated commands.**
97
+ *Shipped 2026-06-23.* `RunResult.results.estimation` is a typed coefficient
98
+ table (term/b/se/statistic/p/CI + model_stats) from referee-grade `r(table)`
99
+ (or `e(b)`/`e(V)` fallback). It now also carries `command_family`
100
+ (ols/iv/gmm/panel/count/…) and command-aware `diagnostics` — the
101
+ identification/spec tests economists must report (`ivreg2`/`ivreghdfe`
102
+ weak-ID F + Hansen J, `xtabond2` AR(2)/Hansen, `reghdfe` within-R²/absorbed
103
+ FE, `xtreg` rho), surfaced only when present in `e()`. This is the
104
+ StatsPAI-defense: referee-grade numbers from the exact mandated command.
105
+ → Roadmap pillar 2.
106
+ 3. ✅ **Reproducibility / provenance envelope.** *Shipped 2026-06-23.*
107
+ `build_provenance()` captures Stata version/edition, `e(cmd)`, stata-code +
108
+ schema versions, timestamp, seed, and **per-package dependencies** parsed
109
+ from the script (`ssc`/`net install` → `Provenance.packages`);
110
+ `build_reproducible_do()` renders a `version`-pinned, seed-set re-runnable
111
+ `.do`; and `build_submission_package()` assembles a replication/journal
112
+ bundle (do + `PROVENANCE.json` + README manifest). *Stretch:* package
113
+ *version* pinning (vs. name only) and a journal-specific layout.
114
+ → Roadmap pillar 1 + 3.
115
+ 4. ✅ **Data-MCP integration bridge** (FRED / World Bank / Census).
116
+ *Shipped 2026-06-23.* `verify_dataset()` enforces the handoff's key checks
117
+ (row/var counts, observation bounds, required columns) on the captured
118
+ `DatasetInfo` — the executable companion to the `data-mcp-handoff` protocol,
119
+ documented in `references/structured-results.md`. *Stretch:* first-class
120
+ adapters that ferry source metadata (row hash, series ids) into the check
121
+ automatically. **No Stata MCP has done this composition.** → Roadmap pillar 4.
122
+ 5. 🟡 **Typed-schema-anchored skills catalog** — replication audits, robustness
123
+ sweeps, publication QA, legacy `.do` modernization — each anchored to typed
124
+ results + provenance, under MIT. *Foundation shipped:*
125
+ `references/structured-results.md` teaches agents to consume the typed
126
+ contracts (`results.estimation`/`diagnostics`, `error.recovery`/`rc_label`,
127
+ reproducible-do / submission bundles, `verify_dataset`). *Stretch:* the
128
+ audit/robustness/QA recipe set in the skills lane
129
+ (`skills/stata-code/references/recipes/**`). → Roadmap pillar 2.
130
+
131
+ ## Risks & threats
132
+
133
+ - **StataCorp native AI — LOW near-term, monitor.** Stata 19 shipped classical
134
+ H2O ML only; no LLM/copilot/agent feature is shipped or announced, and
135
+ StataCorp frames AI as community-built. Watch *New in Stata* / StataNow for any
136
+ shift from tutorials to a shipped feature. Our durable hedge: we *require* a
137
+ genuine Stata license, so our incentives align with StataCorp's rather than
138
+ compete with them.
139
+ - **Distribution gap.** hanlulong already has ~15.6k installs and tmonk has
140
+ newsletter visibility. A superior contract does not auto-win adoption; the
141
+ typed-error / reproducibility story must reach economists where they are
142
+ (SSC, Statalist, replication and referee communities).
143
+ - **Category framing already taken by Python/R.** "Agent-native econometrics
144
+ with structured results + MCP" is where StatsPAI and rmcp both converged. Defend by
145
+ positioning on **authoritative Stata-runtime fidelity**, not breadth.
146
+ - **Generic code-exec substitution.** A Jupyter-MCP + statsmodels sandbox can
147
+ "do econometrics" with zero domain tooling. Defense: *mandated Stata commands
148
+ + verifiable typed `e()` contracts* a generic sandbox cannot provide.
149
+ - **License contagion.** The two structured/MCP-native competitors (tmonk,
150
+ SepineTam) are AGPL; the kernels are GPL. To preserve our MIT clean-room
151
+ wedge we must never vendor their code paths (see `LICENSE-POLICY.md`).
152
+ - **Naming/trademark.** "Stata" is a StataCorp trademark and the `*stata-mcp`
153
+ namespace is crowded. Keep the "not affiliated with StataCorp" disclaimer
154
+ prominent and avoid implying endorsement.
155
+
156
+ ## Sources
157
+
158
+ Competitor repos and listings (June 2026): tmonk/mcp-stata, tmonk/stata-workbench;
159
+ hanlulong/stata-mcp (VS Code Marketplace: DeepEcon.stata-mcp); SepineTam/mcp-for-stata
160
+ (PyPI `stata-mcp`); haoyu-haoyu/stata-ai-fusion; kylebarron/stata_kernel; hugetim/nbstata;
161
+ StataCorp pystata docs and *New in Stata 19*; brycewang-stanford/StatsPAI; finite-sample/rmcp;
162
+ data MCPs (datacommonsorg/agent-toolkit, stefanoamorelli/fred-mcp-server, worldbank/data360-mcp).
163
+ StataCorp `[P] error` (Stata 19, 2025) is the authoritative source for the error-code audit.
@@ -46,6 +46,21 @@ causal libraries, or paid services. Those are separate tools. The durable
46
46
  boundary is: external data/model tools produce files or results; `stata-code`
47
47
  executes and audits the Stata side with traceable artifacts.
48
48
 
49
+ ## Market Refresh (2026-06-23)
50
+
51
+ | Adjacent tool | Current strength | Implication for `stata-code` |
52
+ | --- | --- | --- |
53
+ | Official Stata PyStata + Jupyter support | Official Python-side Stata API, IPython magics, and notebook workflow | Keep `pystata` discovery reliable and treat official Stata as the execution source of truth |
54
+ | `nbstata` / `stata_kernel` | Stata-first notebooks, autocomplete, graphs, data browsing, and rich notebook interaction | Win on the shared execution contract across notebooks, MCP, and VS Code rather than duplicating every notebook UI feature |
55
+ | Stata Workbench / `mcp-stata` | Agent-facing IDE workflow with Stata execution, variables, graphs, logs, and multi-session framing | Compete through structured `RunResult`, token-economic artifacts, and clear audit trails |
56
+ | `stata-mcp` / DeepEcon Stata MCP | One-command multi-agent install story, doctor diagnostics, and broad client messaging | Close setup-confidence gaps with read-only client-config visibility and explicit fallback commands |
57
+ | Stata All in One / Stata Enhanced | Human editor polish: syntax, outline, hints, execution, and data viewing | Keep VS Code ergonomics practical, but route execution and artifacts through the same MCP/core schema |
58
+
59
+ Near-term priority: keep the project boringly dependable for agents. In practice,
60
+ read-only setup diagnostics, visible MCP client wiring, stable schema contracts,
61
+ and artifact discovery should outrank broad new integrations unless a testable
62
+ workflow needs them.
63
+
49
64
  ## One-Month Execution Plan
50
65
 
51
66
  ### Week 1: Workflow Layer
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "stata-code"
7
- version = "0.8.1"
7
+ version = "0.9.0"
8
8
  description = "Agent-native Stata bridge — one core, multiple frontends (MCP, Jupyter, VSCode)"
9
9
  readme = "README.md"
10
10
  license = "MIT"