stata-code 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stata_code-0.3.0/.gitignore +224 -0
- stata_code-0.3.0/CHANGELOG.md +205 -0
- stata_code-0.3.0/LICENSE +21 -0
- stata_code-0.3.0/LICENSE-POLICY.md +125 -0
- stata_code-0.3.0/PKG-INFO +389 -0
- stata_code-0.3.0/PUBLISHING.md +122 -0
- stata_code-0.3.0/README.md +351 -0
- stata_code-0.3.0/SCHEMA.md +593 -0
- stata_code-0.3.0/docs/design/hard_timeout.md +161 -0
- stata_code-0.3.0/examples/01-basic-regression.md +170 -0
- stata_code-0.3.0/examples/02-did-card-krueger.md +307 -0
- stata_code-0.3.0/examples/03-graphs.md +155 -0
- stata_code-0.3.0/examples/04-multi-session.md +191 -0
- stata_code-0.3.0/examples/05-large-matrix.md +140 -0
- stata_code-0.3.0/examples/README.md +15 -0
- stata_code-0.3.0/pyproject.toml +97 -0
- stata_code-0.3.0/schema/run_result.schema.json +726 -0
- stata_code-0.3.0/scripts/export_schema.py +82 -0
- stata_code-0.3.0/stata_code/__init__.py +100 -0
- stata_code-0.3.0/stata_code/core/__init__.py +73 -0
- stata_code-0.3.0/stata_code/core/_pool.py +808 -0
- stata_code-0.3.0/stata_code/core/_refs.py +97 -0
- stata_code-0.3.0/stata_code/core/_runtime.py +179 -0
- stata_code-0.3.0/stata_code/core/errors.py +447 -0
- stata_code-0.3.0/stata_code/core/runner.py +1092 -0
- stata_code-0.3.0/stata_code/core/schema.py +317 -0
- stata_code-0.3.0/stata_code/kernel/__init__.py +5 -0
- stata_code-0.3.0/stata_code/kernel/__main__.py +6 -0
- stata_code-0.3.0/stata_code/kernel/kernel.py +331 -0
- stata_code-0.3.0/stata_code/mcp/__init__.py +3 -0
- stata_code-0.3.0/stata_code/mcp/__main__.py +6 -0
- stata_code-0.3.0/stata_code/mcp/server.py +360 -0
- stata_code-0.3.0/tests/__init__.py +1 -0
- stata_code-0.3.0/tests/fixtures/.gitkeep +0 -0
- stata_code-0.3.0/tests/test_cancel.py +133 -0
- stata_code-0.3.0/tests/test_errors.py +231 -0
- stata_code-0.3.0/tests/test_kernel.py +267 -0
- stata_code-0.3.0/tests/test_mcp.py +249 -0
- stata_code-0.3.0/tests/test_pool.py +365 -0
- stata_code-0.3.0/tests/test_runner.py +777 -0
- stata_code-0.3.0/tests/test_schema.py +561 -0
- stata_code-0.3.0/tests/test_schema_artifact.py +61 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
# Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
# uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
# poetry.lock
|
|
109
|
+
# poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
# pdm.lock
|
|
116
|
+
# pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
# pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# Redis
|
|
135
|
+
*.rdb
|
|
136
|
+
*.aof
|
|
137
|
+
*.pid
|
|
138
|
+
|
|
139
|
+
# RabbitMQ
|
|
140
|
+
mnesia/
|
|
141
|
+
rabbitmq/
|
|
142
|
+
rabbitmq-data/
|
|
143
|
+
|
|
144
|
+
# ActiveMQ
|
|
145
|
+
activemq-data/
|
|
146
|
+
|
|
147
|
+
# SageMath parsed files
|
|
148
|
+
*.sage.py
|
|
149
|
+
|
|
150
|
+
# Environments
|
|
151
|
+
.env
|
|
152
|
+
.envrc
|
|
153
|
+
.venv
|
|
154
|
+
env/
|
|
155
|
+
venv/
|
|
156
|
+
ENV/
|
|
157
|
+
env.bak/
|
|
158
|
+
venv.bak/
|
|
159
|
+
|
|
160
|
+
# Spyder project settings
|
|
161
|
+
.spyderproject
|
|
162
|
+
.spyproject
|
|
163
|
+
|
|
164
|
+
# Rope project settings
|
|
165
|
+
.ropeproject
|
|
166
|
+
|
|
167
|
+
# mkdocs documentation
|
|
168
|
+
/site
|
|
169
|
+
|
|
170
|
+
# mypy
|
|
171
|
+
.mypy_cache/
|
|
172
|
+
.dmypy.json
|
|
173
|
+
dmypy.json
|
|
174
|
+
|
|
175
|
+
# Pyre type checker
|
|
176
|
+
.pyre/
|
|
177
|
+
|
|
178
|
+
# pytype static type analyzer
|
|
179
|
+
.pytype/
|
|
180
|
+
|
|
181
|
+
# Cython debug symbols
|
|
182
|
+
cython_debug/
|
|
183
|
+
|
|
184
|
+
# PyCharm
|
|
185
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
186
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
188
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
189
|
+
# .idea/
|
|
190
|
+
|
|
191
|
+
# Abstra
|
|
192
|
+
# Abstra is an AI-powered process automation framework.
|
|
193
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
194
|
+
# Learn more at https://abstra.io/docs
|
|
195
|
+
.abstra/
|
|
196
|
+
|
|
197
|
+
# Visual Studio Code
|
|
198
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
199
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
200
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
201
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
202
|
+
# .vscode/
|
|
203
|
+
# Temporary file for partial code execution
|
|
204
|
+
tempCodeRunnerFile.py
|
|
205
|
+
|
|
206
|
+
# Ruff stuff:
|
|
207
|
+
.ruff_cache/
|
|
208
|
+
|
|
209
|
+
# PyPI configuration file
|
|
210
|
+
.pypirc
|
|
211
|
+
|
|
212
|
+
# Marimo
|
|
213
|
+
marimo/_static/
|
|
214
|
+
marimo/_lsp/
|
|
215
|
+
__marimo__/
|
|
216
|
+
|
|
217
|
+
# Streamlit
|
|
218
|
+
.streamlit/secrets.toml
|
|
219
|
+
|
|
220
|
+
# Stata-specific
|
|
221
|
+
*.gph
|
|
222
|
+
*.smcl
|
|
223
|
+
*.dta
|
|
224
|
+
!tests/fixtures/*.dta
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `stata-code` are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/); the project adheres
|
|
5
|
+
to semver-style `major.minor` versioning for the result schema (see `SCHEMA.md` §6).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
## [0.3.0] — 2026-05-07
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
|
|
13
|
+
- **PyPI distribution renamed to `stata-code`.** Previously published as
|
|
14
|
+
`stata_code`. Install with `pip install stata-code` going forward; the
|
|
15
|
+
Python import name remains `stata_code` (Python identifier rules — same
|
|
16
|
+
pattern as `scikit-learn` → `import sklearn`). Existing users on
|
|
17
|
+
`pip install stata_code` will keep working until that PyPI project
|
|
18
|
+
stops receiving new versions, but should migrate.
|
|
19
|
+
- **Project URLs in `pyproject.toml` corrected** to
|
|
20
|
+
`github.com/brycewang-stanford/stata-code` (the actual repository
|
|
21
|
+
URL — the previous metadata had `stata_code`).
|
|
22
|
+
- **MCP server announces itself as `stata-code`** (was `stata_code`).
|
|
23
|
+
This is the protocol-level server name; tool ids
|
|
24
|
+
(`stata_run`, `get_log`, etc.) are unchanged.
|
|
25
|
+
- **VSCode extension display name unified to `stata-code`** in the
|
|
26
|
+
Marketplace, activity-bar tile, command-palette `category`, output
|
|
27
|
+
channel, all toast messages, and webview title. Code identifiers
|
|
28
|
+
(`stataCode.*` command / view / setting ids; npm `name`
|
|
29
|
+
`stata-code-vscode`) are unchanged so existing keybindings keep
|
|
30
|
+
working.
|
|
31
|
+
- **Version aligned across surfaces.** `pyproject.toml`,
|
|
32
|
+
`stata_code/__init__.py`, `stata_code/mcp/server.py`,
|
|
33
|
+
`vscode/package.json`, and the VSCode MCP-client handshake all
|
|
34
|
+
declare `0.3.0`.
|
|
35
|
+
|
|
36
|
+
### Added
|
|
37
|
+
|
|
38
|
+
- **VSCode extension v0.3 — full UI surface** (`vscode/`). Beyond the
|
|
39
|
+
v0.1 "run from command palette" scaffold, the extension now ships
|
|
40
|
+
every common GUI affordance, so users who don't drive Stata through
|
|
41
|
+
Claude Code / Cursor can still operate the same MCP server from the
|
|
42
|
+
editor:
|
|
43
|
+
- **Editor title-bar ▶ button** (`editor/title/run` menu) and
|
|
44
|
+
editor right-click menu entries (`Run Selection` / `Run Active File`).
|
|
45
|
+
- **Status bar item** showing the current session; click for a
|
|
46
|
+
QuickPick (`Switch session…` / `Cancel` / `Reset`). The icon
|
|
47
|
+
swaps to a spinner during runs and the run progress notification
|
|
48
|
+
now has a Cancel button (cooperative cancellation through the
|
|
49
|
+
MCP `cancel_session` tool).
|
|
50
|
+
- **Activity-bar sidebar** with four views: live `Sessions` (with
|
|
51
|
+
inline Cancel/Reset/Close per item — `main` is non-closable;
|
|
52
|
+
locally-known but not-yet-started sessions persist via
|
|
53
|
+
`workspaceState`), `Last Result` (collapsible
|
|
54
|
+
`r()` / `e()` / warnings / dataset / log / graphs), `Graphs`
|
|
55
|
+
history (click-to-open + per-item Save…), and `Logs`
|
|
56
|
+
history (click-to-open + per-item Save…). Section-header buttons
|
|
57
|
+
for Clear (logs / graphs) and New / Refresh (sessions).
|
|
58
|
+
- **Inline error decorations.** Failed runs now publish a
|
|
59
|
+
`DiagnosticCollection` entry on the failing file/line, complete
|
|
60
|
+
with the typed error message, failing snippet, and any
|
|
61
|
+
suggestions surfaced in `runResult.error.suggestions`. Hover
|
|
62
|
+
shows the full text; the Problems panel lists the entry under
|
|
63
|
+
`source: stata-code, code: <error.kind>`.
|
|
64
|
+
- **Code-lens "Run Cell" support.** Lines starting with `* %%`
|
|
65
|
+
get an inline `▶ Run Cell` lens; clicking submits the code
|
|
66
|
+
between markers. Cell ranges map back to the original file
|
|
67
|
+
lines so error squigglies still anchor correctly.
|
|
68
|
+
- **Graph webview action buttons.** The webview now uses a strict
|
|
69
|
+
nonce-based CSP and exposes `Save as…`, `Open externally`, and
|
|
70
|
+
`Refresh` per-graph and panel-level buttons. PNG/SVG/PDF bytes
|
|
71
|
+
still flow lazily through `get_graph(ref)`.
|
|
72
|
+
- Bumped the extension version to `0.2.0` (subsequently aligned to `0.3.0` for this release; see "Version aligned across surfaces" above).
|
|
73
|
+
|
|
74
|
+
- **Matrix size cap + `get_matrix(ref)`.** Matrices larger than
|
|
75
|
+
`MATRIX_INLINE_CELL_CAP` (default 10,000 cells) now drop their
|
|
76
|
+
`values` from the envelope and surface a
|
|
77
|
+
`matrix://<request_id>/<r|e>/<name>` ref instead. Callers fetch the values via `get_matrix(ref)`,
|
|
78
|
+
which mirrors the existing `get_log` / `get_graph` pattern. The MCP
|
|
79
|
+
server gains a seventh tool, `get_matrix`, returning JSON
|
|
80
|
+
`{rows, cols, values}`. Closes the last open §3.4 todo from
|
|
81
|
+
SCHEMA.md and prevents pathological commands (e.g., `correlate` over
|
|
82
|
+
hundreds of variables) from blowing up the result envelope.
|
|
83
|
+
|
|
84
|
+
- **VSCode extension scaffold** (`vscode/`). TypeScript extension that
|
|
85
|
+
spawns `stata-code-mcp` over stdio and registers four commands
|
|
86
|
+
(`Run Selection`, `Run Active File`, `Show Graphs`, `Show Last
|
|
87
|
+
Result`). Hand-rolled TypeScript types in
|
|
88
|
+
`vscode/src/types/runResult.ts` mirror the Pydantic envelope;
|
|
89
|
+
`npm run gen-types` regenerates a full copy from
|
|
90
|
+
`schema/run_result.schema.json` for cross-checking. Source-only —
|
|
91
|
+
build with `npm install && npm run compile`.
|
|
92
|
+
|
|
93
|
+
- **VSCode graph webview** (`vscode/src/graphPanel.ts`). Successful
|
|
94
|
+
runs that capture graphs auto-open a side-by-side webview that
|
|
95
|
+
renders PNG / SVG / PDF inline. The webview lazily fetches each
|
|
96
|
+
graph's bytes via `get_graph(ref)` rather than embedding them in
|
|
97
|
+
the original `RunResult`, so token economy is preserved end-to-end
|
|
98
|
+
(an agent driving the same MCP server pays nothing extra for
|
|
99
|
+
inlining). Strict CSP (`default-src 'none'`, no scripts).
|
|
100
|
+
Marketplace publishing still deferred.
|
|
101
|
+
|
|
102
|
+
- **`stata_required` pytest marker.** Integration tests against a
|
|
103
|
+
real Stata installation are now tagged with the marker; CI runs
|
|
104
|
+
`pytest -m "not stata_required"`, completing in ~1.5s instead of
|
|
105
|
+
~19s. Locally, without Stata installed, the same tests still skip cleanly.
|
|
106
|
+
|
|
107
|
+
- **Cooperative cancellation.** New `cancel(session_id)` /
|
|
108
|
+
`clear_cancel(session_id)` / `is_cancel_pending(session_id)` Python
|
|
109
|
+
API plus the MCP `cancel_session` tool (eighth tool). A pending
|
|
110
|
+
cancel short-circuits the next `execute()` call for that session
|
|
111
|
+
and returns a `RunResult` with `ok=false`, `rc=-3` (synthetic),
|
|
112
|
+
`error.kind="cancelled"`. The flag is one-shot per cancel, isolated
|
|
113
|
+
per session, and thread-safe. Note: this is *cooperative* — it does
|
|
114
|
+
not interrupt code that is currently mid-`stata.run()` (pystata is
|
|
115
|
+
in-process and has no clean cancel primitive). Hard interruption
|
|
116
|
+
remains deferred to the subprocess-based runtime planned for v0.3+.
|
|
117
|
+
|
|
118
|
+
### Changed
|
|
119
|
+
|
|
120
|
+
- **MCP server tool count is now 8** (added `get_matrix`,
|
|
121
|
+
`cancel_session`).
|
|
122
|
+
|
|
123
|
+
## [0.2.0] — 2026-05-07
|
|
124
|
+
|
|
125
|
+
The first release that actually ships an end-to-end Stata pipeline. The v1.0
|
|
126
|
+
result schema is the load-bearing artifact; everything below is implemented
|
|
127
|
+
against it and end-to-end-tested on Stata 18 MP.
|
|
128
|
+
|
|
129
|
+
### Added
|
|
130
|
+
|
|
131
|
+
- **`SCHEMA.md` v1.0** — normative result-envelope contract: `ok` / `rc`,
|
|
132
|
+
typed `error` (32 `kind` values), structured `r()` / `e()` (scalars,
|
|
133
|
+
macros, matrices), `dataset` snapshot with variable list, log
|
|
134
|
+
head+tail+ref, graph refs with PNG/SVG/PDF support, multi-session id,
|
|
135
|
+
forward-compat clauses.
|
|
136
|
+
- **`stata_code.run()`** (= `execute()`) — the real-Stata pipeline. Uses
|
|
137
|
+
pystata in-process; collects native-typed return values via `sfi`;
|
|
138
|
+
builds a `RunResult` end to end.
|
|
139
|
+
- **`get_log` / `get_graph` / `list_sessions` / `reset_session`** —
|
|
140
|
+
auxiliary tools per `SCHEMA.md` §5.
|
|
141
|
+
- **MCP server** (`stata_code.mcp.server`) — six tools: `stata_run`,
|
|
142
|
+
`stata_info`, `get_log`, `get_graph`, `list_sessions`,
|
|
143
|
+
`reset_session`. Console script: `stata-code-mcp`. Module entry:
|
|
144
|
+
`python -m stata_code.mcp`.
|
|
145
|
+
- **Jupyter kernel** (`stata_code.kernel`) rewired to the v1.0 pipeline.
|
|
146
|
+
Defaults tuned for notebooks (`include_full_log=True`,
|
|
147
|
+
`include_graphs="inline"`). Console script: `stata-code-kernel`.
|
|
148
|
+
Module entry: `python -m stata_code.kernel`.
|
|
149
|
+
- **Multi-session via Stata frames**. `session_id="main"` maps to the
|
|
150
|
+
default frame; other ids create/route to same-named frames.
|
|
151
|
+
- **Per-line error attribution** — `error.line`, `commands_executed`,
|
|
152
|
+
and `context.{before, failing, after}` are populated by parsing
|
|
153
|
+
pystata's multi-line transcript.
|
|
154
|
+
- **Warning extraction** — five built-in patterns
|
|
155
|
+
(`omitted_collinear`, `convergence`, `singular`, `boundary`, generic
|
|
156
|
+
`note`) + dedup.
|
|
157
|
+
- **Graph capture pipeline** — `graph dir` snapshot delta + `graph
|
|
158
|
+
display` + `graph export`; PNG `width`/`height` parsed from IHDR;
|
|
159
|
+
bytes stored under a `_refs` LRU.
|
|
160
|
+
- **`_refs` LRU eviction** — bounded ref store (default 256 entries)
|
|
161
|
+
to keep long-running MCP processes from growing unboundedly.
|
|
162
|
+
- **`LICENSE-POLICY.md`** — clean-room policy that forbids opening
|
|
163
|
+
AGPL/GPL Stata project source.
|
|
164
|
+
- **138 tests** covering schema, runner integration, MCP, kernel,
|
|
165
|
+
`_refs`, and error helpers. Real-Stata tests run against Stata 18 MP
|
|
166
|
+
when available.
|
|
167
|
+
|
|
168
|
+
### Changed
|
|
169
|
+
|
|
170
|
+
- Top-level `stata_code.run()` now returns the new `RunResult` (Pydantic
|
|
171
|
+
v2). The legacy `StataResult` dataclass and the `capture_graphs`/
|
|
172
|
+
`capture_log`/`timeout` keyword arguments are gone.
|
|
173
|
+
- Wheel build now ships **all** of `stata_code` (`core`, `mcp`,
|
|
174
|
+
`kernel`). Previously the wheel only contained `core`.
|
|
175
|
+
|
|
176
|
+
### Removed
|
|
177
|
+
|
|
178
|
+
- **Legacy modules** — `core/pystata_adapter.py`, `core/console_fallback.py`,
|
|
179
|
+
`core/result.py`, `core/version.py`. Their behavior is now provided by
|
|
180
|
+
`core/runner.py`, `core/_runtime.py`, `core/schema.py`.
|
|
181
|
+
- **Legacy tests** — `tests/test_result.py`, `tests/test_version.py`,
|
|
182
|
+
`tests/test_integration.py`. Coverage moved to `tests/test_runner.py`,
|
|
183
|
+
`tests/test_schema.py`, and `tests/test_mcp.py`.
|
|
184
|
+
|
|
185
|
+
### Migration notes
|
|
186
|
+
|
|
187
|
+
| Before (v0.1) | After (v0.2) |
|
|
188
|
+
| --- | --- |
|
|
189
|
+
| `from stata_code import run` returns `StataResult` | Returns `RunResult` |
|
|
190
|
+
| `result.log` (string) | `result.log.head` / `result.log.tail` (and `get_log(ref)` for full) |
|
|
191
|
+
| `result.results["r(mean)"]` | `result.results.r.scalars["mean"]` (native float) |
|
|
192
|
+
| `result.error` (string) | `result.error.kind` (typed) + `result.error.message` |
|
|
193
|
+
| `result.graphs[0].data` (bytes) | `result.graphs[0].ref` + `get_graph(ref)` |
|
|
194
|
+
| `run(code, capture_graphs=True)` | `run(code, include_graphs="ref" \| "inline" \| "none")` |
|
|
195
|
+
| `run(code, timeout=120)` | `run(code, timeout_ms=120_000)` |
|
|
196
|
+
|
|
197
|
+
`pystata` is no longer declared as a runtime dependency in
|
|
198
|
+
`pyproject.toml` — it is sourced from your local Stata install per the
|
|
199
|
+
documented `_runtime` discovery path.
|
|
200
|
+
|
|
201
|
+
## [0.1.0] — 2026-04
|
|
202
|
+
|
|
203
|
+
Initial scaffolding. `pystata_adapter`, `console_fallback`, basic kernel
|
|
204
|
+
and MCP server, `References-tools.md` survey, project vision in
|
|
205
|
+
`README.md`. Largely superseded by 0.2.
|
stata_code-0.3.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 brycew6m
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# License Policy
|
|
2
|
+
|
|
3
|
+
`stata_code` is released under the **MIT License**. To keep the codebase legally clean and freely usable downstream (including by commercial and closed-source projects), this repository follows a strict **protocol-first, clean-room** development policy. This document is the binding policy; contributors must read it before opening a pull request.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Project license
|
|
8
|
+
|
|
9
|
+
- **License:** MIT (see `LICENSE`).
|
|
10
|
+
- **Goal:** Anyone — including commercial and closed-source projects — can integrate, fork, or redistribute `stata_code` without copyleft obligations.
|
|
11
|
+
|
|
12
|
+
This goal is incompatible with deriving from AGPL-3.0 / GPL-3.0 source code. The rules below exist to prevent that.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## 2. The three categories of references
|
|
17
|
+
|
|
18
|
+
Every external project relevant to `stata_code` falls into one of three buckets:
|
|
19
|
+
|
|
20
|
+
### 2.1 Open standards & vendor docs (always allowed)
|
|
21
|
+
|
|
22
|
+
These define **public protocols and APIs**. Reading them, citing them, and implementing against them does not contaminate our code.
|
|
23
|
+
|
|
24
|
+
- **Anthropic MCP specification** — protocol shape, message formats, tool registration semantics.
|
|
25
|
+
- **Jupyter kernel protocol** — `kernel_info`, `execute_request`, message routing.
|
|
26
|
+
- **Language Server Protocol (LSP)** — for any future LSP work.
|
|
27
|
+
- **StataCorp pystata documentation** — official Python API surface.
|
|
28
|
+
- **StataCorp Stata documentation** (`help`, manuals) — `r()`, `e()`, `_rc`, system values.
|
|
29
|
+
- **Stata `.dta` file format documentation** — published by StataCorp.
|
|
30
|
+
- **Anthropic / OpenAI tool-use docs** — function-calling shapes.
|
|
31
|
+
|
|
32
|
+
### 2.2 Permissively-licensed projects (allowed with attribution if reused)
|
|
33
|
+
|
|
34
|
+
MIT, BSD, Apache 2.0, ISC. Reading source is allowed; copying must follow the license terms (preserve copyright notice, etc.). Even when allowed, we **prefer independent implementation** to keep authorship clean.
|
|
35
|
+
|
|
36
|
+
- `kylebarron/stata-enhanced` — MIT (TextMate grammar; we do not reuse it).
|
|
37
|
+
- `kylebarron/stata-exec` — MIT (Atom; not reused).
|
|
38
|
+
- `kylebarron/language-stata` — MIT (Atom grammar; not reused).
|
|
39
|
+
- `hanlulong/stata-mcp` — MIT (we do not consult its source; see §4).
|
|
40
|
+
- `lbraglia/RStata` — license not recorded here (TODO: confirm and state it explicitly, per §5); design reference only.
|
|
41
|
+
- `euglevi/stata-language-server` — MIT.
|
|
42
|
+
|
|
43
|
+
### 2.3 Copyleft projects (source code forbidden)
|
|
44
|
+
|
|
45
|
+
**Source code of these projects must not be read by anyone contributing to `stata_code`.** Their READMEs, public issues, demos, screenshots, and documentation describing user-facing behavior are fine — copyright protects expression, not ideas. But the source itself contaminates.
|
|
46
|
+
|
|
47
|
+
- `SepineTam/stata-mcp` — AGPL-3.0
|
|
48
|
+
- `tmonk/mcp-stata` — AGPL-3.0
|
|
49
|
+
- `tmonk/stata-workbench` — AGPL-3.0
|
|
50
|
+
- `kylebarron/stata_kernel` — GPL-3.0
|
|
51
|
+
- `hugetim/nbstata` — GPL-3.0
|
|
52
|
+
|
|
53
|
+
If new copyleft Stata projects appear, add them here in the same PR that first references them.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## 3. The clean-room rule
|
|
58
|
+
|
|
59
|
+
When designing or implementing any feature that overlaps with a copyleft project's behavior:
|
|
60
|
+
|
|
61
|
+
1. **Do not open the copyleft project's source files.** Not in a browser, not in `git clone`, not in an IDE.
|
|
62
|
+
2. **You may** read its README, feature list, screenshots, public issues, blog posts, and conference talks describing what it does.
|
|
63
|
+
3. **You may** read the underlying public protocol or API spec (MCP, pystata, etc.) and implement against that.
|
|
64
|
+
4. **You may** look at the inputs and outputs (call its tools, observe responses) — black-box behavioral observation is fine.
|
|
65
|
+
5. **Design from first principles.** Our schema (`SCHEMA.md`) was designed from agent-token-economy principles and the public pystata API. It was not derived by simplifying or rearranging anyone else's schema.
|
|
66
|
+
|
|
67
|
+
If you find yourself thinking *"how does project X handle Y?"*, the answer is: read its docs and observe its behavior. Do not open its source.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 4. If you accidentally read forbidden source
|
|
72
|
+
|
|
73
|
+
It happens. Honesty is the only safe response.
|
|
74
|
+
|
|
75
|
+
1. **Stop reading immediately.** Close the file.
|
|
76
|
+
2. **Disclose in the PR or issue.** Note what you read and approximately how much.
|
|
77
|
+
3. **Wait at least 30 days** before contributing code in the affected area. If the area is small (e.g., one function), a different contributor who has not read the source implements it instead. If it is broad, the contributor who read the source sits out that area indefinitely.
|
|
78
|
+
4. **Do not** quote, paraphrase, or rewrite from memory.
|
|
79
|
+
|
|
80
|
+
This is the same posture used by clean-room reverse-engineering teams. It is conservative on purpose.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## 5. Adding a new reference
|
|
85
|
+
|
|
86
|
+
When introducing any new external project to documentation, code, or discussion:
|
|
87
|
+
|
|
88
|
+
1. Add it to one of the three lists in §2 of this file in the same PR.
|
|
89
|
+
2. State its license explicitly (check the `LICENSE` file, not `package.json`/`README` — those drift).
|
|
90
|
+
3. If the project is copyleft, the PR must not include any code — only the listing under §2.3.
|
|
91
|
+
|
|
92
|
+
Reviewers should reject PRs that mention an external project without classifying it.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## 6. Dependencies vs. derivation
|
|
97
|
+
|
|
98
|
+
Note the difference:
|
|
99
|
+
|
|
100
|
+
- **Depending** on an MIT/BSD/Apache library at runtime is fine and does not contaminate.
|
|
101
|
+
- **Depending** on a GPL/AGPL library at runtime *does* contaminate the distributed package; we don't do that for any package we ship under MIT.
|
|
102
|
+
- **Depending** on a GPL/AGPL library only in a separate, GPL-licensed sub-package (e.g., `stata-code-jupyter-glue`) is acceptable as long as the MIT core does not import it. Any such split must be called out at the top of the README and in `pyproject.toml`.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## 7. Why this matters
|
|
107
|
+
|
|
108
|
+
Stata is a small ecosystem with active and vigilant maintainers, several of whom have publicly enforced their AGPL terms. A clean license posture:
|
|
109
|
+
|
|
110
|
+
- Keeps `stata_code` usable by any downstream — universities, central banks, commercial vendors.
|
|
111
|
+
- Prevents "rip-off" accusations that have already been leveled at fork-style projects in the space.
|
|
112
|
+
- Makes future fundraising, hiring, and acquisitions trivial on the IP side.
|
|
113
|
+
- Protects contributors personally — clean-room compliance is auditable.
|
|
114
|
+
|
|
115
|
+
The cost of this policy is small (some independent design work). The cost of getting it wrong is irreversible: a contaminated codebase cannot be "scrubbed" of AGPL after the fact; only rewritten from scratch by uncontaminated authors.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## 8. Acknowledgement on first contribution
|
|
120
|
+
|
|
121
|
+
Every first-time contributor to `stata_code` adds the following line to their first PR description:
|
|
122
|
+
|
|
123
|
+
> I have read `LICENSE-POLICY.md` and confirm I have not consulted source code from the copyleft projects listed therein for the purposes of this contribution.
|
|
124
|
+
|
|
125
|
+
Maintainers may decline contributions without this acknowledgement.
|