stata-code 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stata_code/__init__.py +100 -0
- stata_code/core/__init__.py +73 -0
- stata_code/core/_pool.py +808 -0
- stata_code/core/_refs.py +97 -0
- stata_code/core/_runtime.py +179 -0
- stata_code/core/errors.py +447 -0
- stata_code/core/runner.py +1092 -0
- stata_code/core/schema.py +317 -0
- stata_code/kernel/__init__.py +5 -0
- stata_code/kernel/__main__.py +6 -0
- stata_code/kernel/kernel.py +331 -0
- stata_code/mcp/__init__.py +3 -0
- stata_code/mcp/__main__.py +6 -0
- stata_code/mcp/server.py +360 -0
- stata_code-0.3.0.dist-info/METADATA +389 -0
- stata_code-0.3.0.dist-info/RECORD +20 -0
- stata_code-0.3.0.dist-info/WHEEL +4 -0
- stata_code-0.3.0.dist-info/entry_points.txt +3 -0
- stata_code-0.3.0.dist-info/licenses/LICENSE +21 -0
- stata_code-0.3.0.dist-info/licenses/LICENSE-POLICY.md +125 -0
stata_code/mcp/server.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""MCP server exposing the stata_code v1.0 pipeline.
|
|
2
|
+
|
|
3
|
+
Tools registered:
- stata_run — execute Stata code, return a v1.0 RunResult JSON
- stata_info — report Stata edition / version / capabilities
- get_log — fetch full log behind a `log://` ref
- get_graph — fetch graph bytes behind a `graph://` ref (ImageContent)
- get_matrix — fetch a matrix's rows, cols, and values behind a `matrix://` ref
- list_sessions — enumerate live sessions (frames)
- cancel_session — request cancellation of a session's pending or in-flight run
- reset_session — drop a session's data
|
|
10
|
+
|
|
11
|
+
The result envelope, token-economy defaults (log head+tail+ref, graph refs
|
|
12
|
+
not inline), session model, and error taxonomy follow SCHEMA.md.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from mcp.server import Server
|
|
24
|
+
from mcp.server.stdio import stdio_server
|
|
25
|
+
from mcp.types import ImageContent, TextContent, Tool
|
|
26
|
+
|
|
27
|
+
_MCP_AVAILABLE = True
|
|
28
|
+
except ImportError: # pragma: no cover - environment without mcp installed
|
|
29
|
+
Server = None # type: ignore[assignment,misc]
|
|
30
|
+
Tool = None # type: ignore[assignment,misc]
|
|
31
|
+
TextContent = None # type: ignore[assignment,misc]
|
|
32
|
+
ImageContent = None # type: ignore[assignment,misc]
|
|
33
|
+
stdio_server = None # type: ignore[assignment]
|
|
34
|
+
_MCP_AVAILABLE = False
|
|
35
|
+
|
|
36
|
+
from stata_code.core._pool import get_default_pool, pool_execute
|
|
37
|
+
from stata_code.core._runtime import PystataNotAvailable, is_available
|
|
38
|
+
from stata_code.core.runner import (
|
|
39
|
+
cancel,
|
|
40
|
+
get_graph,
|
|
41
|
+
get_log,
|
|
42
|
+
get_matrix,
|
|
43
|
+
is_cancel_pending,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# Version string carried by this module.
__version__ = "0.3.0"

# Module-level MCP server instance named "stata-code". It is None when the
# optional `mcp` package failed to import, so importing this module never
# requires `mcp` to be installed.
APP: Any = Server("stata-code") if _MCP_AVAILABLE else None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
52
|
+
# Tool registry
|
|
53
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _tool_definitions() -> list[Tool]:
    """Return the catalogue of MCP tools advertised by this server.

    Each ``Tool`` carries the name that ``_dispatch`` routes on, a description
    aimed at the calling client/model, and a JSON Schema for its arguments.
    A fresh list is built on every call.
    """
    return [
        Tool(
            name="stata_run",
            description=(
                "Execute Stata code and return a v1.0 stata_code RunResult "
                "(see SCHEMA.md). The result is a JSON object with ok, rc, "
                "error (typed), log (head+tail+ref by default), results.r/e "
                "(scalars/macros/matrices, native types), dataset metadata, "
                "graphs, warnings, and capabilities. Use the structured "
                "fields rather than parsing the log."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Stata code to execute. Multi-line OK.",
                    },
                    "session_id": {
                        "type": "string",
                        "default": "main",
                        "description": (
                            "Session name. 'main' is the master frame; "
                            "other names create/route to that Stata frame "
                            "(data isolation only; r()/e() remain global)."
                        ),
                    },
                    "include_graphs": {
                        "type": "string",
                        "enum": ["ref", "inline", "none"],
                        "default": "ref",
                    },
                    "graph_format": {
                        "type": "string",
                        "enum": ["png", "svg", "pdf"],
                        "default": "png",
                    },
                    "log_lines_head": {"type": "integer", "default": 20},
                    "log_lines_tail": {"type": "integer", "default": 20},
                    "include_full_log": {"type": "boolean", "default": False},
                    "include_dataset_variables": {
                        "type": "boolean",
                        "default": True,
                    },
                },
                "required": ["code"],
            },
        ),
        Tool(
            name="stata_info",
            description=(
                "Report installed Stata edition, version, backend, and "
                "whether the runtime is initialized."
            ),
            inputSchema={"type": "object", "properties": {}},
        ),
        Tool(
            name="get_log",
            description=(
                "Fetch the full log text behind a log:// ref returned by a "
                "prior stata_run call. Returns JSON {text, lines_total, "
                "bytes_total}."
            ),
            inputSchema={
                "type": "object",
                "properties": {"ref": {"type": "string"}},
                "required": ["ref"],
            },
        ),
        Tool(
            name="get_graph",
            description=(
                "Fetch graph bytes behind a graph:// ref. Returns an "
                "ImageContent (base64 bytes + mimeType) suitable for direct "
                "display by vision-capable clients."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "ref": {"type": "string"},
                    "format": {
                        "type": "string",
                        "enum": ["png", "svg", "pdf"],
                    },
                },
                "required": ["ref"],
            },
        ),
        Tool(
            name="get_matrix",
            description=(
                "Fetch a matrix's values, rows, and cols behind a matrix:// "
                "ref. Producers emit a ref instead of inlining values when "
                "the matrix exceeds ~10,000 cells. Returns JSON {rows, cols, "
                "values}."
            ),
            inputSchema={
                "type": "object",
                "properties": {"ref": {"type": "string"}},
                "required": ["ref"],
            },
        ),
        Tool(
            name="list_sessions",
            description=(
                "Enumerate live sessions. Each entry has session_id, frame "
                "(Stata frame name), and n_obs."
            ),
            inputSchema={"type": "object", "properties": {}},
        ),
        Tool(
            name="cancel_session",
            # Kept in step with the cancel_session branch of `_dispatch`,
            # which both sets the cooperative flag and asks the pool to kill
            # the session's worker, and reports `killed_worker` in its reply.
            description=(
                "Request cancellation for this session. Sets a cooperative "
                "flag that is consumed by the next stata_run and produces a "
                "RunResult with ok=false, rc=-3, error.kind='cancelled'. In "
                "subprocess-pool mode the session's worker is also "
                "terminated so that a call blocked mid-execution stops. "
                "Returns JSON {session_id, was_pending, is_pending, "
                "killed_worker}."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "session_id": {"type": "string", "default": "main"},
                },
            },
        ),
        Tool(
            name="reset_session",
            description=(
                "Drop a session's data. session_id='main' performs `clear "
                "all` in place (default frame cannot be dropped); other "
                "names drop the corresponding Stata frame."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "session_id": {"type": "string", "default": "main"},
                },
            },
        ),
    ]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
if _MCP_AVAILABLE:
    # Handlers are registered only when the `mcp` import succeeded; otherwise
    # APP is None and there are no decorators to apply.

    @APP.list_tools()
    async def list_tools() -> list[Tool]:
        """Return the tool catalogue built by ``_tool_definitions``."""
        return _tool_definitions()

    @APP.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]:
        """Forward a tool invocation to the module-level ``_dispatch``."""
        return await _dispatch(name, arguments)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
213
|
+
# Dispatch (kept module-level for testability)
|
|
214
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
_GRAPH_MIME = {
|
|
218
|
+
"png": "image/png",
|
|
219
|
+
"svg": "image/svg+xml",
|
|
220
|
+
"pdf": "application/pdf",
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
async def _dispatch(name: str, arguments: dict[str, Any]) -> list[Any]:
    """Route one MCP tool call to its implementation.

    Args:
        name: Tool name, one of those advertised by ``_tool_definitions``.
        arguments: Tool arguments. ``None`` is treated as "no arguments".

    Returns:
        A single-element list of MCP content: ``ImageContent`` for
        ``get_graph`` and ``TextContent`` for everything else. Exceptions
        are not propagated; each failure is reported as a ``TextContent``
        message instead.
    """
    arguments = arguments or {}

    def _required_ref() -> Any:
        # A missing `ref` used to surface as KeyError('ref') and was reported
        # as "Unknown ref: 'ref'", which blames the handle rather than the
        # call. Reject it explicitly so the KeyError handler below only
        # speaks for lookups of a ref that was actually supplied.
        if "ref" not in arguments:
            raise ValueError("missing required argument 'ref'")
        return arguments["ref"]

    def _json_reply(payload: Any) -> list[Any]:
        # Serialize a JSON-compatible payload into one text content item.
        return [TextContent(type="text", text=json.dumps(payload))]

    try:
        if name == "stata_run":
            # NOTE(review): `_run_tool` is a plain synchronous call made from
            # this coroutine; presumably it blocks the event loop until the
            # run finishes, which would keep a concurrent cancel_session from
            # being served — confirm before relying on in-flight cancellation.
            return _run_tool(arguments)
        if name == "stata_info":
            return [TextContent(type="text", text=_info_payload())]
        if name == "get_log":
            return _json_reply(get_log(_required_ref()))
        if name == "get_graph":
            # NOTE(review): the optional `format` argument declared in the
            # tool schema is not read here; the MIME type is derived from the
            # stored payload's own "format" field.
            payload = get_graph(_required_ref())
            mime = _GRAPH_MIME.get(payload["format"], "image/png")
            return [
                ImageContent(
                    type="image", data=payload["bytes_b64"], mimeType=mime
                )
            ]
        if name == "get_matrix":
            return _json_reply(get_matrix(_required_ref()))
        if name == "list_sessions":
            # In subprocess-pool mode each session lives in its own worker
            # process, so the parent's `list_sessions()` (which queries the
            # parent's pystata frames) is empty. Authoritative source is
            # `pool.list_session_info()`, which round-trips a no-payload
            # `list_sessions` op to each live worker and aggregates. Dead
            # or unresponsive workers are skipped silently — partial info
            # beats failing the whole list call.
            return _json_reply(get_default_pool().list_session_info())
        if name == "cancel_session":
            sid = arguments.get("session_id", "main")
            was_pending = not cancel(sid)  # cancel() returns False if already pending
            # In subprocess-pool mode, also SIGTERM the worker so an in-flight
            # call that's blocked inside Stata C-land actually terminates rather
            # than waiting for the next inter-command cooperative checkpoint.
            killed_worker = get_default_pool().kill_session(sid)
            return _json_reply(
                {
                    "session_id": sid,
                    "was_pending": was_pending,
                    "is_pending": is_cancel_pending(sid),
                    "killed_worker": killed_worker,
                }
            )
        if name == "reset_session":
            sid = arguments.get("session_id", "main")
            # Pool-mode: killing the session's worker drops its data and
            # all in-memory state; the next stata_run for that session
            # respawns a fresh worker. For "main" this is equivalent to
            # `clear all` (both wipe data + r()/e()), with the wrinkle
            # that ref-store entries this session produced stay valid in
            # the parent's `_refs` LRU until naturally evicted.
            dropped = get_default_pool().kill_session(sid)
            return _json_reply({"session_id": sid, "dropped_frame": dropped})
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    except KeyError as exc:
        return [TextContent(type="text", text=f"Unknown ref: {exc}")]
    except PystataNotAvailable as exc:
        return [TextContent(type="text", text=f"Stata not available: {exc}")]
    except (ValueError, NotImplementedError) as exc:
        return [TextContent(type="text", text=f"{type(exc).__name__}: {exc}")]
    except Exception as exc:  # noqa: BLE001 - last-resort safety net
        return [TextContent(type="text", text=f"Error: {type(exc).__name__}: {exc}")]
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _run_tool(arguments: dict[str, Any]) -> list[Any]:
    """Run the ``stata_run`` tool and wrap the outcome as MCP text content.

    ``code`` is split off from a copy of ``arguments``; every remaining entry
    is forwarded to ``pool_execute`` as a keyword argument. A missing or
    empty ``code``, and a ``ValueError`` / ``NotImplementedError`` raised by
    ``pool_execute``, are reported as a JSON object with a single ``error``
    key. Otherwise the text is the result's ``model_dump_json()`` output.
    """
    # Work on a copy so the caller's mapping is left untouched.
    options = dict(arguments)
    source = options.pop("code", None)

    if source:
        try:
            outcome = pool_execute(source, **options)
        except (ValueError, NotImplementedError) as exc:
            body = json.dumps({"error": f"{type(exc).__name__}: {exc}"})
        else:
            body = outcome.model_dump_json()
    else:
        body = '{"error": "code is required"}'

    return [TextContent(type="text", text=body)]
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def _info_payload() -> str:
    """Build the JSON text returned by the ``stata_info`` tool.

    When ``is_available()`` is false the payload is ``{"available": false}``.
    Otherwise it reports the runtime's ``edition`` together with the fixed
    ``backend`` and ``schema_version`` values.
    """
    if is_available():
        # Imported lazily, only on the path that needs a runtime.
        from stata_code.core._runtime import get_runtime

        info = {
            "available": True,
            "edition": get_runtime().edition,
            "backend": "pystata",
            "schema_version": "1.0",
        }
    else:
        info = {"available": False}
    return json.dumps(info)
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
339
|
+
# Entrypoint
|
|
340
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
async def main() -> None:
    """Serve the MCP application over stdio.

    If the ``mcp`` package could not be imported, print an installation hint
    to stderr and exit with status 1.
    """
    if _MCP_AVAILABLE:
        async with stdio_server() as (reader, writer):
            await APP.run(reader, writer, APP.create_initialization_options())
        return

    print(
        "ERROR: mcp package not installed. Install with: pip install mcp",
        file=sys.stderr,
    )
    sys.exit(1)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def run_main() -> None:
    """Synchronous entry point for the `stata-code-mcp` console script.

    Drives the ``main`` coroutine to completion with ``asyncio.run``.
    """
    server_coro = main()
    asyncio.run(server_coro)


if __name__ == "__main__":  # pragma: no cover
    run_main()
|