stata-code 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,360 @@
1
+ """MCP server exposing the stata_code v1.0 pipeline.
2
+
3
+ Tools registered:
4
+ - stata_run — execute Stata code, return a v1.0 RunResult JSON
5
+ - stata_info — report Stata edition / version / capabilities
6
+ - get_log — fetch full log behind a `log://` ref
7
+ - get_graph — fetch graph bytes behind a `graph://` ref (ImageContent)
+ - get_matrix — fetch matrix rows / cols / values behind a `matrix://` ref
8
+ - list_sessions — enumerate live sessions (frames)
9
+ - cancel_session — request cancellation of a session's run
+ - reset_session — drop a session's data
10
+
11
+ The result envelope, token-economy defaults (log head+tail+ref, graph refs
12
+ not inline), session model, and error taxonomy follow SCHEMA.md.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import json
19
+ import sys
20
+ from typing import Any
21
+
22
+ try:
23
+ from mcp.server import Server
24
+ from mcp.server.stdio import stdio_server
25
+ from mcp.types import ImageContent, TextContent, Tool
26
+
27
+ _MCP_AVAILABLE = True
28
+ except ImportError: # pragma: no cover - environment without mcp installed
29
+ Server = None # type: ignore[assignment,misc]
30
+ Tool = None # type: ignore[assignment,misc]
31
+ TextContent = None # type: ignore[assignment,misc]
32
+ ImageContent = None # type: ignore[assignment,misc]
33
+ stdio_server = None # type: ignore[assignment]
34
+ _MCP_AVAILABLE = False
35
+
36
+ from stata_code.core._pool import get_default_pool, pool_execute
37
+ from stata_code.core._runtime import PystataNotAvailable, is_available
38
+ from stata_code.core.runner import (
39
+ cancel,
40
+ get_graph,
41
+ get_log,
42
+ get_matrix,
43
+ is_cancel_pending,
44
+ )
45
+
46
__version__ = "0.3.0"

# The server object can only be built when the optional `mcp` dependency was
# imported successfully; otherwise APP stays None.
APP: Any = None
if _MCP_AVAILABLE:
    APP = Server("stata-code")
49
+
50
+
51
+ # ─────────────────────────────────────────────────────────────────────────────
52
+ # Tool registry
53
+ # ─────────────────────────────────────────────────────────────────────────────
54
+
55
+
56
def _tool_definitions() -> list[Tool]:
    """Build the list of MCP tools this server advertises.

    Each ``Tool`` carries the name routed on by ``_dispatch``, a description
    shown to the client, and a JSON Schema for its arguments. A fresh list is
    built on every call.

    Returns:
        The tool definitions, in registration order.
    """
    return [
        Tool(
            name="stata_run",
            description=(
                "Execute Stata code and return a v1.0 stata_code RunResult "
                "(see SCHEMA.md). The result is a JSON object with ok, rc, "
                "error (typed), log (head+tail+ref by default), results.r/e "
                "(scalars/macros/matrices, native types), dataset metadata, "
                "graphs, warnings, and capabilities. Use the structured "
                "fields rather than parsing the log."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "Stata code to execute. Multi-line OK.",
                    },
                    "session_id": {
                        "type": "string",
                        "default": "main",
                        "description": (
                            "Session name. 'main' is the master frame; "
                            "other names create/route to that Stata frame "
                            "(data isolation only; r()/e() remain global)."
                        ),
                    },
                    "include_graphs": {
                        "type": "string",
                        "enum": ["ref", "inline", "none"],
                        "default": "ref",
                    },
                    "graph_format": {
                        "type": "string",
                        "enum": ["png", "svg", "pdf"],
                        "default": "png",
                    },
                    "log_lines_head": {"type": "integer", "default": 20},
                    "log_lines_tail": {"type": "integer", "default": 20},
                    "include_full_log": {"type": "boolean", "default": False},
                    "include_dataset_variables": {
                        "type": "boolean",
                        "default": True,
                    },
                },
                "required": ["code"],
            },
        ),
        Tool(
            name="stata_info",
            # Kept in sync with the keys `_info_payload` actually emits
            # (available, edition, backend, schema_version).
            description=(
                "Report whether Stata is available and, when it is, the "
                "installed edition, the backend, and the result schema "
                "version."
            ),
            inputSchema={"type": "object", "properties": {}},
        ),
        Tool(
            name="get_log",
            description=(
                "Fetch the full log text behind a log:// ref returned by a "
                "prior stata_run call. Returns JSON {text, lines_total, "
                "bytes_total}."
            ),
            inputSchema={
                "type": "object",
                "properties": {"ref": {"type": "string"}},
                "required": ["ref"],
            },
        ),
        Tool(
            name="get_graph",
            description=(
                "Fetch graph bytes behind a graph:// ref. Returns an "
                "ImageContent (base64 bytes + mimeType) suitable for direct "
                "display by vision-capable clients."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "ref": {"type": "string"},
                    "format": {
                        "type": "string",
                        "enum": ["png", "svg", "pdf"],
                    },
                },
                "required": ["ref"],
            },
        ),
        Tool(
            name="get_matrix",
            description=(
                "Fetch a matrix's values, rows, and cols behind a matrix:// "
                "ref. Producers emit a ref instead of inlining values when "
                "the matrix exceeds ~10,000 cells. Returns JSON {rows, cols, "
                "values}."
            ),
            inputSchema={
                "type": "object",
                "properties": {"ref": {"type": "string"}},
                "required": ["ref"],
            },
        ),
        Tool(
            name="list_sessions",
            description=(
                "Enumerate live sessions. Each entry has session_id, frame "
                "(Stata frame name), and n_obs."
            ),
            inputSchema={"type": "object", "properties": {}},
        ),
        Tool(
            name="cancel_session",
            # The dispatcher sets the cooperative flag AND asks the pool to
            # kill the session's worker, and it reports `killed_worker`; the
            # description must say so or clients will misjudge the effect.
            description=(
                "Request cancellation for this session. Sets a cooperative "
                "cancel flag that is consumed by the next stata_run, which "
                "then returns a RunResult with ok=false, rc=-3, "
                "error.kind='cancelled'. In subprocess-pool mode the "
                "session's worker process is also terminated so that a call "
                "currently mid-execution is interrupted. Returns JSON "
                "{session_id, was_pending, is_pending, killed_worker}."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "session_id": {"type": "string", "default": "main"},
                },
            },
        ),
        Tool(
            name="reset_session",
            description=(
                "Drop a session's data. session_id='main' performs `clear "
                "all` in place (default frame cannot be dropped); other "
                "names drop the corresponding Stata frame."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "session_id": {"type": "string", "default": "main"},
                },
            },
        ),
    ]
199
+
200
+
201
if _MCP_AVAILABLE:
    # Handlers are only registered when the server object exists.

    @APP.list_tools()
    async def list_tools() -> list[Tool]:
        """Return the tool definitions advertised by this server."""
        tools = _tool_definitions()
        return tools

    @APP.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]:
        """Forward a tool invocation to the module-level dispatcher."""
        response = await _dispatch(name, arguments)
        return response
210
+
211
+
212
+ # ─────────────────────────────────────────────────────────────────────────────
213
+ # Dispatch (kept module-level for testability)
214
+ # ─────────────────────────────────────────────────────────────────────────────
215
+
216
+
217
# MIME type reported to the client for each graph format name. Formats that
# are not listed here fall back to "image/png" in `_dispatch`.
_GRAPH_MIME = {
    "png": "image/png",
    "svg": "image/svg+xml",
    "pdf": "application/pdf",
}


def _require_ref(arguments: dict[str, Any]) -> str:
    """Return the ``ref`` argument, raising ``ValueError`` if it is missing."""
    ref = arguments.get("ref")
    if not ref:
        raise ValueError("missing required argument: 'ref'")
    return ref


async def _dispatch(name: str, arguments: dict[str, Any]) -> list[Any]:
    """Route a tool call to its implementation and wrap the result.

    Args:
        name: Tool name as registered in ``_tool_definitions``.
        arguments: Tool arguments supplied by the client.

    Returns:
        A list of MCP content objects. Failures are never raised to the
        caller: every exception is converted into a single ``TextContent``
        describing the problem, and an unrecognised ``name`` yields
        ``"Unknown tool: <name>"``.
    """
    try:
        if name == "stata_run":
            # NOTE(review): `_run_tool` is a plain synchronous call, so this
            # coroutine does not yield while the code runs — confirm whether
            # it should be offloaded (e.g. `asyncio.to_thread`) so that a
            # `cancel_session` request can be serviced mid-run.
            return _run_tool(arguments)
        if name == "stata_info":
            return [TextContent(type="text", text=_info_payload())]
        if name == "get_log":
            # Validate up front so a missing argument is not reported as an
            # "Unknown ref" by the KeyError handler below.
            payload = get_log(_require_ref(arguments))
            return [TextContent(type="text", text=json.dumps(payload))]
        if name == "get_graph":
            # NOTE(review): the tool schema also accepts `format`, but it is
            # not forwarded here — confirm whether `get_graph` should get it.
            payload = get_graph(_require_ref(arguments))
            mime = _GRAPH_MIME.get(payload["format"], "image/png")
            return [
                ImageContent(
                    type="image", data=payload["bytes_b64"], mimeType=mime
                )
            ]
        if name == "get_matrix":
            payload = get_matrix(_require_ref(arguments))
            return [TextContent(type="text", text=json.dumps(payload))]
        if name == "list_sessions":
            # In subprocess-pool mode each session lives in its own worker
            # process, so the parent's `list_sessions()` (which queries the
            # parent's pystata frames) is empty. Authoritative source is
            # `pool.list_session_info()`, which round-trips a no-payload
            # `list_sessions` op to each live worker and aggregates. Dead
            # or unresponsive workers are skipped silently — partial info
            # beats failing the whole list call.
            sessions = get_default_pool().list_session_info()
            return [TextContent(type="text", text=json.dumps(sessions))]
        if name == "cancel_session":
            sid = arguments.get("session_id", "main")
            was_pending = not cancel(sid)  # cancel() returns False if already pending
            # In subprocess-pool mode, also SIGTERM the worker so an in-flight
            # call that's blocked inside Stata C-land actually terminates rather
            # than waiting for the next inter-command cooperative checkpoint.
            killed_worker = get_default_pool().kill_session(sid)
            return [
                TextContent(
                    type="text",
                    text=json.dumps(
                        {
                            "session_id": sid,
                            "was_pending": was_pending,
                            "is_pending": is_cancel_pending(sid),
                            "killed_worker": killed_worker,
                        }
                    ),
                )
            ]
        if name == "reset_session":
            sid = arguments.get("session_id", "main")
            # Pool-mode: killing the session's worker drops its data and
            # all in-memory state; the next stata_run for that session
            # respawns a fresh worker. For "main" this is equivalent to
            # `clear all` (both wipe data + r()/e()), with the wrinkle
            # that ref-store entries this session produced stay valid in
            # the parent's `_refs` LRU until naturally evicted.
            dropped = get_default_pool().kill_session(sid)
            return [
                TextContent(
                    type="text",
                    text=json.dumps(
                        {
                            "session_id": sid,
                            "dropped_frame": dropped,
                        }
                    ),
                )
            ]
        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    except KeyError as exc:
        # Raised by the ref lookups when the ref is not (or no longer) known.
        return [TextContent(type="text", text=f"Unknown ref: {exc}")]
    except PystataNotAvailable as exc:
        return [TextContent(type="text", text=f"Stata not available: {exc}")]
    except (ValueError, NotImplementedError) as exc:
        return [TextContent(type="text", text=f"{type(exc).__name__}: {exc}")]
    except Exception as exc:  # noqa: BLE001 - last-resort safety net
        return [TextContent(type="text", text=f"Error: {type(exc).__name__}: {exc}")]
303
+
304
+
305
def _run_tool(arguments: dict[str, Any]) -> list[Any]:
    """Run the ``stata_run`` tool and serialise its result.

    ``code`` is taken out of ``arguments``; every remaining entry is passed
    to ``pool_execute`` as a keyword argument. The caller's dict is not
    modified.

    Returns:
        A single-element list holding a ``TextContent`` with either the
        result's JSON dump or a JSON ``{"error": ...}`` object (when ``code``
        is missing/empty, or when ``pool_execute`` raises ``ValueError`` or
        ``NotImplementedError``). Other exceptions propagate to the caller.
    """
    source = arguments.get("code")
    options = {key: value for key, value in arguments.items() if key != "code"}

    if not source:
        return [TextContent(type="text", text='{"error": "code is required"}')]

    try:
        outcome = pool_execute(source, **options)
    except (ValueError, NotImplementedError) as exc:
        message = f"{type(exc).__name__}: {exc}"
        return [TextContent(type="text", text=json.dumps({"error": message}))]

    return [TextContent(type="text", text=outcome.model_dump_json())]
320
+
321
+
322
def _info_payload() -> str:
    """Return the ``stata_info`` response as a JSON string.

    When the runtime is unavailable the payload is just
    ``{"available": false}``; otherwise it also carries the runtime's
    edition, the backend name, and the schema version.
    """
    if is_available():
        # Imported lazily, only once the runtime is known to be available.
        from stata_code.core._runtime import get_runtime

        runtime = get_runtime()
        info: dict[str, Any] = {
            "available": True,
            "edition": runtime.edition,
            "backend": "pystata",
            "schema_version": "1.0",
        }
    else:
        info = {"available": False}
    return json.dumps(info)
336
+
337
+
338
+ # ─────────────────────────────────────────────────────────────────────────────
339
+ # Entrypoint
340
+ # ─────────────────────────────────────────────────────────────────────────────
341
+
342
+
343
async def main() -> None:
    """Run the MCP server over the stdio transport.

    If the ``mcp`` package could not be imported, an error is printed to
    stderr and the process exits with status 1.
    """
    if _MCP_AVAILABLE:
        async with stdio_server() as (reader, writer):
            init_options = APP.create_initialization_options()
            await APP.run(reader, writer, init_options)
        return

    print(
        "ERROR: mcp package not installed. Install with: pip install mcp",
        file=sys.stderr,
    )
    sys.exit(1)
352
+
353
+
354
def run_main() -> None:
    """Synchronous entry point for the `stata-code-mcp` console script.

    Runs the async ``main`` coroutine to completion via ``asyncio.run``.
    """
    server_coro = main()
    asyncio.run(server_coro)


# Start the server when the module is executed directly.
if __name__ == "__main__":  # pragma: no cover
    run_main()