PyPI - stata-code - Versions diffs - 0.3.0__py3-none-any.whl - Mend

stata-code 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

stata_code/__init__.py +100 -0
stata_code/core/__init__.py +73 -0
stata_code/core/_pool.py +808 -0
stata_code/core/_refs.py +97 -0
stata_code/core/_runtime.py +179 -0
stata_code/core/errors.py +447 -0
stata_code/core/runner.py +1092 -0
stata_code/core/schema.py +317 -0
stata_code/kernel/__init__.py +5 -0
stata_code/kernel/__main__.py +6 -0
stata_code/kernel/kernel.py +331 -0
stata_code/mcp/__init__.py +3 -0
stata_code/mcp/__main__.py +6 -0
stata_code/mcp/server.py +360 -0
stata_code-0.3.0.dist-info/METADATA +389 -0
stata_code-0.3.0.dist-info/RECORD +20 -0
stata_code-0.3.0.dist-info/WHEEL +4 -0
stata_code-0.3.0.dist-info/entry_points.txt +3 -0
stata_code-0.3.0.dist-info/licenses/LICENSE +21 -0
stata_code-0.3.0.dist-info/licenses/LICENSE-POLICY.md +125 -0

stata_code/core/schema.py ADDED Viewed

@@ -0,0 +1,317 @@
+"""Pydantic v2 models for the stata_code v1.0 result schema (see SCHEMA.md)."""
+from __future__ import annotations
+import re
+from enum import Enum
+from typing import Literal
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+# ─────────────────────────────────────────────────────────────────────────────
+# Enums (closed at v1.0; new values are minor-version additive)
+# ─────────────────────────────────────────────────────────────────────────────
+class ErrorKind(str, Enum):
+    """Error category stored in ``ErrorInfo.kind``.
+    Mixing in ``str`` makes every member compare equal to its lowercase
+    wire value (e.g. ``ErrorKind.SYNTAX == "syntax"``).
+    """
+    SYNTAX = "syntax"
+    COMMAND_NOT_FOUND = "command_not_found"
+    VARNAME_NOT_FOUND = "varname_not_found"
+    INVALID_NAME = "invalid_name"
+    TYPE_MISMATCH = "type_mismatch"
+    NAME_CONFLICT = "name_conflict"
+    NOT_SORTED = "not_sorted"
+    CONVERGENCE = "convergence"
+    INFEASIBLE = "infeasible"
+    ESTIMATION_SAMPLE_EMPTY = "estimation_sample_empty"
+    ESTIMATION_FAILURE = "estimation_failure"
+    NO_ESTIMATION_RESULTS = "no_estimation_results"
+    NO_OBSERVATIONS = "no_observations"
+    DATA_IN_MEMORY = "data_in_memory"
+    MATRIX_SINGULAR = "matrix_singular"
+    MATRIX_CONFORMABILITY = "matrix_conformability"
+    MATRIX_MISSING = "matrix_missing"
+    FILE_NOT_FOUND = "file_not_found"
+    FILE_EXISTS = "file_exists"
+    FILE_CORRUPT = "file_corrupt"
+    FILE_IO = "file_io"
+    NETWORK = "network"
+    PERMISSION = "permission"
+    ENCODING = "encoding"
+    STATA_LIMIT = "stata_limit"
+    OUT_OF_MEMORY = "out_of_memory"
+    INTERRUPT = "interrupt"
+    CANCELLED = "cancelled"
+    TIMEOUT = "timeout"
+    ADAPTER_CRASH = "adapter_crash"
+    UNKNOWN = "unknown"
+class StataEdition(str, Enum):
+    """Stata edition reported in ``StataInfo.edition``; ``UNKNOWN`` is that field's default."""
+    MP = "MP"
+    SE = "SE"
+    IC = "IC"
+    BE = "BE"
+    UNKNOWN = "unknown"
+class GraphFormat(str, Enum):
+    """Image format of a ``GraphInfo`` entry; ``PNG`` is that field's default."""
+    PNG = "png"
+    SVG = "svg"
+    PDF = "pdf"
+class IncludeGraphs(str, Enum):
+    """Graph delivery modes: ``ref``, ``inline`` or ``none``."""
+    # NOTE(review): no model shown in this module uses this enum; presumably
+    # it enumerates the runner's ``include_graphs`` argument (the kernel
+    # passes "inline") — confirm against runner.execute().
+    REF = "ref"
+    INLINE = "inline"
+    NONE = "none"
+class Backend(str, Enum):
+    """Execution backend recorded in the required ``StataInfo.backend`` field."""
+    PYSTATA = "pystata"
+    CONSOLE = "console"
+# ─────────────────────────────────────────────────────────────────────────────
+# Base config — every model is forward-compat (tolerates unknown fields)
+# ─────────────────────────────────────────────────────────────────────────────
+class _Base(BaseModel):
+    """Base for all schema models; allows unknown fields per §6 forward-compat."""
+    # extra="allow" keeps unrecognised fields instead of rejecting them;
+    # validate_assignment=True re-validates whenever an attribute is assigned
+    # after construction, not only when the model is first built.
+    model_config = ConfigDict(extra="allow", validate_assignment=True)
+# ─────────────────────────────────────────────────────────────────────────────
+# Sub-models
+# ─────────────────────────────────────────────────────────────────────────────
+class StataInfo(_Base):
+    # Version / edition / backend block stored at RunResult.stata.
+    version: str | None = None  # optional; None when not supplied
+    edition: StataEdition = StataEdition.UNKNOWN
+    backend: Backend  # required: the only field without a default
+class LogInfo(_Base):
+    # Log excerpt block stored at RunResult.log: head/tail text, size
+    # counters, truncation flags and an optional ref.
+    head: str = ""
+    tail: str = ""
+    lines_total: int = 0
+    bytes_total: int = 0
+    truncated: bool = False
+    complete: bool = True
+    error_window: str | None = None
+    ref: str | None = None
+    @model_validator(mode="after")
+    def _check_invariants(self) -> LogInfo:
+        """Enforce the cross-field rules between truncated, ref, tail and the counters."""
+        if self.truncated:
+            # A truncated log must carry a ref.
+            if self.ref is None:
+                raise ValueError("log.truncated=True requires log.ref to be set")
+        elif self.tail:
+            # An untruncated log keeps everything in head, so tail stays empty.
+            raise ValueError(
+                "log.truncated=False requires log.tail to be empty "
+                "(see SCHEMA.md §3.3)"
+            )
+        if min(self.lines_total, self.bytes_total) < 0:
+            raise ValueError("log.lines_total and log.bytes_total must be ≥ 0")
+        return self
+class Matrix(_Base):
+    # Labelled matrix: row/column names plus inline values, a ref, or both.
+    rows: list[str]
+    cols: list[str]
+    values: list[list[float | None]] | None = None
+    ref: str | None = None
+    @model_validator(mode="after")
+    def _check_shape(self) -> Matrix:
+        """Require values or ref, and make inline values match rows × cols."""
+        cells = self.values
+        if cells is None:
+            # Without inline values the matrix is only reachable through ref.
+            if self.ref is None:
+                raise ValueError("matrix must have either values or ref set (or both)")
+            return self
+        expected_rows = len(self.rows)
+        if len(cells) != expected_rows:
+            raise ValueError(
+                f"matrix.values has {len(cells)} rows, "
+                f"expected {expected_rows}"
+            )
+        expected_cols = len(self.cols)
+        for index, cell_row in enumerate(cells):
+            if len(cell_row) != expected_cols:
+                raise ValueError(
+                    f"matrix.values row {index} has {len(cell_row)} cols, "
+                    f"expected {expected_cols}"
+                )
+        return self
+class StataReturns(_Base):
+    """Shape shared by r() and e() — distinct instances at RunResult.results.{r,e}."""
+    # Every collection defaults to a fresh empty dict per instance.
+    scalars: dict[str, float | None] = Field(default_factory=dict)  # None is an accepted value
+    macros: dict[str, str] = Field(default_factory=dict)
+    matrices: dict[str, Matrix] = Field(default_factory=dict)
+class ResultsInfo(_Base):
+    # Holder for the r() and e() return sets; stored at RunResult.results.
+    r: StataReturns = Field(default_factory=StataReturns)
+    e: StataReturns = Field(default_factory=StataReturns)
+    last_estimation_cmd: str | None = None  # optional; defaults to None
+class VariableInfo(_Base):
+    # One dataset variable; element type of DatasetInfo.variables.
+    name: str
+    type: str  # Stata storage type: byte/int/long/float/double/str#/strL
+    label: str = ""  # empty string when no label is given
+class DatasetInfo(_Base):
+    # Dataset summary block stored at RunResult.dataset; every field has a default.
+    frame: str = "default"
+    n_obs: int = 0
+    n_vars: int = 0
+    changed: bool = False
+    filename: str | None = None
+    variables: list[VariableInfo] | None = None  # None (not an empty list) by default
+class GraphInfo(_Base):
+    # One graph entry; element type of RunResult.graphs. Only `ref` is required.
+    ref: str
+    name: str = "Graph"
+    format: GraphFormat = GraphFormat.PNG
+    width: int | None = None
+    height: int | None = None
+    source_command: str | None = None
+    source_line: int | None = None
+    inline: str | None = None  # base64 of the bytes when explicitly requested
+class Suggestion(_Base):
+    # Element type of ErrorInfo.suggestions; the kernel prints `action` under the error.
+    action: str
+    command: str | None = None  # optional; defaults to None
+class ErrorContext(_Base):
+    # Context block stored at ErrorInfo.context; the kernel prints `failing`
+    # next to the error's line number.
+    before: list[str] = Field(default_factory=list)
+    failing: str = ""
+    after: list[str] = Field(default_factory=list)
+# Maximum stored lengths (in characters) for the truncating validators below.
+_MESSAGE_MAX = 4096
+_COMMAND_MAX = 1024
+_WARNING_MAX = 1024
+# Marker appended to text that was cut; it counts toward the limit.
+_TRUNC_MARK = "…"
+def _truncate(text: str, limit: int) -> str:
+    """Return *text* cut to at most *limit* characters.
+    Text that already fits is returned unchanged. Otherwise the result ends
+    with ``_TRUNC_MARK`` and is exactly *limit* characters long. A *limit*
+    of zero or less yields an empty string.
+    """
+    if len(text) <= limit:
+        return text
+    if limit <= 0:
+        return ""
+    keep = limit - len(_TRUNC_MARK)
+    if keep <= 0:
+        # No room for any payload: a negative slice bound would keep almost
+        # the whole text, so return only as much of the marker as fits.
+        return _TRUNC_MARK[:limit]
+    return text[:keep] + _TRUNC_MARK
+class ErrorInfo(_Base):
+    # Structured description of a failed run; stored at RunResult.error.
+    # `kind` and `rc` are required, everything else has a default.
+    kind: ErrorKind
+    rc: int
+    rc_label: str = ""
+    message: str = ""
+    command: str | None = None
+    line: int | None = None
+    context: ErrorContext = Field(default_factory=ErrorContext)
+    commands_executed: int | None = None
+    path: str | None = None
+    varname: str | None = None
+    name: str | None = None
+    suggestions: list[Suggestion] = Field(default_factory=list)
+    @field_validator("message")
+    @classmethod
+    def _truncate_message(cls, v: str) -> str:
+        """Cap ``message`` at ``_MESSAGE_MAX`` characters."""
+        capped = _truncate(v, _MESSAGE_MAX)
+        return capped
+    @field_validator("command")
+    @classmethod
+    def _truncate_command(cls, v: str | None) -> str | None:
+        """Cap ``command`` at ``_COMMAND_MAX`` characters; ``None`` passes through."""
+        if v is None:
+            return None
+        return _truncate(v, _COMMAND_MAX)
+class StataWarning(_Base):
+    """JSON wire name is `warnings`; class avoids shadowing the builtin `Warning`."""
+    kind: str = "unknown"
+    message: str = ""
+    @field_validator("message")
+    @classmethod
+    def _truncate_message(cls, v: str) -> str:
+        # Named like ErrorInfo's validator so the class namespace does not
+        # rebind `_truncate`; the call below is the module-level helper.
+        capped = _truncate(v, _WARNING_MAX)
+        return capped
+# ─────────────────────────────────────────────────────────────────────────────
+# Top-level
+# ─────────────────────────────────────────────────────────────────────────────
+# Allowed session-id alphabet; matched with fullmatch(), so the whole id must conform.
+_SESSION_ID_RE = re.compile(r"[A-Za-z0-9_-]+")
+class RunResult(_Base):
+    """Top-level v1.0 schema. SCHEMA.md is normative; this is its derived form."""
+    ok: bool
+    rc: int
+    session_id: str = "main"
+    request_id: str
+    started_at: str  # ISO 8601 UTC with millisecond precision
+    elapsed_ms: int
+    stata_elapsed_ms: int | None = None
+    stata: StataInfo
+    log: LogInfo = Field(default_factory=LogInfo)
+    results: ResultsInfo = Field(default_factory=ResultsInfo)
+    dataset: DatasetInfo = Field(default_factory=DatasetInfo)
+    graphs: list[GraphInfo] = Field(default_factory=list)
+    warnings: list[StataWarning] = Field(default_factory=list)
+    error: ErrorInfo | None = None
+    schema_version: Literal["1.0"] = "1.0"
+    capabilities: list[str] = Field(default_factory=list)
+    @field_validator("session_id")
+    @classmethod
+    def _check_session_id(cls, v: str) -> str:
+        """Reject ids containing anything outside letters, digits, `_` and `-`."""
+        if _SESSION_ID_RE.fullmatch(v):
+            return v
+        raise ValueError(
+            f"session_id must match [A-Za-z0-9_-]+; got {v!r}. "
+            "':' is reserved for future remote prefixing."
+        )
+    @field_validator("elapsed_ms")
+    @classmethod
+    def _nonneg_elapsed(cls, v: int) -> int:
+        """Reject a negative wall-clock duration."""
+        if v >= 0:
+            return v
+        raise ValueError(f"elapsed_ms must be ≥ 0; got {v}")
+    @field_validator("stata_elapsed_ms")
+    @classmethod
+    def _nonneg_stata_elapsed(cls, v: int | None) -> int | None:
+        """Reject a negative Stata duration; ``None`` is accepted as-is."""
+        if v is None or v >= 0:
+            return v
+        raise ValueError(f"stata_elapsed_ms must be ≥ 0; got {v}")
+    @model_validator(mode="after")
+    def _consistency(self) -> RunResult:
+        """Tie ``ok``, ``rc`` and ``error`` together as the checks below spell out."""
+        err = self.error
+        if not self.ok:
+            # Failure: an error block is mandatory and must repeat the top-level rc.
+            if err is None:
+                raise ValueError(
+                    "ok=False requires error to be non-None (SCHEMA.md §3.1)"
+                )
+            if err.rc != self.rc:
+                raise ValueError(
+                    f"top-level rc ({self.rc}) must equal error.rc ({err.rc})"
+                )
+            return self
+        # Success: no error block and a zero return code.
+        if err is not None:
+            raise ValueError("ok=True requires error to be None (SCHEMA.md §3.1)")
+        if self.rc != 0:
+            raise ValueError(f"ok=True requires rc=0; got {self.rc}")
+        return self

stata_code/kernel/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Jupyter kernel package for stata_code."""
+from stata_code.kernel.kernel import StataKernel, install_kernel, run_main
+__all__ = ["StataKernel", "install_kernel", "run_main"]

stata_code/kernel/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow `python -m stata_code.kernel` to launch or install the kernel."""
+from stata_code.kernel.kernel import run_main
+# run_main() inspects sys.argv itself to choose between the installer and the launcher.
+if __name__ == "__main__":  # pragma: no cover
+    run_main()

stata_code/kernel/kernel.py ADDED Viewed

@@ -0,0 +1,331 @@
+"""Stata Jupyter kernel — exposes the v1.0 stata_code pipeline.
+The kernel uses `runner.execute()` for every cell. Defaults are tuned for
+human/notebook use rather than agent use:
+- `include_full_log=True`: full log shown in stdout (no head/tail truncation)
+- `include_graphs="inline"`: graph bytes embedded for direct rendering
+- `session_id="main"`: single-session unless the kernel is configured with
+  multiple kernel specs
+Install via `python -m stata_code.kernel install --user`.
+"""
+from __future__ import annotations
+import json
+import sys
+import traceback
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Any
+try:
+    from ipykernel.kernelbase import Kernel
+    _HAS_IPYKERNEL = True
+except ImportError:
+    Kernel = object  # type: ignore[misc,assignment]
+    _HAS_IPYKERNEL = False
+from stata_code.core._runtime import PystataNotAvailable
+from stata_code.core.runner import execute
+from stata_code.core.schema import RunResult
+# ─────────────────────────────────────────────────────────────────────────────
+# Static keyword / help tables (carried over verbatim — independent of
+# pipeline; used by do_complete / do_inspect)
+# ─────────────────────────────────────────────────────────────────────────────
+# Completion candidates for do_complete. Every entry is stored with a leading
+# space; do_complete strips it only for the prefix comparison and returns the
+# entry as stored.
+STATA_KEYWORDS: tuple[str, ...] = (
+    " quietly", " noisily", " capture",
+    " summarize", " summarize, detail", " describe", " browse",
+    " list", " inspect", " count", " assert",
+    " generate", " egen", " replace", " recode", " destring", " tostring",
+    " merge", " append", " joinby", " cross",
+    " sort", " gsort", " by", " bysort", " collapse", " contract", " stack",
+    " reshape", " xpose", " fillin",
+    " regress", " logistic", " probit", " tobit", " ivreg", " areg",
+    " xtreg", " logit", " ologit", " oprobit", " mlogit",
+    " estimates", " eststo", " esttab", " estpost",
+    " label", " label variable", " label define", " label values",
+    " keep", " drop", " use", " save", " clear", " insheet", " infile",
+    " infix", " import", " export", " outfile", " outreg",
+    " graph", " graph bar", " graph box", " graph twoway", " graph export",
+    " display", " putexcel", " putdocx",
+    " tempfile", " tempvar", " global", " local",
+    " foreach", " forvalues", " while", " if", " else", " continue",
+    " set", " update", " restore", " preserve",
+    " version", " mata", " python",
+)
+# Help text for do_inspect, keyed by lowercase command name; each value is a
+# syntax line, a blank line, then a one-sentence description.
+STATA_HELP: dict[str, str] = {
+    "summarize": "summarize [varlist] [if] [in] [weight] [, detail]\n\nCompute summary statistics.",
+    "regress": "regress depvar [indepvars] [if] [in] [weight] [, options]\n\nLinear regression.",
+    "logistic": "logistic depvar [indepvars] [if] [in] [weight] [, options]\n\nLogistic regression.",
+    "generate": "generate newvar = exp\n\ngenerate creates a new variable.",
+    "replace": "replace oldvar = exp [if] [in]\n\nreplace replaces the values of an existing variable.",
+    "merge": "merge [n] 1:1 varlist using filename [, options]\n\nmerge joins data from disk.",
+    "graph": "graph [type] plot [if] [in] [, options]\n\ngraph creates twoway plots.",
+    "by": "by varlist: command\n\nby repeats command for each subset of data.",
+}
+# ─────────────────────────────────────────────────────────────────────────────
+# Kernel
+# ─────────────────────────────────────────────────────────────────────────────
+class StataKernel(Kernel if _HAS_IPYKERNEL else object):
+    """Jupyter wrapper kernel that runs every cell through ``runner.execute()``.
+    When ipykernel cannot be imported the class falls back to a plain
+    ``object`` base so the module still imports; ``do_execute`` then returns
+    an error reply instead of running anything.
+    """
+    protocol_version = "5.3"
+    implementation = "stata_code.kernel"
+    # NOTE(review): the surrounding wheel is stata-code 0.3.0 — confirm that
+    # this string is meant to stay at 0.2.0.
+    implementation_version = "0.2.0"
+    language_info: dict[str, Any] = {
+        "name": "stata",
+        "codemirror_mode": "stata",
+        "file_extension": ".do",
+        "mimetype": "text/x-stata",
+        "pygments_lexer": "stata",
+        "version": "18.0",
+    }
+    banner = "Stata kernel (stata_code) — backed by the v1.0 runner pipeline"
+    help_links = [{"text": "Stata Help", "url": "https://www.stata.com/help.cgi?"}]
+    # Result of the most recent successful execute() call; None until then.
+    _last_result: RunResult | None = None
+    # ── Execution ──────────────────────────────────────────────────────────
+    def do_execute(
+        self,
+        code: str,
+        silent: bool = False,
+        store_history: bool = True,
+        user_expressions: dict[str, Any] | None = None,
+        allow_stdin: bool = False,
+    ) -> dict[str, Any]:
+        """Run one cell and return the execute_reply content.
+        The stripped cell text is passed to ``execute()`` with the full log
+        and inline graphs requested. Unless *silent* is set, the log head,
+        warnings, inline graphs and any error text are published to the
+        front-end before the reply is built. Exceptions raised by
+        ``execute()`` are turned into an error reply rather than propagated.
+        """
+        if not _HAS_IPYKERNEL:
+            return self._error_reply("ipykernel not installed")
+        try:
+            result = execute(
+                code.strip(),
+                include_full_log=True,
+                include_graphs="inline",
+            )
+        except PystataNotAvailable as exc:
+            return self._error_reply(f"Stata not available: {exc}")
+        except Exception as exc:  # noqa: BLE001
+            # Boundary handler: keep the kernel alive and report the failure.
+            traceback.print_exc()
+            return self._error_reply(str(exc))
+        self._last_result = result
+        if not silent:
+            if result.log.head:
+                self._stream("stdout", result.log.head + "\n")
+            for w in result.warnings:
+                self._stream("stderr", f"[{w.kind}] {w.message}\n")
+            for graph in result.graphs:
+                if graph.inline:
+                    self._publish_image(graph.inline, graph.format.value)
+            if result.error:
+                self._stream("stderr", self._format_error(result) + "\n")
+        return self._reply(result)
+    # ── Reply helpers ──────────────────────────────────────────────────────
+    def _current_execution_count(self) -> int:
+        """Return the kernel's execution counter, or 0 when the base class has none."""
+        # With the `object` fallback base there is no `execution_count`
+        # attribute, so a plain attribute read would raise AttributeError in
+        # the very branch that reports the missing ipykernel.
+        return getattr(self, "execution_count", 0)
+    def _reply(self, r: RunResult) -> dict[str, Any]:
+        """Build the execute_reply content for *r*: ``ok`` when it carries no error."""
+        if r.error is None:
+            return {
+                "status": "ok",
+                "execution_count": self._current_execution_count(),
+                "payload": [],
+                "user_expressions": {},
+            }
+        return {
+            "status": "error",
+            "execution_count": self._current_execution_count(),
+            "ename": f"StataError({r.error.kind.value})",
+            "evalue": r.error.message,
+            "traceback": [self._format_error(r)],
+        }
+    def _format_error(self, r: RunResult) -> str:
+        """Render ``r.error`` as a multi-line message; *r* must carry an error."""
+        e = r.error
+        assert e is not None
+        parts = [f"!!! Stata error: {e.kind.value} (rc={e.rc})", f"    {e.message}"]
+        if e.line is not None:
+            parts.append(f"    at line {e.line}: {e.context.failing!r}")
+        parts.extend(f"    → {s.action}" for s in e.suggestions)
+        return "\n".join(parts)
+    def _error_reply(self, msg: str) -> dict[str, Any]:
+        """Build an error reply for a failure that produced no RunResult."""
+        return {
+            "status": "error",
+            "execution_count": self._current_execution_count(),
+            "ename": "RuntimeError",
+            "evalue": msg,
+            "traceback": [msg],
+        }
+    def _stream(self, name: str, text: str) -> None:
+        """Publish *text* on the named stream; empty text is skipped."""
+        if not text:
+            return
+        try:
+            self.send_response(
+                self.iopub_socket, "stream", {"name": name, "text": text}
+            )
+        except Exception:  # noqa: BLE001
+            pass  # non-kernel context (tests)
+    def _publish_image(self, b64_data: str, fmt: str) -> None:
+        """Publish base64 graph data as display_data; unknown formats are sent as PNG."""
+        mime = {
+            "png": "image/png",
+            "svg": "image/svg+xml",
+            "pdf": "application/pdf",
+        }.get(fmt, "image/png")
+        try:
+            self.send_response(
+                self.iopub_socket,
+                "display_data",
+                {
+                    "data": {mime: b64_data, "text/plain": f"[graph: {fmt}]"},
+                    "metadata": {},
+                },
+            )
+        except Exception:  # noqa: BLE001
+            pass  # best-effort, same as _stream
+    # ── Completion / Inspection ────────────────────────────────────────────
+    def do_complete(self, code: str, cursor_pos: int) -> dict[str, Any]:
+        """Complete the token that ends at the cursor against STATA_KEYWORDS."""
+        # Clamp so an out-of-range cursor cannot produce negative offsets.
+        cursor_pos = max(0, min(cursor_pos, len(code)))
+        # Walk left from the cursor itself: a delimiter directly before it
+        # then yields an empty token instead of being absorbed into the
+        # token, and an empty cell yields cursor_start == 0 rather than -1.
+        token_start = cursor_pos
+        while token_start > 0 and code[token_start - 1] not in " \t\n\r(,":
+            token_start -= 1
+        token = code[token_start:cursor_pos]
+        # NOTE(review): matches are returned with the leading space stored in
+        # STATA_KEYWORDS, so accepting one inserts that space — confirm this
+        # is intended before stripping it.
+        matches = sorted(kw for kw in STATA_KEYWORDS if kw.lstrip().startswith(token))
+        return {
+            "status": "ok",
+            "matches": matches,
+            "cursor_start": token_start,
+            "cursor_end": cursor_pos,
+            "metadata": {},
+        }
+    def do_inspect(
+        self, code: str, cursor_pos: int, detail_level: int = 0
+    ) -> dict[str, Any]:
+        """Look up the alphanumeric word under the cursor in STATA_HELP.
+        The reply carries the protocol's ``data``/``metadata`` fields and, for
+        backward compatibility, the ``name``/``documentation`` keys as well.
+        """
+        cursor_pos = max(0, min(cursor_pos, len(code)))
+        # Expand in both directions so a cursor inside a word still finds it.
+        word_start = cursor_pos
+        while word_start > 0 and code[word_start - 1].isalnum():
+            word_start -= 1
+        word_end = cursor_pos
+        while word_end < len(code) and code[word_end].isalnum():
+            word_end += 1
+        word = code[word_start:word_end]
+        found = STATA_HELP.get(word.lower())
+        if not found:
+            return {"status": "ok", "found": False, "data": {}, "metadata": {}}
+        return {
+            "status": "ok",
+            "found": True,
+            "data": {"text/plain": found},
+            "metadata": {},
+            "name": word,
+            "documentation": found,
+            "cursor_start": word_start,
+            "cursor_end": word_end,
+        }
+    def do_kernel_info(self) -> dict[str, Any]:
+        """Return the static kernel description assembled from the class attributes."""
+        return {
+            "protocol_version": self.protocol_version,
+            "implementation": self.implementation,
+            "implementation_version": self.implementation_version,
+            "language_info": self.language_info,
+            "banner": self.banner,
+            "help_links": self.help_links,
+        }
+# ─────────────────────────────────────────────────────────────────────────────
+# Kernel installation CLI
+# ─────────────────────────────────────────────────────────────────────────────
+def install_kernel(user: bool = True, system: bool = False) -> None:
+    """Write a kernelspec for this interpreter and register it with Jupyter.
+    The spec launches ``<sys.executable> -m stata_code.kernel -f
+    {connection_file}`` under the kernel name ``stata`` and replaces any
+    existing spec of that name. ``system=True`` forces a non-user install
+    regardless of ``user``; otherwise ``user`` decides.
+    """
+    from jupyter_client.kernelspec import KernelSpecManager
+    interpreter = str(Path(sys.executable).resolve())
+    spec = {
+        "argv": [interpreter, "-m", "stata_code.kernel", "-f", "{connection_file}"],
+        "display_name": "Stata",
+        "language": "stata",
+        "metadata": {"debugger": False},
+    }
+    # A system install overrides whatever `user` says.
+    install_for_user = user if not system else False
+    with TemporaryDirectory(prefix="stata_code_kernel_") as td:
+        staging_dir = Path(td)
+        spec_path = staging_dir / "kernel.json"
+        spec_path.write_text(json.dumps(spec, indent=2))
+        # Install while the staging directory still exists; it is removed on exit.
+        dest = KernelSpecManager().install_kernel_spec(
+            str(staging_dir),
+            kernel_name="stata",
+            user=install_for_user,
+            replace=True,
+        )
+    print(f"Kernel installed to: {dest}")
+    print("Restart Jupyter and select 'Stata' as the kernel.")
+def run_main() -> None:
+    """Console script entry point — installer or kernel launcher.
+    Usage::
+        stata-code-kernel install [--user|--system]   # install kernel spec
+        stata-code-kernel -f <connection_file>        # launch the kernel
+                                                       # (Jupyter calls this)
+    """
+    import argparse
+    argv = sys.argv
+    # No arguments, or an explicit help flag, prints the usage text and stops.
+    wants_help = len(argv) == 1 or argv[1] in {"-h", "--help"}
+    if wants_help:
+        print(
+            "usage: stata-code-kernel install [--user|--system]\n"
+            "       stata-code-kernel -f <connection_file>\n\n"
+            "Install or launch the Stata Jupyter kernel."
+        )
+        return
+    if argv[1] != "install":
+        # Anything else (Jupyter's connection-file flags) launches the kernel;
+        # ipykernel parses those flags itself.
+        from ipykernel.kernelapp import IPKernelApp
+        IPKernelApp.launch_instance(kernel_class=StataKernel)
+        return
+    # "install" subcommand: --user and --system share one destination and
+    # are mutually exclusive; --user is the default.
+    parser = argparse.ArgumentParser(prog="stata-code-kernel install")
+    target = parser.add_mutually_exclusive_group()
+    target.add_argument("--user", dest="user", action="store_true", default=True)
+    target.add_argument("--system", dest="user", action="store_false")
+    options = parser.parse_args(argv[2:])
+    install_kernel(user=options.user, system=not options.user)
+if __name__ == "__main__":  # pragma: no cover
+    run_main()

stata_code/mcp/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""MCP server package for stata_code."""
+# NOTE(review): neither "APP" nor "main" is bound in the lines shown for this
+# module, so `from stata_code.mcp import *` would raise AttributeError unless
+# they are supplied elsewhere (e.g. a module-level __getattr__) — confirm.
+__all__ = ["APP", "main"]

stata_code/mcp/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow `python -m stata_code.mcp` to launch the MCP server."""
+# NOTE(review): the entry point imported here is `run_main`, whereas the
+# package __all__ advertises "main" — confirm which name server.py exports.
+from stata_code.mcp.server import run_main
+if __name__ == "__main__":  # pragma: no cover
+    run_main()