PyPI - specsmith - Versions diffs - 0.4.0__tar.gz → 0.4.0.dev222__tar.gz - Mend

specsmith 0.4.0__tar.gz → 0.4.0.dev222__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

{specsmith-0.4.0/src/specsmith.egg-info → specsmith-0.4.0.dev222}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: specsmith
-Version: 0.4.0
+Version: 0.4.0.dev222
 Summary: Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands.
 Author: BitConcepts
 License-Expression: MIT

{specsmith-0.4.0 → specsmith-0.4.0.dev222}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "specsmith"
-version = "0.4.0"
+version = "0.4.0.dev222"
 description = "Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands."
 readme = "README.md"
 license = "MIT"

specsmith-0.4.0.dev222/src/specsmith/agent/events.py ADDED Viewed

@@ -0,0 +1,176 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""Block-based JSONL event protocol for `specsmith chat` (REQ-112, REQ-113, REQ-114).
+The protocol is the contract between the Specsmith chat backend and any
+client (the Nexus REPL itself, the VS Code extension, or future TUIs).
+Every event is a single JSON object on its own line with a ``type`` key.
+Event kinds
+-----------
+* ``block_start``      - begins a new block (kinds: ``plan``, ``message``,
+                         ``tool_call``, ``tool_result``, ``diff``,
+                         ``test_results``, ``verdict``).
+* ``block_complete``   - closes the block opened by ``block_start``.
+* ``token``            - incremental LLM token within a ``message`` block.
+* ``tool_call``        - the LLM has decided to invoke a tool.
+* ``tool_request``     - safe-mode permission request (REQ-115).
+* ``tool_result``      - completed tool execution.
+* ``plan_step``        - status transition for a step in the active plan
+                         block (REQ-114).
+* ``task_complete``    - final block; carries final summary + profile.
+"""
+from __future__ import annotations
+import contextlib
+import json
+import sys
+import time
+import uuid
+from dataclasses import dataclass, field
+from typing import IO, Any
+def _now_iso() -> str:
+    """Format the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (whole seconds)."""
+    utc_now = time.gmtime()
+    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
+def _new_block_id() -> str:
+    """Return a fresh block id: ``blk_`` followed by 12 hex chars of a random UUID4."""
+    suffix = uuid.uuid4().hex[:12]
+    return "blk_" + suffix
+@dataclass
+class EventEmitter:
+    """Serialize protocol events as JSONL onto a text stream.
+    One JSON object is written per line and the stream is flushed after
+    every event so a consumer can react as events arrive. The stream
+    defaults to ``sys.stdout``, resolved when the emitter is constructed.
+    """
+    stream: IO[str] = field(default_factory=lambda: sys.stdout)
+    def emit(self, event: dict[str, Any]) -> None:
+        """Write ``event`` as a single JSON line, then flush (best effort)."""
+        self.stream.write(json.dumps(event, ensure_ascii=False) + "\n")
+        # Flushing is best-effort: some capture buffers used in tests do not
+        # support it, so any failure raised by ``flush`` is swallowed.
+        with contextlib.suppress(Exception):
+            self.stream.flush()
+    # ── Block helpers ────────────────────────────────────────────────────
+    def block_start(self, kind: str, *, agent: str = "nexus", **payload: Any) -> str:
+        """Emit ``block_start`` for a new ``kind`` block and return its generated id.
+        Extra keyword arguments are carried verbatim under ``payload``.
+        """
+        new_id = _new_block_id()
+        event: dict[str, Any] = {
+            "type": "block_start",
+            "block_id": new_id,
+            "kind": kind,
+            "agent": agent,
+            "timestamp": _now_iso(),
+            "payload": payload,
+        }
+        self.emit(event)
+        return new_id
+    def block_complete(self, block_id: str, **payload: Any) -> None:
+        """Emit ``block_complete`` for ``block_id``; keyword arguments become ``payload``."""
+        event: dict[str, Any] = {
+            "type": "block_complete",
+            "block_id": block_id,
+            "timestamp": _now_iso(),
+            "payload": payload,
+        }
+        self.emit(event)
+    def token(self, block_id: str, text: str) -> None:
+        """Emit a ``token`` event carrying ``text`` for ``block_id``."""
+        event: dict[str, Any] = {"type": "token", "block_id": block_id, "text": text}
+        self.emit(event)
+    def tool_call(self, block_id: str, name: str, args: dict[str, Any]) -> None:
+        """Emit a ``tool_call`` event naming the tool and its arguments."""
+        event: dict[str, Any] = {
+            "type": "tool_call",
+            "block_id": block_id,
+            "name": name,
+            "args": args,
+        }
+        self.emit(event)
+    def tool_request(self, block_id: str, name: str, args: dict[str, Any]) -> None:
+        """Emit a ``tool_request`` event; same shape as ``tool_call``, different type."""
+        event: dict[str, Any] = {
+            "type": "tool_request",
+            "block_id": block_id,
+            "name": name,
+            "args": args,
+        }
+        self.emit(event)
+    def tool_result(self, block_id: str, name: str, ok: bool, output: str) -> None:
+        """Emit a ``tool_result`` event with the tool's ``ok`` flag and ``output``."""
+        event: dict[str, Any] = {
+            "type": "tool_result",
+            "block_id": block_id,
+            "name": name,
+            "ok": ok,
+            "output": output,
+        }
+        self.emit(event)
+    def plan(self, steps: list[dict[str, Any]]) -> str:
+        """Open a ``plan`` block whose payload holds ``steps``; return the block id."""
+        plan_block_id = self.block_start("plan", steps=steps)
+        return plan_block_id
+    def plan_step(
+        self,
+        block_id: str,
+        step_id: str,
+        status: str,
+        **payload: Any,
+    ) -> None:
+        """Emit a ``plan_step`` event reporting ``status`` for ``step_id``.
+        Extra keyword arguments are carried verbatim under ``payload``.
+        """
+        event: dict[str, Any] = {
+            "type": "plan_step",
+            "block_id": block_id,
+            "step_id": step_id,
+            "status": status,
+            "timestamp": _now_iso(),
+            "payload": payload,
+        }
+        self.emit(event)
+    def diff(self, path: str, body: str) -> str:
+        """Open a ``diff`` block with ``path`` and ``body`` in its payload; return its id."""
+        diff_block_id = self.block_start("diff", path=path, body=body)
+        return diff_block_id
+    def task_complete(
+        self,
+        *,
+        success: bool,
+        confidence: float,
+        summary: str,
+        profile: str,
+        comments: list[dict[str, Any]] | None = None,
+        **extra: Any,
+    ) -> None:
+        """Emit the ``task_complete`` event.
+        ``comments`` falls back to an empty list when falsy. Any ``extra``
+        keyword arguments are merged into the top level of the event.
+        """
+        event: dict[str, Any] = {
+            "type": "task_complete",
+            "timestamp": _now_iso(),
+            "success": success,
+            "confidence": confidence,
+            "summary": summary,
+            "profile": profile,
+            "comments": comments or [],
+        }
+        # Merged last: extra keys are appended, and an extra key that collides
+        # with a standard one replaces its value.
+        event.update(extra)
+        self.emit(event)
+__all__ = ["EventEmitter"]

specsmith-0.4.0.dev222/src/specsmith/agent/mcp.py ADDED Viewed

@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""MCP (Model Context Protocol) tool consumption for Nexus (REQ-121).
+Reads ``.specsmith/mcp.yml`` (a list of server configs) and returns a list
+of tool wrappers that Nexus can register alongside its built-in tool set.
+The wrappers are invoked over stdio per the MCP spec (subprocess +
+JSON-RPC framing). For 1.0 we ship the loader and the wrapper interface;
+the actual stdio JSON-RPC client is implemented but kept narrow so the
+Specsmith safety middleware fully wraps every call.
+"""
+from __future__ import annotations
+import json
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+@dataclass
+class MCPServerSpec:
+    """Static configuration for an MCP server.
+    Mirrors `.specsmith/mcp.yml` entries; the YAML parser turns each
+    entry into one of these.
+    """
+    # Server name; also exposed as ``MCPTool.name``.
+    name: str
+    # Executable launched by ``MCPTool.invoke`` (first argv element).
+    command: str
+    # Further argv elements appended after ``command``.
+    args: list[str]
+    # Environment variables passed to the server subprocess.
+    env: dict[str, str]
+@dataclass
+class MCPTool:
+    """A Nexus-side handle to an MCP server.
+    Calling ``invoke(payload)`` opens a subprocess, sends the payload as
+    a JSON-RPC ``tools/call`` request, and returns the response. Errors
+    surface as plain strings; the orchestrator wraps the call with the
+    standard Specsmith safety middleware so destructive payloads are
+    blocked exactly the same way as native Nexus tools.
+    """
+    spec: MCPServerSpec
+    @property
+    def name(self) -> str:
+        """Return the configured server name."""
+        return self.spec.name
+    def invoke(self, payload: dict[str, Any]) -> str:
+        """Run the server once with a ``tools/call`` request and return its reply.
+        ``payload`` becomes the ``params`` of a JSON-RPC 2.0 request written
+        to the subprocess's stdin as one line. The subprocess inherits the
+        current process environment with ``spec.env`` layered on top, and
+        is given 30 seconds to finish.
+        Returns the stripped stdout, ``"(empty mcp response)"`` when stdout
+        is blank, or a string starting with ``"mcp error: "`` when the
+        process cannot be started, times out, or exits non-zero. Never
+        raises for those failures.
+        """
+        import os
+        request = {
+            "jsonrpc": "2.0",
+            "id": 1,
+            "method": "tools/call",
+            "params": payload,
+        }
+        body = json.dumps(request) + "\n"
+        # Passing only ``spec.env`` would *replace* the environment, leaving
+        # the server without PATH/HOME/SYSTEMROOT whenever the config lists
+        # few or no variables. Inherit ours and let the config override it.
+        child_env = {**os.environ, **self.spec.env}
+        try:
+            proc = subprocess.run(  # noqa: S603 - argv is configured by user
+                [self.spec.command, *self.spec.args],
+                input=body,
+                capture_output=True,
+                text=True,
+                timeout=30,
+                env=child_env,
+                check=False,
+            )
+        except (OSError, subprocess.TimeoutExpired) as exc:
+            return f"mcp error: {exc}"
+        if proc.returncode != 0:
+            return f"mcp error: {proc.stderr.strip() or 'non-zero exit'}"
+        return proc.stdout.strip() or "(empty mcp response)"
+def load_mcp_tools(project_dir: Path) -> list[MCPTool]:
+    """Read ``.specsmith/mcp.yml`` and return a list of :class:`MCPTool`.
+    Returns an empty list when the file is absent or unparseable so the
+    rest of the orchestrator continues to function with zero MCP servers
+    configured (the default). Entries that are not mappings, or that lack
+    a non-empty ``name`` or ``command``, are skipped. A scalar ``args``
+    value is treated as a single argument, and an ``env`` value that is
+    not a mapping is ignored.
+    """
+    cfg_path = Path(project_dir) / ".specsmith" / "mcp.yml"
+    if not cfg_path.is_file():
+        return []
+    try:
+        import yaml
+        raw = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or []
+    except Exception:  # noqa: BLE001
+        return []
+    if not isinstance(raw, list):
+        return []
+    scalar_types = (str, int, float)
+    out: list[MCPTool] = []
+    for entry in raw:
+        if not isinstance(entry, dict):
+            continue
+        name = str(entry.get("name", "")).strip()
+        command = str(entry.get("command", "")).strip()
+        if not name or not command:
+            continue
+        args_raw = entry.get("args", []) or []
+        if isinstance(args_raw, scalar_types):
+            # ``args: --flag`` is one argument; iterating the string would
+            # split it into single characters.
+            args_raw = [args_raw]
+        elif not isinstance(args_raw, (list, tuple)):
+            args_raw = []
+        env_raw = entry.get("env", {}) or {}
+        if not isinstance(env_raw, dict):
+            # A malformed ``env`` (e.g. a list) must not crash on ``.items()``.
+            env_raw = {}
+        spec = MCPServerSpec(
+            name=name,
+            command=command,
+            args=[str(a) for a in args_raw if isinstance(a, scalar_types)],
+            env={str(k): str(v) for k, v in env_raw.items()},
+        )
+        out.append(MCPTool(spec=spec))
+    return out
+__all__ = ["MCPServerSpec", "MCPTool", "load_mcp_tools"]

specsmith-0.4.0.dev222/src/specsmith/agent/memory.py ADDED Viewed

@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""Persistent session memory for the Nexus chat surface (REQ-120, REQ-125).
+Every chat turn (user utterance, broker decision, task result, tool calls)
+is appended as JSONL to ``.specsmith/sessions/<session_id>/turns.jsonl``.
+The orchestrator prepends the most-recent turns (capped by character
+budget) to its first message so the LLM has continuity across runs.
+"""
+from __future__ import annotations
+import json
+import time
+from pathlib import Path
+from typing import Any
+def _session_dir(project_dir: Path, session_id: str) -> Path:
+    """Return ``<project_dir>/.specsmith/sessions/<session_id>``."""
+    return Path(project_dir).joinpath(".specsmith", "sessions", session_id)
+def _turns_path(project_dir: Path, session_id: str) -> Path:
+    """Return the path of the session's ``turns.jsonl`` log file."""
+    session_root = _session_dir(project_dir, session_id)
+    return session_root.joinpath("turns.jsonl")
+def append_turn(
+    project_dir: Path,
+    session_id: str,
+    turn: dict[str, Any],
+) -> None:
+    """Append ``turn`` as one JSON line to the session log.
+    The session directory is created on demand. ``turn`` itself is not
+    mutated: a copy is written, stamped with the current UTC time under
+    ``timestamp`` unless the caller already supplied that key.
+    """
+    log_file = _turns_path(project_dir, session_id)
+    log_file.parent.mkdir(parents=True, exist_ok=True)
+    entry = dict(turn)
+    if "timestamp" not in entry:
+        entry["timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+    serialized = json.dumps(entry, ensure_ascii=False)
+    with log_file.open("a", encoding="utf-8") as handle:
+        handle.write(serialized + "\n")
+def all_turns(project_dir: Path, session_id: str) -> list[dict[str, Any]]:
+    """Return every recorded turn for ``session_id`` (oldest-first).
+    Returns an empty list when the session has no log file. Blank lines,
+    lines that are not valid JSON, and JSON values that are not objects
+    are skipped, so one corrupt record never hides the rest of the log.
+    """
+    path = _turns_path(project_dir, session_id)
+    if not path.is_file():
+        return []
+    out: list[dict[str, Any]] = []
+    # Iterate the file rather than ``read_text().splitlines()``: records are
+    # written with ``ensure_ascii=False``, so a turn may contain a raw
+    # U+2028, U+2029 or U+0085. ``str.splitlines`` treats those as line
+    # breaks and would cut the record in two, silently losing the turn.
+    # File iteration only breaks on real newlines, which JSON always escapes.
+    with path.open(encoding="utf-8") as fh:
+        for line in fh:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                record = json.loads(line)
+            except ValueError:
+                continue
+            # Honour the declared return type: a stray scalar or array line
+            # is not a turn.
+            if isinstance(record, dict):
+                out.append(record)
+    return out
+def recent_turns(
+    project_dir: Path,
+    session_id: str,
+    *,
+    max_chars: int = 20_000,
+) -> list[dict[str, Any]]:
+    """Return the newest turns whose combined serialized size fits ``max_chars``.
+    Turns are collected newest-first until the next one would exceed the
+    budget, then returned oldest-first. Collection stops at the first turn
+    that does not fit, so older turns are never included past a gap.
+    """
+    kept: list[dict[str, Any]] = []
+    spent = 0
+    for record in reversed(all_turns(project_dir, session_id)):
+        cost = len(json.dumps(record, ensure_ascii=False))
+        if spent + cost > max_chars:
+            break
+        spent += cost
+        kept.append(record)
+    return kept[::-1]
+__all__ = ["append_turn", "all_turns", "recent_turns"]

specsmith-0.4.0.dev222/src/specsmith/agent/router.py ADDED Viewed

@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""Dynamic agent / model routing for the Nexus orchestrator (REQ-122).
+The orchestrator asks ``choose_tier`` which model tier should run a given
+task. Three tiers are recognized:
+* ``coder``  - the local `l1-nexus` Qwen-Coder server (default).
+* ``heavy``  - a larger reasoning model for governance / architecture work.
+* ``fast``   - a quick lightweight model for read-only asks and summaries.
+The default mapping is overridable per project via
+``.specsmith/config.yml``::
+    routing:
+      change: coder
+      release: heavy
+      destructive: heavy
+      read_only_ask: fast
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Literal
+# The closed set of model-tier names the router can return.
+Tier = Literal["coder", "heavy", "fast"]
+# Built-in intent -> tier routing. ``choose_tier`` copies this table and
+# layers any per-project overrides on top; intents missing from the table
+# fall back to "coder" there.
+DEFAULT_MAPPING: dict[str, Tier] = {
+    "read_only_ask": "fast",
+    "change": "coder",
+    "release": "heavy",
+    "destructive": "heavy",
+}
+def choose_tier(
+    intent: str,
+    *,
+    project_dir: Path | None = None,
+    retry_count: int = 0,
+) -> Tier:
+    """Return the model tier that should handle ``intent``.
+    The default table is used, updated with the project's routing
+    overrides when ``project_dir`` is given. Unknown intents resolve to
+    ``coder``. From the third attempt on (``retry_count >= 2``) a
+    ``coder`` result is escalated to ``heavy``; other tiers are returned
+    unchanged.
+    """
+    routing = dict(DEFAULT_MAPPING)
+    if project_dir is not None:
+        routing.update(_load_routing_overrides(project_dir))
+    selected: Tier = routing.get(intent, "coder")
+    should_escalate = retry_count >= 2 and selected == "coder"
+    return "heavy" if should_escalate else selected
+def _load_routing_overrides(project_dir: Path) -> dict[str, Tier]:
+    """Read the ``routing`` section of ``.specsmith/config.yml``.
+    Returns an empty dict when the file is missing, cannot be loaded, or
+    has no mapping under ``routing``. Entries whose value is not one of
+    the three tier names are dropped; keys are coerced to ``str``.
+    """
+    config_file = Path(project_dir).joinpath(".specsmith", "config.yml")
+    if not config_file.is_file():
+        return {}
+    try:
+        import yaml
+        parsed = yaml.safe_load(config_file.read_text(encoding="utf-8")) or {}
+    except Exception:  # noqa: BLE001
+        return {}
+    routing = parsed.get("routing") if isinstance(parsed, dict) else None
+    if not isinstance(routing, dict):
+        return {}
+    overrides: dict[str, Tier] = {}
+    for intent, tier in routing.items():
+        if not isinstance(tier, str):
+            continue
+        if tier not in ("coder", "heavy", "fast"):
+            continue
+        overrides[str(intent)] = tier  # type: ignore[assignment]
+    return overrides
+__all__ = ["DEFAULT_MAPPING", "Tier", "choose_tier"]

specsmith-0.4.0.dev222/src/specsmith/agent/rules.py ADDED Viewed

@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""Project rules auto-injection for the Nexus orchestrator (REQ-119).
+Combines `docs/governance/*_RULES.md` files and the H-rules from
+`AGENTS.md` into a single deterministic system-prompt prefix that the
+orchestrator prepends to every AG2 agent's `system_message`.
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+def load_rules(project_dir: Path) -> str:
+    """Build the governance-rules prompt prefix for ``project_dir``.
+    Sections come from every ``docs/governance/*_RULES.md`` file (sorted
+    by path, skipping unreadable or blank files), followed by the hard
+    rules extracted from ``AGENTS.md``. Returns ``""`` when nothing was
+    collected, so projects without rule files are unaffected.
+    """
+    root = Path(project_dir)
+    blocks: list[str] = []
+    rules_dir = root / "docs" / "governance"
+    if rules_dir.is_dir():
+        for rule_file in sorted(rules_dir.glob("*_RULES.md")):
+            try:
+                body = rule_file.read_text(encoding="utf-8").strip()
+            except OSError:
+                continue
+            if not body:
+                continue
+            blocks.append(f"# {rule_file.stem}\n{body}")
+    agents_file = root / "AGENTS.md"
+    if agents_file.is_file():
+        try:
+            agents_source = agents_file.read_text(encoding="utf-8")
+        except OSError:
+            agents_source = ""
+        hard_rules = _extract_h_rules(agents_source)
+        if hard_rules:
+            blocks.append("# AGENTS.md hard rules\n" + hard_rules)
+    if blocks:
+        joined = "\n\n".join(blocks)
+        return "## Project Governance Rules (auto-loaded)\n" + joined + "\n"
+    return ""
+def _extract_h_rules(text: str) -> str:
+    """Collect the ``H<number>`` hard-rule lines of ``text``, one per output line.
+    A line qualifies when, after trimming, it starts with an optional
+    ``*``/``-`` bullet, optional whitespace, up to two ``*`` and then
+    ``H`` plus digits. Leading bullet/asterisk characters and the
+    whitespace that follows them are removed from each kept line.
+    """
+    trimmed_lines = (raw_line.strip() for raw_line in text.splitlines())
+    kept = [
+        candidate.lstrip("*-").lstrip()
+        for candidate in trimmed_lines
+        if re.match(r"^[*\-]?\s*\*?\*?H\d+\b", candidate)
+    ]
+    return "\n".join(kept)
+__all__ = ["load_rules"]

specsmith-0.4.0.dev222/src/specsmith/agent/verifier.py ADDED Viewed

@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: MIT
+# Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
+"""Real verifier signal for the Nexus orchestrator (REQ-108).
+Replaces the hardcoded ``0.85 / 0.4 / 0.0`` confidence in
+``Orchestrator._build_task_result`` with a real signal derived from:
+* test_results (failures > 0  -> confidence <= 0.5)
+* ruff_errors  (>= 1          -> confidence x 0.7)
+* mypy_errors  (>= 1          -> confidence x 0.8)
+Equilibrium is reached only when all three gates are clean **and** the
+measured confidence meets or exceeds the preflight ``confidence_target``.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass
+class VerifierReport:
+    """Inputs to the verifier; produced by parsing the orchestrator output."""
+    # Number of tests reported as passed (only shown in the summary text).
+    test_passed: int = 0
+    # Number of tests reported as failed; any failure caps confidence at 0.5.
+    test_failed: int = 0
+    # Count of ruff findings; one or more scales confidence by 0.7.
+    ruff_errors: int = 0
+    # Count of mypy findings; one or more scales confidence by 0.8.
+    mypy_errors: int = 0
+    # Whether the run produced any change; without changes confidence is 0.0.
+    has_changes: bool = False
+@dataclass
+class VerifierVerdict:
+    """Outputs of the verifier; consumed by the harness."""
+    # Measured confidence, clamped to [0.0, 1.0] and rounded to 3 decimals by ``score``.
+    confidence: float
+    # True only when all gates are clean, changes exist, and the target is met.
+    equilibrium: bool
+    # Human-readable one-line recap of the counts and the outcome.
+    summary: str
+def score(
+    report: VerifierReport,
+    *,
+    confidence_target: float = 0.7,
+) -> VerifierVerdict:
+    """Turn a :class:`VerifierReport` into a :class:`VerifierVerdict`.
+    Confidence starts at 1.0 when the report has changes (0.0 otherwise),
+    is capped at 0.5 by any test failure, multiplied by 0.7 for ruff
+    findings and by 0.8 for mypy findings, then clamped to [0, 1] and
+    rounded to three decimals. Equilibrium requires zero failures and
+    findings, real changes, and confidence at or above
+    ``confidence_target``. Pure and deterministic.
+    """
+    tests_failing = report.test_failed > 0
+    ruff_dirty = report.ruff_errors > 0
+    mypy_dirty = report.mypy_errors > 0
+    confidence = 1.0 if report.has_changes else 0.0
+    if tests_failing:
+        confidence = min(confidence, 0.5)
+    if ruff_dirty:
+        confidence *= 0.7
+    if mypy_dirty:
+        confidence *= 0.8
+    confidence = round(max(0.0, min(1.0, confidence)), 3)
+    all_gates_clean = (
+        report.test_failed == 0 and report.ruff_errors == 0 and report.mypy_errors == 0
+    )
+    equilibrium = all_gates_clean and report.has_changes and confidence >= confidence_target
+    if report.has_changes:
+        fragments = [f"{report.test_passed} passed / {report.test_failed} failed"]
+    else:
+        fragments = ["no changes detected"]
+    if report.ruff_errors:
+        fragments.append(f"{report.ruff_errors} ruff error(s)")
+    if report.mypy_errors:
+        fragments.append(f"{report.mypy_errors} mypy error(s)")
+    outcome = " — equilibrium" if equilibrium else " — retry recommended"
+    return VerifierVerdict(
+        confidence=confidence,
+        equilibrium=equilibrium,
+        summary="; ".join(fragments) + outcome,
+    )
+def report_from_chat_sections(
+    sections: dict[str, str],
+    *,
+    files_changed: list[str] | None = None,
+) -> VerifierReport:
+    """Derive a :class:`VerifierReport` from parsed output-contract sections.
+    Only the ``test_results`` and ``diff`` entries of ``sections`` are
+    read. Pass/fail counts are the first ``<n> passed`` / ``<n> failed``
+    matches (case-insensitive) in the test output. Changes are assumed
+    when the diff is non-blank or ``files_changed`` is non-empty. Ruff and
+    mypy counts are heuristic scans of the same test output: lines that
+    begin with a rule-code-like token, and occurrences of ``error:``.
+    """
+    import re
+    test_output = sections.get("test_results", "") or ""
+    def _first_count(pattern: str) -> int:
+        # First "<digits> <label>" hit in the test output, or 0 when absent.
+        found = re.search(pattern, test_output, re.IGNORECASE)
+        return int(found.group(1)) if found else 0
+    passed_count = _first_count(r"(\d+)\s+passed")
+    failed_count = _first_count(r"(\d+)\s+failed")
+    diff_body = sections.get("diff", "") or ""
+    changed = bool(diff_body.strip()) or bool(files_changed)
+    # ruff/mypy signals are not part of the standard contract, so they are
+    # scraped from the raw test output via their usual error markers.
+    ruff_hits = re.findall(r"^\s*[A-Z]\d{3,4}\s", test_output, re.MULTILINE)
+    mypy_hits = re.findall(r"\berror:", test_output)
+    return VerifierReport(
+        test_passed=passed_count,
+        test_failed=failed_count,
+        ruff_errors=len(ruff_hits),
+        mypy_errors=len(mypy_hits),
+        has_changes=changed,
+    )
+__all__ = [
+    "VerifierReport",
+    "VerifierVerdict",
+    "report_from_chat_sections",
+    "score",
+]

specsmith 0.4.0__tar.gz → 0.4.0.dev222__tar.gz

specsmith 0.4.0__tar.gz → 0.4.0.dev222__tar.gz