PyPI - vigil-codeintel - Versions diffs - 0.1.0__py3-none-any.whl - Mend

vigil-codeintel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

vigil_codeintel-0.1.0.dist-info/METADATA +780 -0
vigil_codeintel-0.1.0.dist-info/RECORD +131 -0
vigil_codeintel-0.1.0.dist-info/WHEEL +5 -0
vigil_codeintel-0.1.0.dist-info/entry_points.txt +3 -0
vigil_codeintel-0.1.0.dist-info/licenses/LICENSE +21 -0
vigil_codeintel-0.1.0.dist-info/top_level.txt +3 -0
vigil_forensic/__init__.py +224 -0
vigil_forensic/_git_utils.py +178 -0
vigil_forensic/_shared.py +510 -0
vigil_forensic/_stubs.py +156 -0
vigil_forensic/gate_checks/__init__.py +1 -0
vigil_forensic/gate_checks/_ast_helpers.py +629 -0
vigil_forensic/gate_checks/_deployment_detector.py +573 -0
vigil_forensic/gate_checks/atomic_write_checks.py +1143 -0
vigil_forensic/gate_checks/authority_checks.py +95 -0
vigil_forensic/gate_checks/boundary_breach_checks.py +202 -0
vigil_forensic/gate_checks/broad_except_checks.py +301 -0
vigil_forensic/gate_checks/broad_except_hidden_sentinel_checks.py +365 -0
vigil_forensic/gate_checks/common.py +253 -0
vigil_forensic/gate_checks/config_safety_checks.py +704 -0
vigil_forensic/gate_checks/config_ssot_checks.py +78 -0
vigil_forensic/gate_checks/conflict_checks.py +193 -0
vigil_forensic/gate_checks/context_fallback_checks.py +697 -0
vigil_forensic/gate_checks/context_health_checks.py +289 -0
vigil_forensic/gate_checks/contract_shape_drift_checks.py +459 -0
vigil_forensic/gate_checks/dirty_baseline_check.py +274 -0
vigil_forensic/gate_checks/duplication_checks.py +387 -0
vigil_forensic/gate_checks/embedded_string_checks.py +123 -0
vigil_forensic/gate_checks/empty_output_checks.py +87 -0
vigil_forensic/gate_checks/encoding_checks.py +847 -0
vigil_forensic/gate_checks/export_completeness_checks.py +156 -0
vigil_forensic/gate_checks/fallback_checks.py +41 -0
vigil_forensic/gate_checks/file_proliferation_checks.py +171 -0
vigil_forensic/gate_checks/fix_without_test_checks.py +69 -0
vigil_forensic/gate_checks/forensic_cluster_runners/__init__.py +9 -0
vigil_forensic/gate_checks/forensic_cluster_runners/_helpers.py +71 -0
vigil_forensic/gate_checks/forensic_cluster_runners/advanced_checks.py +322 -0
vigil_forensic/gate_checks/forensic_cluster_runners/core.py +273 -0
vigil_forensic/gate_checks/forensic_cluster_runners/integrity_checks.py +203 -0
vigil_forensic/gate_checks/forensic_cluster_runners/quality_checks.py +666 -0
vigil_forensic/gate_checks/forensic_clusters/__init__.py +193 -0
vigil_forensic/gate_checks/forensic_clusters/allowlist.py +426 -0
vigil_forensic/gate_checks/forensic_clusters/allowlist_writer.py +302 -0
vigil_forensic/gate_checks/forensic_clusters/api_protocol.py +231 -0
vigil_forensic/gate_checks/forensic_clusters/async_quality.py +1156 -0
vigil_forensic/gate_checks/forensic_clusters/code_style.py +808 -0
vigil_forensic/gate_checks/forensic_clusters/core.py +319 -0
vigil_forensic/gate_checks/forensic_clusters/data_quality.py +763 -0
vigil_forensic/gate_checks/forensic_clusters/dead_code.py +480 -0
vigil_forensic/gate_checks/forensic_clusters/edit_mutation.py +842 -0
vigil_forensic/gate_checks/forensic_clusters/exception_boundary.py +240 -0
vigil_forensic/gate_checks/forensic_clusters/legacy_debt.py +556 -0
vigil_forensic/gate_checks/forensic_clusters/static_analysis.py +834 -0
vigil_forensic/gate_checks/forensic_clusters/structural_quality.py +298 -0
vigil_forensic/gate_checks/god_object_zones_checks.py +173 -0
vigil_forensic/gate_checks/hallucination_checks.py +566 -0
vigil_forensic/gate_checks/hunter_artifact_completeness_check.py +139 -0
vigil_forensic/gate_checks/implementation_overfit_checks.py +380 -0
vigil_forensic/gate_checks/import_integrity_checks.py +233 -0
vigil_forensic/gate_checks/imports_in_function_checks.py +283 -0
vigil_forensic/gate_checks/ml_checks.py +318 -0
vigil_forensic/gate_checks/performance_checks.py +106 -0
vigil_forensic/gate_checks/project_specific_runner.py +691 -0
vigil_forensic/gate_checks/provider_capability_checks.py +73 -0
vigil_forensic/gate_checks/refactor_completeness_checks.py +274 -0
vigil_forensic/gate_checks/reliability_checks.py +389 -0
vigil_forensic/gate_checks/reporting_checks.py +55 -0
vigil_forensic/gate_checks/runtime_behavior_checks.py +220 -0
vigil_forensic/gate_checks/security_injection_checks.py +332 -0
vigil_forensic/gate_checks/semantic_intent_checks.py +139 -0
vigil_forensic/gate_checks/size_complexity_checks.py +336 -0
vigil_forensic/gate_checks/stuck_feature_flag_checks.py +354 -0
vigil_forensic/gate_checks/syntax_validity_checks.py +217 -0
vigil_forensic/gate_checks/temporal_freshness_checks.py +79 -0
vigil_forensic/gate_checks/test_quality_checks.py +946 -0
vigil_forensic/gate_checks/testing_checks.py +149 -0
vigil_forensic/gate_checks/toctou_checks.py +367 -0
vigil_forensic/gate_checks/type_checking_checks.py +316 -0
vigil_forensic/gate_models.py +392 -0
vigil_forensic/gate_packs/__init__.py +1 -0
vigil_forensic/gate_packs/universal.py +179 -0
vigil_forensic/gate_profile.json +31 -0
vigil_forensic/gate_registry.py +21 -0
vigil_forensic/language_profiles.py +219 -0
vigil_forensic/meta_findings.py +207 -0
vigil_forensic/self_audit.py +725 -0
vigil_forensic/source_analysis.py +175 -0
vigil_mapper/__init__.py +103 -0
vigil_mapper/_ast_helpers_minimal.py +229 -0
vigil_mapper/_extract_imports_impl.py +123 -0
vigil_mapper/_file_count_guard.py +129 -0
vigil_mapper/_git_utils.py +178 -0
vigil_mapper/_runtime_ast.py +438 -0
vigil_mapper/_runtime_dispatch.py +137 -0
vigil_mapper/_seed_helpers.py +82 -0
vigil_mapper/authority_builder.py +1102 -0
vigil_mapper/cli_entry.py +731 -0
vigil_mapper/conflict_builder.py +818 -0
vigil_mapper/data_contract_builder.py +446 -0
vigil_mapper/findings_builder.py +716 -0
vigil_mapper/fingerprint.py +53 -0
vigil_mapper/hotspot_builder.py +539 -0
vigil_mapper/map_common.py +449 -0
vigil_mapper/map_errors.py +55 -0
vigil_mapper/map_models.py +431 -0
vigil_mapper/map_models_ext.py +206 -0
vigil_mapper/map_models_findings.py +130 -0
vigil_mapper/map_storage.py +455 -0
vigil_mapper/parse_cache.py +795 -0
vigil_mapper/refactor_boundary_builder.py +266 -0
vigil_mapper/runtime_builder.py +527 -0
vigil_mapper/runtime_tracer.py +243 -0
vigil_mapper/runtime_tracer_entry.py +199 -0
vigil_mapper/semantic_diff.py +71 -0
vigil_mapper/source_adapters/__init__.py +109 -0
vigil_mapper/source_adapters/_base.py +264 -0
vigil_mapper/source_adapters/_ir.py +156 -0
vigil_mapper/source_adapters/_lexer.py +309 -0
vigil_mapper/source_adapters/_patterns.py +212 -0
vigil_mapper/source_adapters/_treesitter.py +182 -0
vigil_mapper/source_adapters/go.py +553 -0
vigil_mapper/source_adapters/java.py +541 -0
vigil_mapper/source_adapters/javascript.py +626 -0
vigil_mapper/source_adapters/python.py +325 -0
vigil_mapper/source_adapters/typescript.py +749 -0
vigil_mapper/structural_builder.py +586 -0
vigil_mcp/__init__.py +1 -0
vigil_mcp/_jobs.py +587 -0
vigil_mcp/_paths.py +93 -0
vigil_mcp/forensic_server.py +419 -0
vigil_mcp/map_server.py +452 -0

vigil_mcp/forensic_server.py ADDED Viewed

@@ -0,0 +1,419 @@
+"""FastMCP stdio server: forensic-audit
+Wraps vigil_forensic.run_forensic_audit behind a background-job poll API.
+Resource constraints:
+- At most 2 concurrent jobs (enforced by _jobs.JobRegistry).
+- One thread per job (no pool).
+- run_forensic_audit already enforces workers=1 internally (verified in source).
+- Output truncated/paginated to OUTPUT_CHAR_LIMIT chars (~25 k tokens budget).
+"""
+from __future__ import annotations
+import json
+from collections import Counter
+from pathlib import Path
+from typing import Any
+from mcp.server.fastmcp import FastMCP
+from vigil_mcp import _jobs
+from vigil_mcp import _paths
+from vigil_forensic import run_forensic_audit
+# Server-level usage guide handed to FastMCP through ``instructions=`` below.
+# NOTE(review): this text is presumably surfaced verbatim to MCP clients, so
+# treat it as runtime output rather than an ordinary comment - confirm against
+# the FastMCP documentation before rewording it.
+_INSTRUCTIONS = """\
+forensic-audit - static code-quality forensic auditor. Finds real bugs, swallowed
+exceptions, security issues, oversized/over-nested code, and cross-file duplication
+across Python/Go/Java/JS/TS. Pure static analysis (tree-sitter/AST) - it never runs
+the project or its tests.
+WHEN TO USE: when the user asks to audit a project, review code quality, or find
+problems/bugs/smells in a codebase or a set of changes - before committing or merging.
+Not for running tests (it doesn't execute code).
+WORKFLOW (background job + poll - do not expect an instant answer):
+  1. start_forensic_audit(path="")  -> leave path empty to auto-detect the project
+     root from the current directory; returns {job_id, resolved_path}.
+  2. get_forensic_status(job_id)     -> poll until status == "done" (usually seconds).
+  3. get_forensic_results(job_id)    -> returns a COMPACT SUMMARY by default
+     (counts by severity + by check_id + top findings). Read this FIRST; it is sized
+     to fit the context budget (~3k tokens), so prefer it over the full list.
+  4. Only if needed: get_forensic_results(job_id, view="full", severity="HIGH")
+     or check_id="..." to drill into specific findings (paginated).
+INTERPRETING: exit_code 0 = clean, 1 = high/critical findings exist, 2 = error.
+Triage HIGH first. On clean third-party code most findings are size.* (large files)
+and broad_except (real `except: pass` swallows).
+HUGE REPOS (anti-hang): if the collected file count exceeds max_files (default 800),
+the audit is SKIPPED and the result has meta.skipped_reason="too_many_files" with
+top_subdirs + a suggestion - scan a submodule (start_forensic_audit(path='<dir>/<subdir>'))
+or pass a larger max_files to force a full scan.
+TUNING (enable / disable checks):
+  - DISABLE noisy gates for a project: create <project>/.cortex/disabled_gates.json
+    = ["gate_id", ...]; those gates never run (reported in meta.gates_skipped).
+  - RUN ONLY specific gates: pass gates="gate_id1,gate_id2" (comma-separated) -
+    everything else is skipped. Empty = run all applicable gates.
+  - ENABLE an opt-in heuristic gate (e.g. god_object_zones, OFF by default because
+    noisy): name it in gates=, e.g. gates="god_object_zones".
+  - RAISE the severity floor: severity="HIGH" keeps only HIGH/CRITICAL findings.
+"""
+# Module-level server object: the ``@mcp.tool()`` decorators further down are
+# applied through it and ``main()`` calls its ``run()`` method.
+mcp = FastMCP("forensic-audit", instructions=_INSTRUCTIONS)
+# Default ``page_size_chars`` for get_forensic_results (characters per page).
+# ~80 k chars keeps well under the 25 k token MCP output limit.
+# NOTE(review): the chars-to-tokens ratio behind this figure is a rule of
+# thumb that nothing in this module checks - confirm against the real budget.
+OUTPUT_CHAR_LIMIT = 80_000
+# Severity ordering, highest first, used to pick the "top" findings bucket.
+_SEVERITY_ORDER = ("CRITICAL", "HIGH", "MEDIUM", "LOW", "INFO")
+# Caps for the compact summary view (keep JSON well under the MCP budget):
+# _TOP_FINDINGS_CAP bounds the ``top_findings`` list and _BY_CHECK_ID_CAP
+# bounds how many distinct check ids appear in ``by_check_id``.
+_TOP_FINDINGS_CAP = 20
+_BY_CHECK_ID_CAP = 25
+# ---------------------------------------------------------------------------
+# Summary builder (Feature 1 - summary-first forensic results)
+# ---------------------------------------------------------------------------
+def _finding_location(f: dict) -> tuple[Any, Any]:
+    """Return a best-effort ``(file, line)`` pair for a finding.
+    Two finding schemas are handled.  Synthetic/test findings carry flat
+    ``file``/``line`` keys; real vigil_forensic findings instead carry an
+    ``evidence`` list of ``{"kind": "file", "path": ..., "detail": "line:N"}``
+    entries.  The flat keys win; whichever of the two values is still missing
+    is then filled from the evidence entries, scanned in order.
+    Args:
+        f: A single finding dict in either schema.
+    Returns:
+        ``(file, line)``.  ``line`` is an ``int`` when the evidence detail is
+        ``"line:<decimal digits>"``, the raw text after ``"line:"`` when that
+        text is not a plain number, and ``None`` when nothing usable exists.
+        ``file`` is ``None`` when no flat key or evidence ``path`` is found.
+    """
+    file = f.get("file")
+    line = f.get("line")
+    if file is not None and line is not None:
+        # Flat schema supplied both values; the evidence list is not consulted.
+        return file, line
+    for ev in f.get("evidence") or []:
+        if not isinstance(ev, dict):
+            # Malformed evidence entries are skipped rather than failing the view.
+            continue
+        if file is None and ev.get("path"):
+            file = ev.get("path")
+        if line is None:
+            detail = str(ev.get("detail", ""))
+            if detail.startswith("line:"):
+                suffix = detail[len("line:"):].strip()
+                # isdecimal(), not isdigit(): isdigit() is also true for
+                # characters such as superscript digits, which int() rejects
+                # with ValueError.  isdecimal() matches what int() can parse.
+                line = int(suffix) if suffix.isdecimal() else suffix or None
+        if file is not None and line is not None:
+            break
+    return file, line
+def _compact_finding(f: dict) -> dict:
+    """Reduce a finding to the five fields shown in the compact summary.
+    Accepts both the flat test schema (``file``/``line``/``message``) and the
+    real forensic schema (``evidence``/``summary``/``title``).
+    Returns:
+        A dict with keys ``check_id``, ``severity``, ``file``, ``line`` and
+        ``message``, in that order.
+    """
+    location = _finding_location(f)
+    # First non-empty text field wins: message, then summary, then title.
+    text = f.get("message")
+    if not text:
+        text = f.get("summary")
+    if not text:
+        text = f.get("title")
+    compact = {"check_id": f.get("check_id"), "severity": f.get("severity")}
+    compact["file"], compact["line"] = location
+    compact["message"] = text
+    return compact
+def _build_forensic_summary(result: dict) -> dict:
+    """Condense a raw audit result into a summary sized for a single reply.
+    Rather than listing every finding, the summary carries the total count,
+    three severity buckets, the ``_BY_CHECK_ID_CAP`` most frequent check ids
+    and up to ``_TOP_FINDINGS_CAP`` compact findings taken, in input order,
+    from the highest severity that actually occurs.
+    Args:
+        result: The raw ``run_forensic_audit`` result dict (``findings``,
+                ``exit_code``, ``meta``, ``errors``).
+    Returns:
+        A dict with keys ``total``, ``exit_code``, ``by_severity``,
+        ``by_check_id``, ``top_findings``, ``findings``, ``meta``, ``errors``
+        and ``hint``.
+    """
+    all_findings = result.get("findings") or []
+    # Upper-case each severity once so the tallies ignore input casing.
+    severities = [str(item.get("severity", "")).upper() for item in all_findings]
+    sev_tally: Counter[str] = Counter(severities)
+    # Three reported buckets: CRITICAL folds into "high" and INFO into "low".
+    # A severity outside _SEVERITY_ORDER still counts towards ``total`` but
+    # lands in none of the buckets.
+    by_severity = {
+        "high": sev_tally["HIGH"] + sev_tally["CRITICAL"],
+        "medium": sev_tally["MEDIUM"],
+        "low": sev_tally["LOW"] + sev_tally["INFO"],
+    }
+    # Most frequent check ids first; a missing check_id is tallied as "unknown".
+    check_tally: Counter[str] = Counter(
+        str(item.get("check_id", "unknown")) for item in all_findings
+    )
+    by_check_id = dict(check_tally.most_common(_BY_CHECK_ID_CAP))
+    # Highest known severity that has at least one finding, if any.
+    top_severity: str | None = next(
+        (level for level in _SEVERITY_ORDER if sev_tally[level] > 0), None
+    )
+    top_findings: list[dict] = []
+    if top_severity is not None:
+        for item, level in zip(all_findings, severities):
+            if level != top_severity:
+                continue
+            top_findings.append(_compact_finding(item))
+            if len(top_findings) >= _TOP_FINDINGS_CAP:
+                break
+    hint = (
+        "Compact summary. For the full finding list call get_forensic_results "
+        "with view='full' (supports severity= and check_id= filters + paging)."
+    )
+    summary: dict = {"total": len(all_findings)}
+    summary["exit_code"] = result.get("exit_code", 2)
+    summary["by_severity"] = by_severity
+    summary["by_check_id"] = by_check_id
+    summary["top_findings"] = top_findings
+    # ``findings`` is the same list object as ``top_findings``: it is there so
+    # summary payloads still expose a findings key.  The complete list is only
+    # available through view='full'.
+    summary["findings"] = top_findings
+    summary["meta"] = result.get("meta") or {}
+    summary["errors"] = result.get("errors") or []
+    summary["hint"] = hint
+    return summary
+# ---------------------------------------------------------------------------
+# Serialisation / truncation helpers
+# ---------------------------------------------------------------------------
+def _paginate_json(data: Any, page: int, page_size_chars: int) -> dict:
+    """Serialise *data* to JSON and return one fixed-size character page of it.
+    The JSON text is produced with ``indent=2`` and ``default=str`` (values
+    json cannot encode are stringified), then sliced by character offset.
+    Args:
+        data:            Any value to serialise.
+        page:            Zero-based page index; must be >= 0.
+        page_size_chars: Characters per page; must be >= 1.
+    Returns:
+        A dict with ``payload`` (the slice; empty when *page* is past the
+        end), ``truncated`` (True while more text follows this page),
+        ``total_chars``, ``page`` and ``total_pages``.
+    Raises:
+        ValueError: if *page* is negative or *page_size_chars* is below 1.
+    """
+    # Reject nonsensical paging up front.  Without these checks a zero page
+    # size divides by zero below, and negative values turn into from-the-end
+    # slice offsets that silently return an empty payload and a bogus
+    # total_pages.
+    if page < 0:
+        raise ValueError(f"page must be >= 0, got {page}")
+    if page_size_chars < 1:
+        raise ValueError(f"page_size_chars must be >= 1, got {page_size_chars}")
+    full_json = json.dumps(data, default=str, indent=2)
+    total = len(full_json)
+    start_char = page * page_size_chars
+    end_char = start_char + page_size_chars
+    return {
+        "payload": full_json[start_char:end_char],
+        "truncated": end_char < total,
+        "total_chars": total,
+        "page": page,
+        # Ceiling division: a partial last page still counts as a page.
+        "total_pages": (total + page_size_chars - 1) // page_size_chars,
+    }
+def _cap_findings(result: dict, max_findings: int = 200) -> dict:
+    """Cap the findings list to avoid unbounded blobs.
+    Args:
+        result:       Audit result dict; its ``findings`` entry may be
+                      missing or ``None``, both of which count as empty.
+        max_findings: Maximum number of findings to keep.  Negative values
+                      are treated as 0.
+    Returns:
+        *result* itself when nothing had to be cut.  Otherwise a shallow copy
+        whose ``findings`` holds the first ``max_findings`` entries and whose
+        ``meta`` (also copied) gains ``findings_truncated`` and
+        ``total_findings_before_cap``.  The input dict is never mutated.
+    """
+    # ``or []`` also covers an explicit None value, which len() would reject.
+    findings = result.get("findings") or []
+    # A negative cap would otherwise slice from the end of the list
+    # (findings[:-1] drops only the last item) instead of capping it.
+    limit = max(max_findings, 0)
+    if len(findings) <= limit:
+        return result
+    capped = dict(result)
+    capped["findings"] = findings[:limit]
+    meta = dict(result.get("meta") or {})
+    meta["findings_truncated"] = True
+    meta["total_findings_before_cap"] = len(findings)
+    capped["meta"] = meta
+    return capped
+# ---------------------------------------------------------------------------
+# MCP tools
+# ---------------------------------------------------------------------------
+@mcp.tool()
+def start_forensic_audit(
+    path: str = "",
+    gates: str = "",
+    severity: str = "LOW",
+    all_languages: bool = True,
+    max_files: int = 800,
+) -> dict:
+    """Start a background forensic audit job for the given project path.
+    Args:
+        path:          Absolute path to the project root directory.  When
+                       empty/omitted the project root is auto-detected by
+                       walking up from the current working directory looking
+                       for a ``.git`` / ``pyproject.toml`` / ``package.json``
+                       marker (falling back to cwd).  The chosen directory is
+                       returned as ``resolved_path``.
+        gates:         Comma-separated list of gate check_ids to run.
+                       Empty string means run all applicable gates.
+        severity:      Minimum severity to include: LOW | MEDIUM | HIGH | CRITICAL.
+        all_languages: Reserved; currently always True.
+        max_files:     Anti-hang ceiling on the collected source-file count
+                       (default 800).  Above it the audit is SKIPPED (gates do
+                       NOT run) and get_forensic_results reports
+                       meta.skipped_reason="too_many_files" with top_subdirs +
+                       a suggestion to scan a submodule; raise it to force a full
+                       scan of a huge repo.
+    Returns:
+        {"job_id": str | None, "status": "running" | "busy",
+         "resolved_path": str, ...}
+        When status is "busy", retry later - the server is at max concurrent jobs.
+    Resource note:
+        run_forensic_audit always uses workers=1 internally. This server
+        enforces an additional cap of 2 concurrent jobs.
+    """
+    # An explicit path is used exactly as given; only an empty one triggers
+    # project-root auto-detection.
+    resolved_path = path or _paths._resolve_project_root(None)
+    # None (not an empty list) is what signals "no gate selection".  A
+    # non-empty string made only of commas/blanks still yields an empty list.
+    gates_list = None
+    if gates:
+        gates_list = [name for name in map(str.strip, gates.split(",")) if name]
+    def _run() -> dict:
+        # No worker count is forwarded: per the module notes the audit already
+        # runs with workers=1 internally and accepts no such parameter.
+        audit_options = {
+            "gates": gates_list,
+            "severity": severity,
+            "all_languages": all_languages,
+            "max_files": max_files,
+        }
+        return run_forensic_audit(Path(resolved_path), **audit_options)
+    # project_dir enables disk-backed persistence so results survive a server
+    # restart; get_forensic_status/results then resolve by job_id from disk.
+    job_info = _jobs.start(_run, project_dir=resolved_path)
+    job_info["resolved_path"] = resolved_path
+    return job_info
+@mcp.tool()
+def get_forensic_status(job_id: str) -> dict:
+    """Poll the status of a forensic audit job.
+    Args:
+        job_id: Job ID returned by start_forensic_audit.
+    Returns:
+        {"job_id": str, "status": "running" | "done" | "error" | "cancelled" | "not_found"}
+    """
+    # Pure delegation: whatever dict _jobs.status returns is passed through
+    # unmodified.
+    # NOTE(review): the docstring above is presumably exposed by FastMCP as the
+    # tool description - confirm before rewording it.
+    return _jobs.status(job_id)
+@mcp.tool()
+def get_forensic_results(
+    job_id: str,
+    view: str = "summary",
+    severity: str = "",
+    check_id: str = "",
+    page: int = 0,
+    page_size_chars: int = OUTPUT_CHAR_LIMIT,
+    max_findings: int = 200,
+) -> dict:
+    """Retrieve results of a completed forensic audit.
+    Two views:
+      * ``view='summary'`` (default) - a compact summary (total counts,
+        by_severity, by_check_id, top HIGH findings) that fits comfortably
+        in the MCP context budget.  Use this first.
+      * ``view='full'`` - the full findings list, capped and paginated.
+        Supports ``severity=`` and ``check_id=`` filters to drill in.
+    Args:
+        job_id:          Job ID returned by start_forensic_audit.
+        view:            "summary" (default) or "full".
+        severity:        (full view) keep only findings of this severity, e.g.
+                         "HIGH".  Empty = no filter.
+        check_id:        (full view) keep only findings with this check_id.
+                         Empty = no filter.
+        page:            Zero-based page index (full view).
+        page_size_chars: Max chars per page (default 80 000 ≈ 25 k tokens).
+        max_findings:    Cap on the findings list before pagination (default 200).
+    Returns:
+        dict with keys:
+          "job_id", "status", "view",
+          "exit_code" (0=clean, 1=high/critical findings, 2=error),
+          "payload" (JSON string - summary dict or full result),
+          "truncated" (bool), "total_chars", "page", "total_pages".
+    """
+    job = _jobs.result(job_id)
+    status = job.get("status")
+    # Jobs with nothing to show share one reply shape: no payload, no paging.
+    if status in ("running", "not_found", "cancelled"):
+        return {"job_id": job_id, "status": status, "payload": None,
+                "truncated": False, "total_chars": 0}
+    if status == "error":
+        # Same shape, plus the error recorded for the job.
+        return {"job_id": job_id, "status": "error",
+                "error": job.get("error"), "payload": None,
+                "truncated": False, "total_chars": 0}
+    # Every other status is handled as a finished job from here on.
+    audit_result = job.get("result") or {}
+    if not isinstance(audit_result, dict):
+        audit_result = {}
+    exit_code = audit_result.get("exit_code", 2)
+    if view == "full":
+        # Filters narrow the list first, so the cap applies to matches only.
+        selected = audit_result.get("findings") or []
+        if severity:
+            wanted = severity.upper()
+            selected = [
+                f for f in selected
+                if str(f.get("severity", "")).upper() == wanted
+            ]
+        if check_id:
+            selected = [f for f in selected if f.get("check_id") == check_id]
+        body = _cap_findings(
+            {**audit_result, "findings": selected}, max_findings=max_findings
+        )
+        shown_view = "full"
+    else:
+        # Any view value other than "full" falls back to the compact summary.
+        body = _build_forensic_summary(audit_result)
+        shown_view = "summary"
+    paging = _paginate_json(body, page=page, page_size_chars=page_size_chars)
+    return {
+        "job_id": job_id,
+        "status": "done",
+        "view": shown_view,
+        "exit_code": exit_code,
+        **paging,
+    }
+@mcp.tool()
+def cancel_forensic_audit(job_id: str) -> dict:
+    """Cancel a running forensic audit job.
+    Args:
+        job_id: Job ID returned by start_forensic_audit.
+    Returns:
+        {"job_id": str, "cancelled": bool, ...}
+    """
+    # Pure delegation: whatever dict _jobs.cancel returns is passed through
+    # unmodified.
+    # NOTE(review): the docstring above is presumably exposed by FastMCP as the
+    # tool description - confirm before rewording it.
+    return _jobs.cancel(job_id)
+def main() -> None:
+    """Start the forensic-audit MCP server by calling ``mcp.run()``."""
+    # No arguments are passed, so the server runs with FastMCP's defaults.
+    # NOTE(review): the module docstring calls this a stdio server - confirm
+    # that stdio is FastMCP's default transport.
+    mcp.run()
+# Start the server only when this module is executed as a script.
+if __name__ == "__main__":
+    main()