vaara 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vaara/__init__.py ADDED
@@ -0,0 +1,9 @@
1
"""Vaara — Adaptive AI Agent Execution Layer.

A layer that sits between AI agents and the environment they execute
against: it scores the risk of actions, categorizes them, and produces
compliance audit trails.

Built on top of Microsoft Agent Governance Toolkit.
"""

__version__ = "0.3.0"
@@ -0,0 +1 @@
1
+ """Audit trail — structured, immutable, regulation-mapped action logging."""
@@ -0,0 +1,321 @@
1
+ """SQLite persistence backend for the audit trail.
2
+
3
+ The in-memory trail is fast but volatile. This backend writes every record
4
+ to SQLite as it arrives (via on_record callback) and can reconstruct the
5
+ full trail from disk.
6
+
7
+ Design principles:
8
+ - **WAL mode** for concurrent read/write (readers never block writers)
9
+ - **Append-only** — no UPDATE or DELETE, matching the immutability guarantee
10
+ - **Hash chain verified on load** — detects on-disk tampering
11
+ - **Regulatory domain indexed** — fast compliance queries by regulation
12
+ - **JSON data column** — flexible schema for action-specific fields
13
+
14
+ EU AI Act Article 12(2): Logging capabilities shall allow for the recording
15
+ of events relevant to identify situations that may result in the AI system
16
+ posing a risk. SQLite's ACID guarantees that no event is silently lost.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ import sqlite3
24
+ import time
25
+ from pathlib import Path
26
+ from typing import Optional
27
+
28
+ from vaara.audit.trail import AuditRecord, AuditTrail, EventType
29
+
30
logger = logging.getLogger(__name__)

SCHEMA_VERSION = 1

# Idempotent DDL (IF NOT EXISTS throughout) — safe to re-run on every
# startup against an existing database, so older DBs pick up new indexes
# automatically. Every query path orders by seq, so seq gets its own
# index alongside the lookup columns.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS audit_meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS audit_records (
    record_id TEXT PRIMARY KEY,
    action_id TEXT NOT NULL,
    event_type TEXT NOT NULL,
    timestamp REAL NOT NULL,
    agent_id TEXT NOT NULL,
    tool_name TEXT NOT NULL,
    data TEXT NOT NULL DEFAULT '{}',
    regulatory TEXT NOT NULL DEFAULT '[]',
    previous_hash TEXT NOT NULL DEFAULT '',
    record_hash TEXT NOT NULL DEFAULT '',
    seq INTEGER NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_action_id ON audit_records(action_id);
CREATE INDEX IF NOT EXISTS idx_agent_id ON audit_records(agent_id);
CREATE INDEX IF NOT EXISTS idx_event_type ON audit_records(event_type);
CREATE INDEX IF NOT EXISTS idx_timestamp ON audit_records(timestamp);
CREATE INDEX IF NOT EXISTS idx_tool_name ON audit_records(tool_name);
CREATE INDEX IF NOT EXISTS idx_seq ON audit_records(seq);
"""


class SQLiteAuditBackend:
    """Persistent audit trail backed by SQLite.

    Usage::

        backend = SQLiteAuditBackend("audit.db")
        trail = AuditTrail(on_record=backend.write_record)

        # On restart — reload the trail
        trail = backend.load_trail()

        # Compliance query — all DORA-relevant records
        records = backend.query_by_regulation("dora")

        # Export for external audit
        backend.export_jsonl(Path("audit_export.jsonl"))
    """

    def __init__(self, db_path: str | Path) -> None:
        """Open (or create) the audit database at *db_path*.

        Enables WAL journaling for concurrent reads, applies the schema,
        checks the stored schema version, and resumes the append-only
        sequence counter after the highest sequence already on disk.

        Raises:
            RuntimeError: if the on-disk schema version does not match
                SCHEMA_VERSION (propagated from _init_schema).
        """
        self._db_path = Path(db_path)
        self._conn = sqlite3.connect(
            str(self._db_path),
            isolation_level=None,  # Autocommit — each INSERT commits on its own
        )
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.execute("PRAGMA synchronous=NORMAL")
        self._conn.execute("PRAGMA foreign_keys=ON")
        self._init_schema()
        # Next sequence number to assign; -1 + 1 == 0 on a fresh database.
        self._seq = self._get_max_seq() + 1

    def _init_schema(self) -> None:
        """Create tables/indexes if missing and enforce the schema version.

        Raises:
            RuntimeError: if the database was created by a different
                SCHEMA_VERSION — no automatic migration is attempted.
        """
        self._conn.executescript(SCHEMA_SQL)
        row = self._conn.execute(
            "SELECT value FROM audit_meta WHERE key='schema_version'"
        ).fetchone()
        if row is None:
            # Fresh database — stamp it with the current schema version.
            self._conn.execute(
                "INSERT INTO audit_meta (key, value) VALUES ('schema_version', ?)",
                (str(SCHEMA_VERSION),),
            )
        else:
            stored = int(row[0])
            if stored != SCHEMA_VERSION:
                raise RuntimeError(
                    f"Audit DB schema version mismatch: "
                    f"expected {SCHEMA_VERSION}, got {stored}"
                )

    def _get_max_seq(self) -> int:
        """Return the highest sequence number in the DB, or -1 if empty."""
        row = self._conn.execute(
            "SELECT COALESCE(MAX(seq), -1) FROM audit_records"
        ).fetchone()
        return row[0]

    # ── Write path ────────────────────────────────────────────────

    def write_record(self, record: AuditRecord) -> None:
        """Callback for AuditTrail.on_record — persists a single record.

        Called synchronously for every audit event; SQLite in WAL mode
        handles the per-record commit efficiently. Append-only: records
        are never updated or deleted.

        NOTE(review): self._seq is incremented without a lock, so the
        current design assumes a single writer thread — confirm callers.
        """
        self._conn.execute(
            """INSERT INTO audit_records
               (record_id, action_id, event_type, timestamp, agent_id,
                tool_name, data, regulatory, previous_hash, record_hash, seq)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                record.record_id,
                record.action_id,
                record.event_type.value,
                record.timestamp,
                record.agent_id,
                record.tool_name,
                json.dumps(record.data, default=str),
                json.dumps(record.regulatory_articles, default=str),
                record.previous_hash,
                record.record_hash,
                self._seq,
            ),
        )
        self._seq += 1

    # ── Read path ─────────────────────────────────────────────────

    def load_trail(self) -> AuditTrail:
        """Reconstruct a full AuditTrail from the database.

        Rows are injected straight into the trail's internal storage
        rather than replayed through the trail API, so the on_record
        callback (wired to write_record for *future* events) never fires
        during the load and nothing is re-written to disk.

        The hash chain is verified after loading. A broken chain is
        logged as an error but the trail is still returned, so the
        broken chain can be analyzed forensically.

        NOTE(review): this reaches into AuditTrail private attributes
        (_records, _by_action, _last_hash) and will break if those
        internals change — confirm against vaara.audit.trail.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records ORDER BY seq ASC"
        ).fetchall()

        trail = AuditTrail(on_record=self.write_record)

        for row in rows:
            record = self._row_to_record(row)
            trail._records.append(record)
            trail._by_action[record.action_id].append(record)
            trail._last_hash = record.record_hash

        chain_error = trail.verify_chain()
        if chain_error:
            logger.error("AUDIT CHAIN INTEGRITY FAILURE: %s", chain_error)

        logger.info("Loaded %d audit records from %s", len(rows), self._db_path)
        return trail

    def count(self) -> int:
        """Total records in the database."""
        row = self._conn.execute("SELECT COUNT(*) FROM audit_records").fetchone()
        return row[0]

    def query_by_action(self, action_id: str) -> list[AuditRecord]:
        """Get all records for a specific action, oldest first."""
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE action_id=? ORDER BY seq ASC",
            (action_id,),
        ).fetchall()
        return [self._row_to_record(r) for r in rows]

    def query_by_agent(
        self, agent_id: str, limit: int = 100
    ) -> list[AuditRecord]:
        """Get the *limit* most recent records for an agent.

        Fetched newest-first for the LIMIT, then reversed so the caller
        sees chronological order.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE agent_id=? "
            "ORDER BY seq DESC LIMIT ?",
            (agent_id, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_by_event_type(
        self, event_type: EventType, limit: int = 100
    ) -> list[AuditRecord]:
        """Get the *limit* most recent records of one event type,
        returned in chronological order."""
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE event_type=? "
            "ORDER BY seq DESC LIMIT ?",
            (event_type.value, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_by_regulation(
        self, domain: str, limit: int = 500
    ) -> list[AuditRecord]:
        """Get records relevant to a regulatory domain.

        Matches the quoted domain as a substring of the serialized JSON
        array in the regulatory column (LIKE '%"domain"%'). NOTE(review):
        this is a substring match, not true JSON containment — a domain
        string appearing inside another article name would over-match.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE regulatory LIKE ? "
            "ORDER BY seq DESC LIMIT ?",
            (f'%"{domain}"%', limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_time_range(
        self,
        start_ts: float,
        end_ts: Optional[float] = None,
        limit: int = 1000,
    ) -> list[AuditRecord]:
        """Get records within [start_ts, end_ts] (inclusive), oldest first.

        Args:
            start_ts: earliest timestamp to include.
            end_ts: latest timestamp to include; defaults to "now".
            limit: maximum number of records returned.
        """
        if end_ts is None:
            end_ts = time.time()
        rows = self._conn.execute(
            "SELECT * FROM audit_records "
            "WHERE timestamp >= ? AND timestamp <= ? "
            "ORDER BY seq ASC LIMIT ?",
            (start_ts, end_ts, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in rows]

    def query_blocked(self, limit: int = 50) -> list[AuditRecord]:
        """Get recently blocked actions (event type ACTION_BLOCKED)."""
        return self.query_by_event_type(EventType.ACTION_BLOCKED, limit)

    # ── Statistics ────────────────────────────────────────────────

    def stats(self) -> dict:
        """Database statistics for dashboards.

        Returns a dict with total_records, by_event_type counts,
        unique_agents, the earliest/latest timestamps (None when the
        table is empty), the DB path, and the current file size.
        """
        rows = self._conn.execute(
            "SELECT event_type, COUNT(*) FROM audit_records GROUP BY event_type"
        ).fetchall()
        by_type = {row[0]: row[1] for row in rows}

        agent_rows = self._conn.execute(
            "SELECT COUNT(DISTINCT agent_id) FROM audit_records"
        ).fetchone()

        time_rows = self._conn.execute(
            "SELECT MIN(timestamp), MAX(timestamp) FROM audit_records"
        ).fetchone()

        return {
            "total_records": self.count(),
            "by_event_type": by_type,
            "unique_agents": agent_rows[0],
            "time_range": {
                "earliest": time_rows[0],
                "latest": time_rows[1],
            },
            "db_path": str(self._db_path),
            "db_size_bytes": self._db_path.stat().st_size if self._db_path.exists() else 0,
        }

    # ── Export ────────────────────────────────────────────────────

    def export_jsonl(self, path: Path, limit: int = 0) -> int:
        """Export records as JSON Lines. Returns count exported.

        Args:
            path: destination file (overwritten, UTF-8).
            limit: maximum records to export; 0 (or negative) means all.
        """
        # Bind the limit instead of splicing it into the SQL string;
        # LIMIT -1 means "unbounded" in SQLite.
        rows = self._conn.execute(
            "SELECT * FROM audit_records ORDER BY seq ASC LIMIT ?",
            (limit if limit > 0 else -1,),
        ).fetchall()
        with open(path, "w", encoding="utf-8") as f:
            for row in rows:
                record = self._row_to_record(row)
                f.write(json.dumps(record.to_dict(), default=str) + "\n")
        return len(rows)

    # ── Internal ──────────────────────────────────────────────────

    @staticmethod
    def _row_to_record(row: tuple) -> AuditRecord:
        """Convert a database row to an AuditRecord.

        Column positions mirror the INSERT in write_record; the trailing
        seq column is storage-only and not part of the record.
        """
        return AuditRecord(
            record_id=row[0],
            action_id=row[1],
            event_type=EventType(row[2]),
            timestamp=row[3],
            agent_id=row[4],
            tool_name=row[5],
            data=json.loads(row[6]),
            regulatory_articles=json.loads(row[7]),
            previous_hash=row[8],
            record_hash=row[9],
        )

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()

    def __enter__(self):
        """Context-manager entry — returns the backend itself."""
        return self

    def __exit__(self, *args):
        """Context-manager exit — closes the connection."""
        self.close()