vaara 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vaara/__init__.py ADDED
@@ -0,0 +1,9 @@
1
"""Vaara — Adaptive AI Agent Execution Layer.

A layer that sits between AI agents and the environment they execute
against: it scores the risk of actions, categorizes them, and produces
compliance audit trails.

Built on top of Microsoft Agent Governance Toolkit.
"""

__version__ = "0.3.0"
@@ -0,0 +1 @@
1
+ """Audit trail — structured, immutable, regulation-mapped action logging."""
@@ -0,0 +1,321 @@
1
+ """SQLite persistence backend for the audit trail.
2
+
3
+ The in-memory trail is fast but volatile. This backend writes every record
4
+ to SQLite as it arrives (via on_record callback) and can reconstruct the
5
+ full trail from disk.
6
+
7
+ Design principles:
8
+ - **WAL mode** for concurrent read/write (readers never block writers)
9
+ - **Append-only** — no UPDATE or DELETE, matching the immutability guarantee
10
+ - **Hash chain verified on load** — detects on-disk tampering
11
+ - **Regulatory domain indexed** — fast compliance queries by regulation
12
+ - **JSON data column** — flexible schema for action-specific fields
13
+
14
+ EU AI Act Article 12(2): Logging capabilities shall allow for the recording
15
+ of events relevant to identify situations that may result in the AI system
16
+ posing a risk. SQLite's ACID guarantees that no event is silently lost.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ import sqlite3
24
+ import time
25
+ from pathlib import Path
26
+ from typing import Optional
27
+
28
+ from vaara.audit.trail import AuditRecord, AuditTrail, EventType
29
+
30
logger = logging.getLogger(__name__)

SCHEMA_VERSION = 1

# Idempotent DDL (IF NOT EXISTS throughout) — safe to re-run on every
# startup against an existing database, so older DBs pick up new indexes
# automatically. Every query path orders by seq, so seq gets its own
# index alongside the lookup columns.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS audit_meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS audit_records (
    record_id TEXT PRIMARY KEY,
    action_id TEXT NOT NULL,
    event_type TEXT NOT NULL,
    timestamp REAL NOT NULL,
    agent_id TEXT NOT NULL,
    tool_name TEXT NOT NULL,
    data TEXT NOT NULL DEFAULT '{}',
    regulatory TEXT NOT NULL DEFAULT '[]',
    previous_hash TEXT NOT NULL DEFAULT '',
    record_hash TEXT NOT NULL DEFAULT '',
    seq INTEGER NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_action_id ON audit_records(action_id);
CREATE INDEX IF NOT EXISTS idx_agent_id ON audit_records(agent_id);
CREATE INDEX IF NOT EXISTS idx_event_type ON audit_records(event_type);
CREATE INDEX IF NOT EXISTS idx_timestamp ON audit_records(timestamp);
CREATE INDEX IF NOT EXISTS idx_tool_name ON audit_records(tool_name);
CREATE INDEX IF NOT EXISTS idx_seq ON audit_records(seq);
"""


class SQLiteAuditBackend:
    """Persistent audit trail backed by SQLite.

    Usage::

        backend = SQLiteAuditBackend("audit.db")
        trail = AuditTrail(on_record=backend.write_record)

        # On restart — reload the trail
        trail = backend.load_trail()

        # Compliance query — all DORA-relevant records
        records = backend.query_by_regulation("dora")

        # Export for external audit
        backend.export_jsonl(Path("audit_export.jsonl"))
    """

    def __init__(self, db_path: str | Path) -> None:
        """Open (or create) the audit database at *db_path*.

        Enables WAL journaling for concurrent reads, applies the schema,
        checks the stored schema version, and resumes the append-only
        sequence counter after the highest sequence already on disk.

        Raises:
            RuntimeError: if the on-disk schema version does not match
                SCHEMA_VERSION (propagated from _init_schema).
        """
        self._db_path = Path(db_path)
        self._conn = sqlite3.connect(
            str(self._db_path),
            isolation_level=None,  # Autocommit — each INSERT commits on its own
        )
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.execute("PRAGMA synchronous=NORMAL")
        self._conn.execute("PRAGMA foreign_keys=ON")
        self._init_schema()
        # Next sequence number to assign; -1 + 1 == 0 on a fresh database.
        self._seq = self._get_max_seq() + 1

    def _init_schema(self) -> None:
        """Create tables/indexes if missing and enforce the schema version.

        Raises:
            RuntimeError: if the database was created by a different
                SCHEMA_VERSION — no automatic migration is attempted.
        """
        self._conn.executescript(SCHEMA_SQL)
        row = self._conn.execute(
            "SELECT value FROM audit_meta WHERE key='schema_version'"
        ).fetchone()
        if row is None:
            # Fresh database — stamp it with the current schema version.
            self._conn.execute(
                "INSERT INTO audit_meta (key, value) VALUES ('schema_version', ?)",
                (str(SCHEMA_VERSION),),
            )
        else:
            stored = int(row[0])
            if stored != SCHEMA_VERSION:
                raise RuntimeError(
                    f"Audit DB schema version mismatch: "
                    f"expected {SCHEMA_VERSION}, got {stored}"
                )

    def _get_max_seq(self) -> int:
        """Return the highest sequence number in the DB, or -1 if empty."""
        row = self._conn.execute(
            "SELECT COALESCE(MAX(seq), -1) FROM audit_records"
        ).fetchone()
        return row[0]

    # ── Write path ────────────────────────────────────────────────

    def write_record(self, record: AuditRecord) -> None:
        """Callback for AuditTrail.on_record — persists a single record.

        Called synchronously for every audit event; SQLite in WAL mode
        handles the per-record commit efficiently. Append-only: records
        are never updated or deleted.

        NOTE(review): self._seq is incremented without a lock, so the
        current design assumes a single writer thread — confirm callers.
        """
        self._conn.execute(
            """INSERT INTO audit_records
               (record_id, action_id, event_type, timestamp, agent_id,
                tool_name, data, regulatory, previous_hash, record_hash, seq)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                record.record_id,
                record.action_id,
                record.event_type.value,
                record.timestamp,
                record.agent_id,
                record.tool_name,
                json.dumps(record.data, default=str),
                json.dumps(record.regulatory_articles, default=str),
                record.previous_hash,
                record.record_hash,
                self._seq,
            ),
        )
        self._seq += 1

    # ── Read path ─────────────────────────────────────────────────

    def load_trail(self) -> AuditTrail:
        """Reconstruct a full AuditTrail from the database.

        Rows are injected straight into the trail's internal storage
        rather than replayed through the trail API, so the on_record
        callback (wired to write_record for *future* events) never fires
        during the load and nothing is re-written to disk.

        The hash chain is verified after loading. A broken chain is
        logged as an error but the trail is still returned, so the
        broken chain can be analyzed forensically.

        NOTE(review): this reaches into AuditTrail private attributes
        (_records, _by_action, _last_hash) and will break if those
        internals change — confirm against vaara.audit.trail.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records ORDER BY seq ASC"
        ).fetchall()

        trail = AuditTrail(on_record=self.write_record)

        for row in rows:
            record = self._row_to_record(row)
            trail._records.append(record)
            trail._by_action[record.action_id].append(record)
            trail._last_hash = record.record_hash

        chain_error = trail.verify_chain()
        if chain_error:
            logger.error("AUDIT CHAIN INTEGRITY FAILURE: %s", chain_error)

        logger.info("Loaded %d audit records from %s", len(rows), self._db_path)
        return trail

    def count(self) -> int:
        """Total records in the database."""
        row = self._conn.execute("SELECT COUNT(*) FROM audit_records").fetchone()
        return row[0]

    def query_by_action(self, action_id: str) -> list[AuditRecord]:
        """Get all records for a specific action, oldest first."""
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE action_id=? ORDER BY seq ASC",
            (action_id,),
        ).fetchall()
        return [self._row_to_record(r) for r in rows]

    def query_by_agent(
        self, agent_id: str, limit: int = 100
    ) -> list[AuditRecord]:
        """Get the *limit* most recent records for an agent.

        Fetched newest-first for the LIMIT, then reversed so the caller
        sees chronological order.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE agent_id=? "
            "ORDER BY seq DESC LIMIT ?",
            (agent_id, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_by_event_type(
        self, event_type: EventType, limit: int = 100
    ) -> list[AuditRecord]:
        """Get the *limit* most recent records of one event type,
        returned in chronological order."""
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE event_type=? "
            "ORDER BY seq DESC LIMIT ?",
            (event_type.value, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_by_regulation(
        self, domain: str, limit: int = 500
    ) -> list[AuditRecord]:
        """Get records relevant to a regulatory domain.

        Matches the quoted domain as a substring of the serialized JSON
        array in the regulatory column (LIKE '%"domain"%'). NOTE(review):
        this is a substring match, not true JSON containment — a domain
        string appearing inside another article name would over-match.
        """
        rows = self._conn.execute(
            "SELECT * FROM audit_records WHERE regulatory LIKE ? "
            "ORDER BY seq DESC LIMIT ?",
            (f'%"{domain}"%', limit),
        ).fetchall()
        return [self._row_to_record(r) for r in reversed(rows)]

    def query_time_range(
        self,
        start_ts: float,
        end_ts: Optional[float] = None,
        limit: int = 1000,
    ) -> list[AuditRecord]:
        """Get records within [start_ts, end_ts] (inclusive), oldest first.

        Args:
            start_ts: earliest timestamp to include.
            end_ts: latest timestamp to include; defaults to "now".
            limit: maximum number of records returned.
        """
        if end_ts is None:
            end_ts = time.time()
        rows = self._conn.execute(
            "SELECT * FROM audit_records "
            "WHERE timestamp >= ? AND timestamp <= ? "
            "ORDER BY seq ASC LIMIT ?",
            (start_ts, end_ts, limit),
        ).fetchall()
        return [self._row_to_record(r) for r in rows]

    def query_blocked(self, limit: int = 50) -> list[AuditRecord]:
        """Get recently blocked actions (event type ACTION_BLOCKED)."""
        return self.query_by_event_type(EventType.ACTION_BLOCKED, limit)

    # ── Statistics ────────────────────────────────────────────────

    def stats(self) -> dict:
        """Database statistics for dashboards.

        Returns a dict with total_records, by_event_type counts,
        unique_agents, the earliest/latest timestamps (None when the
        table is empty), the DB path, and the current file size.
        """
        rows = self._conn.execute(
            "SELECT event_type, COUNT(*) FROM audit_records GROUP BY event_type"
        ).fetchall()
        by_type = {row[0]: row[1] for row in rows}

        agent_rows = self._conn.execute(
            "SELECT COUNT(DISTINCT agent_id) FROM audit_records"
        ).fetchone()

        time_rows = self._conn.execute(
            "SELECT MIN(timestamp), MAX(timestamp) FROM audit_records"
        ).fetchone()

        return {
            "total_records": self.count(),
            "by_event_type": by_type,
            "unique_agents": agent_rows[0],
            "time_range": {
                "earliest": time_rows[0],
                "latest": time_rows[1],
            },
            "db_path": str(self._db_path),
            "db_size_bytes": self._db_path.stat().st_size if self._db_path.exists() else 0,
        }

    # ── Export ────────────────────────────────────────────────────

    def export_jsonl(self, path: Path, limit: int = 0) -> int:
        """Export records as JSON Lines. Returns count exported.

        Args:
            path: destination file (overwritten, UTF-8).
            limit: maximum records to export; 0 (or negative) means all.
        """
        # Bind the limit instead of splicing it into the SQL string;
        # LIMIT -1 means "unbounded" in SQLite.
        rows = self._conn.execute(
            "SELECT * FROM audit_records ORDER BY seq ASC LIMIT ?",
            (limit if limit > 0 else -1,),
        ).fetchall()
        with open(path, "w", encoding="utf-8") as f:
            for row in rows:
                record = self._row_to_record(row)
                f.write(json.dumps(record.to_dict(), default=str) + "\n")
        return len(rows)

    # ── Internal ──────────────────────────────────────────────────

    @staticmethod
    def _row_to_record(row: tuple) -> AuditRecord:
        """Convert a database row to an AuditRecord.

        Column positions mirror the INSERT in write_record; the trailing
        seq column is storage-only and not part of the record.
        """
        return AuditRecord(
            record_id=row[0],
            action_id=row[1],
            event_type=EventType(row[2]),
            timestamp=row[3],
            agent_id=row[4],
            tool_name=row[5],
            data=json.loads(row[6]),
            regulatory_articles=json.loads(row[7]),
            previous_hash=row[8],
            record_hash=row[9],
        )

    def close(self) -> None:
        """Close the database connection."""
        self._conn.close()

    def __enter__(self):
        """Context-manager entry — returns the backend itself."""
        return self

    def __exit__(self, *args):
        """Context-manager exit — closes the connection."""
        self.close()