sumospace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sumospace/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """
2
+ SumoSpace — Locally-first autonomous agent framework.
3
+ """
4
from importlib.metadata import PackageNotFoundError, version

# Start from the development placeholder and overwrite it only when the
# "sumospace" distribution is actually installed (e.g. not when running
# straight from a source checkout).
__version__ = "0.0.0-dev"
try:
    __version__ = version("sumospace")
except PackageNotFoundError:
    pass

# Public re-exports. The import order is kept as-is on purpose.
from sumospace.kernel import SumoKernel
from sumospace.settings import SumoSettings
from sumospace.exceptions import (
    SumoSpaceError,
    IngestError,
    ProviderError,
    ProviderNotConfiguredError,
    ConsensusFailedError,
)

__all__ = [
    # Core entry points
    "SumoKernel",
    "SumoSettings",
    # Exception hierarchy
    "SumoSpaceError",
    "IngestError",
    "ProviderError",
    "ProviderNotConfiguredError",
    "ConsensusFailedError",
    # Package metadata
    "__version__",
]
sumospace/audit.py ADDED
@@ -0,0 +1,272 @@
1
+ from __future__ import annotations
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Optional, TYPE_CHECKING
7
+ import builtins
8
+ from filelock import FileLock
9
+
10
+ if TYPE_CHECKING:
11
+ from sumospace.kernel import ExecutionTrace
12
+ from sumospace.committee import CommitteeVerdict
13
+ from sumospace.settings import SumoSettings
14
+
15
class AuditLogger:
    """
    Logs execution traces and committee verdicts to an audit log directory.

    Each call to `log` appends one JSON object per line to a per-day
    `audit_YYYYMMDD.jsonl` file under `<settings.workspace>/.sumo_audit` and
    folds the run into `stats_index.json`. The remaining methods only read
    those files.

    Note:
        Auditing is best-effort: a failure to write the log or the index is
        reported through the `logging` module and never raised to the caller.
    """

    def __init__(self, settings: "SumoSettings"):
        """Remember `settings` and make sure the audit directory exists."""
        self.settings = settings
        self.log_dir = Path(settings.workspace) / ".sumo_audit"
        # exist_ok=True already tolerates an existing directory, so no
        # separate (race-prone) exists() probe is needed.
        self.log_dir.mkdir(parents=True, exist_ok=True)

    def log(self, trace: "ExecutionTrace", verdict: "Optional[CommitteeVerdict]" = None):
        """
        Write the trace and verdict to a JSON line in the audit log.

        Args:
            trace: The finished execution trace. Its `session_id`, `task`,
                `success`, `duration_ms`, `intent`, `final_answer`, `error`
                and `step_traces` attributes are recorded.
            verdict: Optional committee verdict to store alongside the trace.

        Note:
            Never raises on I/O or serialization problems; they are logged
            as warnings instead.
        """
        import logging
        from datetime import timezone

        # Read the clock once so the entry's timestamp and the file it lands
        # in can never disagree around midnight. The naive-UTC + "Z" format
        # of the original entries is preserved.
        now = datetime.now(timezone.utc).replace(tzinfo=None)

        log_entry = {
            "timestamp": now.isoformat() + "Z",
            "session_id": trace.session_id,
            "task": trace.task,
            "success": trace.success,
            "duration_ms": trace.duration_ms,
            "intent": trace.intent.name if trace.intent else "UNKNOWN",
            "final_answer": trace.final_answer,
            "error": trace.error,
        }

        if verdict:
            log_entry["committee_verdict"] = {
                "approved": verdict.approved,
                "rejection_reason": verdict.rejection_reason,
                "critic_output": verdict.critic_output,
                "resolver_output": verdict.resolver_output,
                "planner_output": verdict.planner_output,
            }

        log_entry["steps"] = [
            {
                "step_number": st.step_number,
                "tool": st.tool,
                "description": st.description,
                "success": st.result.success,
                "duration_ms": st.duration_ms,
                "error": st.result.error,
            }
            for st in trace.step_traces
        ]

        log_file = self.log_dir / f"audit_{now.strftime('%Y%m%d')}.jsonl"
        try:
            # Serialize before opening so a non-serializable value cannot
            # leave a half-written line behind.
            line = json.dumps(log_entry) + "\n"
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(line)
        except Exception:
            logging.getLogger(__name__).warning(
                "Could not write audit entry to %s", log_file, exc_info=True
            )
            return

        # Only sessions that actually reached the log are counted.
        self._update_index(log_entry)

    def _update_index(self, entry: dict):
        """
        Incrementally update stats_index.json with one logged entry.

        The read-modify-write cycle runs under a file lock, and the new
        content is written to a temporary file and swapped in with
        `os.replace`, so readers that do not take the lock (see `stats`)
        never observe a partially written index.
        """
        import logging

        logger = logging.getLogger(__name__)
        index_file = self.log_dir / "stats_index.json"
        lock_file = self.log_dir / "stats_index.json.lock"
        tmp_file = self.log_dir / "stats_index.json.tmp"

        try:
            with FileLock(lock_file, timeout=10):
                stats = None
                if index_file.exists():
                    try:
                        with open(index_file, "r", encoding="utf-8") as f:
                            stats = json.load(f)
                    except ValueError:
                        # A corrupt index would otherwise block every
                        # future update; start over from empty counters.
                        logger.warning("Resetting unreadable %s", index_file)
                if not isinstance(stats, dict):
                    stats = {}

                # Fill in any counter that is missing (fresh or reset index).
                stats.setdefault("total_sessions", 0)
                stats.setdefault("successful_sessions", 0)
                stats.setdefault("failed_sessions", 0)
                stats.setdefault("total_duration_ms", 0.0)
                stats.setdefault("tool_usage", {})
                stats.setdefault("intent_usage", {})
                stats.setdefault("failure_reasons", {})

                stats["total_sessions"] += 1
                if entry["success"]:
                    stats["successful_sessions"] += 1
                else:
                    stats["failed_sessions"] += 1
                    reason = entry.get("error") or "Unknown error"
                    stats["failure_reasons"][reason] = stats["failure_reasons"].get(reason, 0) + 1

                stats["total_duration_ms"] += entry["duration_ms"]

                intent = entry["intent"]
                stats["intent_usage"][intent] = stats["intent_usage"].get(intent, 0) + 1

                for step in entry.get("steps", []):
                    usage = stats["tool_usage"].setdefault(step["tool"], {"success": 0, "fail": 0})
                    usage["success" if step["success"] else "fail"] += 1

                with open(tmp_file, "w", encoding="utf-8") as f:
                    json.dump(stats, f, indent=2)
                os.replace(tmp_file, index_file)
        except Exception:
            # Best-effort: the JSONL log remains the source of truth.
            logger.warning("Could not update %s", index_file, exc_info=True)

    def _read_entries(self, log_file, newest_first: bool = False):
        """
        Yield the JSON objects stored in `log_file`, one per line.

        Blank lines, lines that are not valid JSON and lines that do not
        decode to an object are skipped individually so one bad line cannot
        hide the rest of the file. An unreadable file yields nothing.

        Args:
            log_file: Path of a `.jsonl` audit file.
            newest_first: When true, the file is read fully and yielded from
                the last line to the first; otherwise it is streamed in order.
        """
        try:
            with open(log_file, "r", encoding="utf-8") as f:
                lines = reversed(f.readlines()) if newest_first else f
                for raw in lines:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        entry = json.loads(raw)
                    except ValueError:
                        continue
                    if isinstance(entry, dict):
                        yield entry
        except (OSError, UnicodeDecodeError):
            return

    @staticmethod
    def _md_cell(value) -> str:
        """Render `value` so it cannot break a one-line Markdown table cell."""
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    def list(self, limit: int = 20) -> builtins.list[dict]:
        """
        List recent execution sessions from all log files.

        Note:
            This scans the `.jsonl` files in reverse chronological order
            (newest file first, last line first). Use it for displaying a
            history dashboard.

        Warning:
            The returned dictionaries contain full trace data which can be
            memory intensive if `limit` is set very high.

        Returns:
            At most `limit` entries; an empty list when `limit` is not
            positive or nothing has been logged.
        """
        sessions = []
        for log_file in sorted(self.log_dir.glob("audit_*.jsonl"), reverse=True):
            if len(sessions) >= limit:
                break
            for entry in self._read_entries(log_file, newest_first=True):
                sessions.append(entry)
                if len(sessions) >= limit:
                    break
        return sessions

    def show(self, session_id: str) -> Optional[dict]:
        """
        Retrieve the full logged entry for a specific session.

        Note:
            This scans the log files (newest file first) until an entry with
            a matching `session_id` is found.

        Warning:
            Returns `None` if the session ID does not exist. Always check for
            `None` before attempting to use the result.
        """
        for log_file in sorted(self.log_dir.glob("audit_*.jsonl"), reverse=True):
            for entry in self._read_entries(log_file):
                if "session_id" in entry and entry["session_id"] == session_id:
                    return entry
        return None

    def search(self, query: str, limit: int = 10) -> builtins.list[dict]:
        """
        Search sessions for a substring in the task.

        Note:
            This is a linear, case-insensitive substring search over the log
            files, newest entries first. It is useful for debugging
            (e.g. `audit.search("database")`).

        Warning:
            Because it uses substring matching, it is not semantic. Searching
            for "DB" will not match tasks that only say "database".
        """
        needle = query.lower()
        results = []
        for log_file in sorted(self.log_dir.glob("audit_*.jsonl"), reverse=True):
            if len(results) >= limit:
                break
            for entry in self._read_entries(log_file, newest_first=True):
                task = entry.get("task")
                if isinstance(task, str) and needle in task.lower():
                    results.append(entry)
                    if len(results) >= limit:
                        break
        return results

    def stats(self) -> dict:
        """
        Get aggregated stats from the index.

        Note:
            These statistics are maintained incrementally in
            `stats_index.json`, so this is a single small file read.

        Returns:
            The index content, or `{}` when the index is missing or
            unreadable.
        """
        index_file = self.log_dir / "stats_index.json"
        try:
            with open(index_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def export(self, session_id: str) -> Optional[str]:
        """
        Export session to a Markdown report.

        Note:
            Useful for downloading logs via a web API or saving a specific
            run to a bug report attachment.

        Returns:
            The report text, or `None` when the session ID is unknown.
        """
        session = self.show(session_id)
        if not session:
            return None

        lines = [
            f"# Session Audit Report: {session_id}",
            f"- **Task**: {session['task']}",
            f"- **Timestamp**: {session['timestamp']}",
            f"- **Success**: {'✅' if session['success'] else '❌'}",
            f"- **Duration**: {session['duration_ms']:.0f}ms",
            f"- **Intent**: {session['intent']}",
            "",
            "## Committee Verdict",
        ]

        verdict = session.get("committee_verdict")
        if verdict:
            lines.append(f"- **Approved**: {verdict['approved']}")
            if not verdict["approved"]:
                lines.append(f"- **Rejection Reason**: {verdict['rejection_reason']}")
        else:
            lines.append("No committee deliberation (likely cached).")

        lines.extend([
            "",
            "## Execution Steps",
            "| # | Tool | Description | Success | Duration |",
            "|---|------|-------------|---------|----------|",
        ])

        for step in session.get("steps", []):
            status = "✅" if step["success"] else "❌"
            lines.append(
                f"| {step['step_number']} | `{step['tool']}` | "
                f"{self._md_cell(step['description'])} | "
                f"{status} | {step['duration_ms']:.0f}ms |"
            )

        # "final_answer" is always present in logged entries but is null for
        # runs that produced no answer; str.join() rejects None, so map it
        # to the placeholder explicitly.
        answer = session.get("final_answer")
        lines.extend([
            "",
            "## Final Answer",
            "N/A" if answer is None else str(answer),
        ])

        if session.get("error"):
            lines.extend([
                "",
                "## Error Details",
                f"```\n{session['error']}\n```",
            ])

        return "\n".join(lines)
sumospace/cache.py ADDED
@@ -0,0 +1,85 @@
1
+ import hashlib
2
+ import json
3
+ import time
4
+ from pathlib import Path
5
+
6
+ from sumospace.committee import ExecutionPlan, ExecutionStep
7
+
8
class PlanCache:
    """
    Content-addressed cache for approved ExecutionPlans.
    Skips the 3-LLM committee on repeat tasks in the same context.

    Each entry is one JSON file named after a hash of the task and the
    leading part of the context. Entries older than the configured TTL are
    treated as misses and deleted on access.
    """

    def __init__(self, cache_dir: str = ".sumo_db/plan_cache", ttl_hours: float = 24.0):
        """
        Args:
            cache_dir: Directory holding the cache files; created if missing.
            ttl_hours: Maximum age of an entry, in hours.
        """
        self._dir = Path(cache_dir)
        self._dir.mkdir(parents=True, exist_ok=True)
        self._ttl = ttl_hours * 3600  # stored in seconds

    def _key(self, task: str, context: str) -> str:
        """Return the 24-hex-character cache key for a task/context pair."""
        # NOTE(review): only the first 500 characters of the context take
        # part in the key, so contexts that differ later share an entry.
        # Presumably deliberate (fuzzy match); confirm with the caller.
        raw = f"{task}|||{context[:500]}"
        return hashlib.sha256(raw.encode()).hexdigest()[:24]

    def _path(self, task: str, context: str) -> Path:
        """Return the file that stores the entry for a task/context pair."""
        return self._dir / f"{self._key(task, context)}.json"

    def get(self, task: str, context: str) -> "ExecutionPlan | None":
        """
        Return the cached plan, or None on a miss.

        A missing, expired, unreadable or undecodable entry all count as a
        miss; expired entries are removed.
        """
        path = self._path(task, context)
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
            if time.time() - data["cached_at"] > self._ttl:
                # missing_ok: another process may have expired it already.
                path.unlink(missing_ok=True)
                return None
            return self._deserialize(data["plan"])
        except Exception:
            # A cache must never fail the caller: any problem with the
            # entry (including it not existing) is simply a miss.
            return None

    def set(self, task: str, context: str, plan: "ExecutionPlan"):
        """Store `plan` for the task/context pair, replacing any old entry."""
        payload = json.dumps({
            "cached_at": time.time(),
            "task": task,
            "plan": self._serialize(plan),
        }, indent=2)
        self._path(task, context).write_text(payload, encoding="utf-8")

    def _serialize(self, plan: "ExecutionPlan") -> dict:
        """Convert a plan into a JSON-compatible dictionary."""
        return {
            "task": plan.task,
            "reasoning": plan.reasoning,
            "estimated_duration_s": plan.estimated_duration_s,
            "risks": plan.risks,
            "steps": [
                {
                    "step_number": s.step_number,
                    "tool": s.tool,
                    "description": s.description,
                    "parameters": s.parameters,
                    "expected_output": s.expected_output,
                    "critical": s.critical,
                }
                for s in plan.steps
            ],
        }

    def _deserialize(self, data: dict) -> "ExecutionPlan":
        """Rebuild a plan from `_serialize` output, marked as approved."""
        return ExecutionPlan(
            task=data["task"],
            steps=[ExecutionStep(**s) for s in data["steps"]],
            reasoning=data.get("reasoning", ""),
            estimated_duration_s=data.get("estimated_duration_s", 0),
            risks=data.get("risks", []),
            approved=True,
            approval_notes="Restored from cache",
        )

    def invalidate(self, task: str, context: str):
        """Remove the entry for the task/context pair, if there is one."""
        # EAFP instead of exists()+unlink(): no window in which a concurrent
        # removal can turn this into a FileNotFoundError.
        self._path(task, context).unlink(missing_ok=True)

    def clear(self):
        """Remove every cache entry."""
        for p in self._dir.glob("*.json"):
            p.unlink(missing_ok=True)

    def stats(self) -> dict:
        """Return the number of entries and their total size in megabytes."""
        count = 0
        total_bytes = 0
        for p in self._dir.glob("*.json"):
            try:
                total_bytes += p.stat().st_size
            except FileNotFoundError:
                # Removed between glob() and stat(); not an entry any more.
                continue
            count += 1
        return {"count": count, "size_mb": total_bytes / 1e6}