tweek-0.1.0-py3-none-any.whl → tweek-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tweek/__init__.py +2 -2
- tweek/_keygen.py +53 -0
- tweek/audit.py +288 -0
- tweek/cli.py +5303 -2396
- tweek/cli_model.py +380 -0
- tweek/config/families.yaml +609 -0
- tweek/config/manager.py +42 -5
- tweek/config/patterns.yaml +1510 -8
- tweek/config/tiers.yaml +161 -11
- tweek/diagnostics.py +71 -2
- tweek/hooks/break_glass.py +163 -0
- tweek/hooks/feedback.py +223 -0
- tweek/hooks/overrides.py +531 -0
- tweek/hooks/post_tool_use.py +472 -0
- tweek/hooks/pre_tool_use.py +1024 -62
- tweek/integrations/openclaw.py +443 -0
- tweek/integrations/openclaw_server.py +385 -0
- tweek/licensing.py +14 -54
- tweek/logging/bundle.py +2 -2
- tweek/logging/security_log.py +56 -13
- tweek/mcp/approval.py +57 -16
- tweek/mcp/proxy.py +18 -0
- tweek/mcp/screening.py +5 -5
- tweek/mcp/server.py +4 -1
- tweek/memory/__init__.py +24 -0
- tweek/memory/queries.py +223 -0
- tweek/memory/safety.py +140 -0
- tweek/memory/schemas.py +80 -0
- tweek/memory/store.py +989 -0
- tweek/platform/__init__.py +4 -4
- tweek/plugins/__init__.py +40 -24
- tweek/plugins/base.py +1 -1
- tweek/plugins/detectors/__init__.py +3 -3
- tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
- tweek/plugins/git_discovery.py +16 -4
- tweek/plugins/git_registry.py +8 -2
- tweek/plugins/git_security.py +21 -9
- tweek/plugins/screening/__init__.py +10 -1
- tweek/plugins/screening/heuristic_scorer.py +477 -0
- tweek/plugins/screening/llm_reviewer.py +14 -6
- tweek/plugins/screening/local_model_reviewer.py +161 -0
- tweek/proxy/__init__.py +38 -37
- tweek/proxy/addon.py +22 -3
- tweek/proxy/interceptor.py +1 -0
- tweek/proxy/server.py +4 -2
- tweek/sandbox/__init__.py +11 -0
- tweek/sandbox/docker_bridge.py +143 -0
- tweek/sandbox/executor.py +9 -6
- tweek/sandbox/layers.py +97 -0
- tweek/sandbox/linux.py +1 -0
- tweek/sandbox/project.py +548 -0
- tweek/sandbox/registry.py +149 -0
- tweek/security/__init__.py +9 -0
- tweek/security/language.py +250 -0
- tweek/security/llm_reviewer.py +1146 -60
- tweek/security/local_model.py +331 -0
- tweek/security/local_reviewer.py +146 -0
- tweek/security/model_registry.py +371 -0
- tweek/security/rate_limiter.py +11 -6
- tweek/security/secret_scanner.py +70 -4
- tweek/security/session_analyzer.py +26 -2
- tweek/skill_template/SKILL.md +200 -0
- tweek/skill_template/__init__.py +0 -0
- tweek/skill_template/cli-reference.md +331 -0
- tweek/skill_template/overrides-reference.md +184 -0
- tweek/skill_template/scripts/__init__.py +0 -0
- tweek/skill_template/scripts/check_installed.py +170 -0
- tweek/skills/__init__.py +38 -0
- tweek/skills/config.py +150 -0
- tweek/skills/fingerprints.py +198 -0
- tweek/skills/guard.py +293 -0
- tweek/skills/isolation.py +469 -0
- tweek/skills/scanner.py +715 -0
- tweek/vault/__init__.py +0 -1
- tweek/vault/cross_platform.py +12 -1
- tweek/vault/keychain.py +87 -29
- tweek-0.2.0.dist-info/METADATA +281 -0
- tweek-0.2.0.dist-info/RECORD +121 -0
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/entry_points.txt +8 -1
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/licenses/LICENSE +80 -0
- tweek/integrations/moltbot.py +0 -243
- tweek-0.1.0.dist-info/METADATA +0 -335
- tweek-0.1.0.dist-info/RECORD +0 -85
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/WHEEL +0 -0
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/top_level.txt +0 -0
tweek/memory/store.py
ADDED
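The added module centers on a single MemoryStore class plus a module-level get_memory_store() singleton; the full source follows. First, a minimal usage sketch (illustrative only, not taken from the package): it assumes PatternDecisionEntry, defined in tweek/memory/schemas.py and not shown in this diff, accepts the same field names the store reads from it, and the pattern name and paths are made-up examples.

    from tweek.memory.schemas import PatternDecisionEntry
    from tweek.memory.store import get_memory_store, hash_project, normalize_path_prefix

    store = get_memory_store()  # global store at ~/.tweek/memory.db

    # Record that the user approved a hypothetical "curl_pipe_shell" finding.
    store.record_decision(PatternDecisionEntry(
        pattern_name="curl_pipe_shell",   # hypothetical pattern name
        pattern_id=None,
        original_severity="medium",
        original_confidence="heuristic",
        decision="ask",
        user_response="approved",
        tool_name="Bash",
        content_hash=None,
        path_prefix=normalize_path_prefix("/home/user/project/scripts/setup.sh"),
        project_hash=hash_project("/home/user/project"),
        decay_weight=1.0,
    ))

    # Later, ask memory whether the default "ask" decision should be adjusted.
    adj = store.get_confidence_adjustment(
        pattern_name="curl_pipe_shell",
        path_prefix=normalize_path_prefix("/home/user/project/scripts/setup.sh"),
        current_decision="ask",
        original_severity="medium",
        original_confidence="heuristic",
    )
    if adj is not None:
        print(adj.adjusted_decision, round(adj.approval_ratio, 2))

    store.apply_decay()  # re-weight old history with the 30-day half-life
    store.close()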
@@ -0,0 +1,989 @@
"""
Tweek Memory Store

Core SQLite-backed storage for Tweek's agentic memory system.
Handles schema creation, CRUD operations, time decay, and audit logging.

Storage locations:
- Global: ~/.tweek/memory.db
- Per-project: .tweek/memory.db (inside project directory)
"""

import hashlib
import math
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any

from tweek.memory.schemas import (
    ConfidenceAdjustment,
    LearnedWhitelistSuggestion,
    PatternDecisionEntry,
    SourceTrustEntry,
    WorkflowBaseline,
)
from tweek.memory.safety import (
    MIN_APPROVAL_RATIO,
    MIN_CONFIDENCE_SCORE,
    MIN_DECISION_THRESHOLD,
    compute_suggested_decision,
    is_immune_pattern,
)


# Half-life in days for time decay
DECAY_HALF_LIFE_DAYS = 30

# Default global memory DB path
GLOBAL_MEMORY_PATH = Path.home() / ".tweek" / "memory.db"


class MemoryStore:
    """SQLite-backed persistent memory for security decisions.

    Manages 5 tables + 1 view:
    - pattern_decisions: Per-pattern approval/denial history
    - source_trust: URL/file injection history
    - workflow_baselines: Normal tool usage patterns
    - learned_whitelists: Auto-generated whitelist suggestions
    - memory_audit: Accountability log
    - pattern_confidence_view: Computed confidence adjustments
    """

    SCHEMA_VERSION = 1

    def __init__(self, db_path: Optional[Path] = None):
        self.db_path = db_path or GLOBAL_MEMORY_PATH
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._conn: Optional[sqlite3.Connection] = None
        self._ensure_schema()

    def _get_connection(self) -> sqlite3.Connection:
        """Get or create a SQLite connection with WAL mode."""
        if self._conn is None:
            self._conn = sqlite3.connect(
                str(self.db_path),
                timeout=5.0,
                isolation_level=None,  # autocommit
            )
            self._conn.row_factory = sqlite3.Row
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
        return self._conn

    def close(self):
        """Close the database connection."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def _ensure_schema(self):
        """Create tables, indexes, and views if they don't exist."""
        conn = self._get_connection()
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS schema_version (
                version INTEGER PRIMARY KEY
            );

            CREATE TABLE IF NOT EXISTS pattern_decisions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                pattern_name TEXT NOT NULL,
                pattern_id INTEGER,
                original_severity TEXT NOT NULL,
                original_confidence TEXT NOT NULL,
                decision TEXT NOT NULL,
                user_response TEXT,
                tool_name TEXT NOT NULL,
                content_hash TEXT,
                path_prefix TEXT,
                project_hash TEXT,
                timestamp TEXT NOT NULL DEFAULT (datetime('now')),
                decay_weight REAL NOT NULL DEFAULT 1.0,
                CHECK (NOT (
                    original_severity = 'critical'
                    AND original_confidence = 'deterministic'
                    AND decision = 'allow'
                ))
            );

            CREATE INDEX IF NOT EXISTS idx_pd_pattern_name
                ON pattern_decisions(pattern_name);
            CREATE INDEX IF NOT EXISTS idx_pd_pattern_path
                ON pattern_decisions(pattern_name, path_prefix);
            CREATE INDEX IF NOT EXISTS idx_pd_project
                ON pattern_decisions(project_hash);
            CREATE INDEX IF NOT EXISTS idx_pd_timestamp
                ON pattern_decisions(timestamp);

            CREATE TABLE IF NOT EXISTS source_trust (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_type TEXT NOT NULL,
                source_key TEXT NOT NULL,
                total_scans INTEGER DEFAULT 0,
                injection_detections INTEGER DEFAULT 0,
                trust_score REAL DEFAULT 0.5,
                last_clean_scan TEXT,
                last_injection TEXT,
                timestamp TEXT DEFAULT (datetime('now')),
                decay_weight REAL DEFAULT 1.0,
                UNIQUE(source_type, source_key)
            );

            CREATE INDEX IF NOT EXISTS idx_st_type_key
                ON source_trust(source_type, source_key);

            CREATE TABLE IF NOT EXISTS workflow_baselines (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                project_hash TEXT NOT NULL,
                tool_name TEXT NOT NULL,
                hour_of_day INTEGER,
                invocation_count INTEGER DEFAULT 0,
                denied_count INTEGER DEFAULT 0,
                last_updated TEXT DEFAULT (datetime('now')),
                UNIQUE(project_hash, tool_name, hour_of_day)
            );

            CREATE INDEX IF NOT EXISTS idx_wb_project
                ON workflow_baselines(project_hash);

            CREATE TABLE IF NOT EXISTS learned_whitelists (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                pattern_name TEXT NOT NULL,
                tool_name TEXT,
                path_prefix TEXT,
                approval_count INTEGER DEFAULT 0,
                denial_count INTEGER DEFAULT 0,
                confidence REAL DEFAULT 0.0,
                suggested_at TEXT,
                human_reviewed INTEGER DEFAULT 0,
                timestamp TEXT DEFAULT (datetime('now')),
                UNIQUE(pattern_name, tool_name, path_prefix)
            );

            CREATE INDEX IF NOT EXISTS idx_lw_pattern
                ON learned_whitelists(pattern_name);

            CREATE TABLE IF NOT EXISTS memory_audit (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                operation TEXT NOT NULL,
                table_name TEXT NOT NULL,
                key_info TEXT,
                result TEXT,
                timestamp TEXT DEFAULT (datetime('now'))
            );

            CREATE INDEX IF NOT EXISTS idx_ma_timestamp
                ON memory_audit(timestamp);
        """)

        # Create or replace the confidence view
        conn.execute("DROP VIEW IF EXISTS pattern_confidence_view")
        conn.execute("""
            CREATE VIEW pattern_confidence_view AS
            SELECT
                pattern_name,
                path_prefix,
                COUNT(*) as total_decisions,
                SUM(CASE WHEN user_response = 'approved' THEN decay_weight ELSE 0 END)
                    as weighted_approvals,
                SUM(CASE WHEN user_response = 'denied' THEN decay_weight ELSE 0 END)
                    as weighted_denials,
                CASE WHEN SUM(decay_weight) > 0 THEN
                    SUM(CASE WHEN user_response = 'approved' THEN decay_weight ELSE 0 END)
                        / SUM(decay_weight)
                ELSE 0.5 END as approval_ratio,
                MAX(timestamp) as last_decision
            FROM pattern_decisions
            WHERE decay_weight > 0.01
            GROUP BY pattern_name, path_prefix
        """)

        # Set schema version
        conn.execute(
            "INSERT OR REPLACE INTO schema_version (version) VALUES (?)",
            (self.SCHEMA_VERSION,),
        )

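    # Worked example of the decay weighting used by pattern_confidence_view
    # (illustrative numbers): apply_decay() re-weights each row to
    # 2 ** (-days_elapsed / DECAY_HALF_LIFE_DAYS), so approvals recorded 0, 30
    # and 60 days ago contribute 1.0 + 0.5 + 0.25 = 1.75 weighted approvals;
    # with one denial from 30 days ago (weight 0.5), the view reports
    # approval_ratio = 1.75 / 2.25, about 0.78.
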
    # =====================================================================
    # Pattern Decisions
    # =====================================================================

    def record_decision(self, entry: PatternDecisionEntry) -> int:
        """Record a pattern decision.

        Returns the row ID of the inserted record.
        """
        conn = self._get_connection()

        # Safety: never record 'allow' for CRITICAL+deterministic
        if (
            entry.original_severity == "critical"
            and entry.original_confidence == "deterministic"
            and entry.decision == "allow"
        ):
            self._audit("write", "pattern_decisions",
                        f"{entry.pattern_name}:{entry.path_prefix}",
                        "BLOCKED: attempted allow on critical+deterministic")
            return -1

        cursor = conn.execute(
            """
            INSERT INTO pattern_decisions (
                pattern_name, pattern_id, original_severity, original_confidence,
                decision, user_response, tool_name, content_hash,
                path_prefix, project_hash, decay_weight
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                entry.pattern_name,
                entry.pattern_id,
                entry.original_severity,
                entry.original_confidence,
                entry.decision,
                entry.user_response,
                entry.tool_name,
                entry.content_hash,
                entry.path_prefix,
                entry.project_hash,
                entry.decay_weight,
            ),
        )

        row_id = cursor.lastrowid
        self._audit(
            "write", "pattern_decisions",
            f"{entry.pattern_name}:{entry.path_prefix}",
            f"id={row_id}, decision={entry.decision}, response={entry.user_response}",
        )

        # Update learned whitelists
        self._update_learned_whitelist(entry)

        return row_id

    def get_confidence_adjustment(
        self,
        pattern_name: str,
        path_prefix: Optional[str] = None,
        current_decision: str = "ask",
        original_severity: str = "medium",
        original_confidence: str = "heuristic",
    ) -> Optional[ConfidenceAdjustment]:
        """Query memory for a confidence adjustment on a pattern.

        Returns a ConfidenceAdjustment if memory has enough data,
        or None if insufficient data / pattern is immune.
        """
        conn = self._get_connection()

        # Check immunity first
        if is_immune_pattern(original_severity, original_confidence):
            self._audit(
                "read", "pattern_decisions",
                f"{pattern_name}:{path_prefix}",
                "immune_pattern_skipped",
            )
            return None

        # Query the confidence view
        if path_prefix:
            row = conn.execute(
                """
                SELECT * FROM pattern_confidence_view
                WHERE pattern_name = ? AND path_prefix = ?
                """,
                (pattern_name, path_prefix),
            ).fetchone()
        else:
            row = conn.execute(
                """
                SELECT * FROM pattern_confidence_view
                WHERE pattern_name = ? AND path_prefix IS NULL
                """,
                (pattern_name,),
            ).fetchone()

        # Also try without path prefix as fallback
        if not row and path_prefix:
            row = conn.execute(
                """
                SELECT
                    pattern_name,
                    NULL as path_prefix,
                    SUM(total_decisions) as total_decisions,
                    SUM(weighted_approvals) as weighted_approvals,
                    SUM(weighted_denials) as weighted_denials,
                    CASE WHEN SUM(weighted_approvals) + SUM(weighted_denials) > 0 THEN
                        SUM(weighted_approvals) / (SUM(weighted_approvals) + SUM(weighted_denials))
                    ELSE 0.5 END as approval_ratio,
                    MAX(last_decision) as last_decision
                FROM pattern_confidence_view
                WHERE pattern_name = ?
                GROUP BY pattern_name
                """,
                (pattern_name,),
            ).fetchone()

        if not row:
            self._audit(
                "read", "pattern_decisions",
                f"{pattern_name}:{path_prefix}",
                "no_data",
            )
            return None

        total = row["total_decisions"]
        weighted_approvals = row["weighted_approvals"] or 0.0
        weighted_denials = row["weighted_denials"] or 0.0
        approval_ratio = row["approval_ratio"] or 0.5
        total_weighted = weighted_approvals + weighted_denials

        # Compute suggested decision
        suggested = compute_suggested_decision(
            current_decision=current_decision,
            approval_ratio=approval_ratio,
            total_weighted_decisions=total_weighted,
            original_severity=original_severity,
            original_confidence=original_confidence,
        )

        # Confidence score: based on data quantity and consistency
        confidence_score = 0.0
        if total_weighted >= MIN_DECISION_THRESHOLD:
            # Scale 0-1 based on how far above threshold and ratio strength
            data_factor = min(total_weighted / (MIN_DECISION_THRESHOLD * 3), 1.0)
            ratio_factor = approval_ratio if suggested == "log" else (1 - approval_ratio)
            confidence_score = data_factor * ratio_factor

        adjustment = ConfidenceAdjustment(
            pattern_name=pattern_name,
            path_prefix=path_prefix,
            total_decisions=total,
            weighted_approvals=weighted_approvals,
            weighted_denials=weighted_denials,
            approval_ratio=approval_ratio,
            last_decision=row["last_decision"],
            adjusted_decision=suggested,
            confidence_score=confidence_score,
        )

        self._audit(
            "read", "pattern_decisions",
            f"{pattern_name}:{path_prefix}",
            f"total={total}, ratio={approval_ratio:.2f}, suggested={suggested}, "
            f"confidence={confidence_score:.2f}",
        )

        return adjustment

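    # Worked example of the confidence_score computed above (illustrative;
    # MIN_DECISION_THRESHOLD lives in tweek.memory.safety and its value is not
    # shown in this file, so assume 5 here): with 9.0 weighted decisions at an
    # approval_ratio of 0.9 and a suggested decision of "log",
    #   data_factor  = min(9.0 / (5 * 3), 1.0) = 0.6
    #   ratio_factor = 0.9
    #   confidence_score = 0.6 * 0.9 = 0.54
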
    # =====================================================================
    # Source Trust
    # =====================================================================

    def record_source_scan(
        self,
        source_type: str,
        source_key: str,
        had_injection: bool,
    ) -> None:
        """Record a source scan result (clean or injection detected)."""
        conn = self._get_connection()
        now = datetime.utcnow().isoformat()

        if had_injection:
            conn.execute(
                """
                INSERT INTO source_trust (source_type, source_key, total_scans,
                    injection_detections, trust_score, last_injection, timestamp)
                VALUES (?, ?, 1, 1, 0.0, ?, ?)
                ON CONFLICT(source_type, source_key) DO UPDATE SET
                    total_scans = total_scans + 1,
                    injection_detections = injection_detections + 1,
                    last_injection = excluded.last_injection,
                    trust_score = CASE
                        WHEN total_scans + 1 > 0 THEN
                            1.0 - (CAST(injection_detections + 1 AS REAL) / (total_scans + 1))
                        ELSE 0.5
                    END
                """,
                (source_type, source_key, now, now),
            )
        else:
            conn.execute(
                """
                INSERT INTO source_trust (source_type, source_key, total_scans,
                    injection_detections, trust_score, last_clean_scan, timestamp)
                VALUES (?, ?, 1, 0, 1.0, ?, ?)
                ON CONFLICT(source_type, source_key) DO UPDATE SET
                    total_scans = total_scans + 1,
                    last_clean_scan = excluded.last_clean_scan,
                    trust_score = CASE
                        WHEN total_scans + 1 > 0 THEN
                            1.0 - (CAST(injection_detections AS REAL) / (total_scans + 1))
                        ELSE 0.5
                    END
                """,
                (source_type, source_key, now, now),
            )

        self._audit(
            "write", "source_trust",
            f"{source_type}:{source_key}",
            f"injection={had_injection}",
        )

    def get_source_trust(
        self, source_type: str, source_key: str
    ) -> Optional[SourceTrustEntry]:
        """Get trust information for a source."""
        conn = self._get_connection()
        row = conn.execute(
            """
            SELECT * FROM source_trust
            WHERE source_type = ? AND source_key = ?
            """,
            (source_type, source_key),
        ).fetchone()

        if not row:
            # Also check domain-level trust for URLs
            if source_type == "url":
                domain = _extract_domain(source_key)
                if domain:
                    row = conn.execute(
                        """
                        SELECT * FROM source_trust
                        WHERE source_type = 'domain' AND source_key = ?
                        """,
                        (domain,),
                    ).fetchone()

        if not row:
            self._audit("read", "source_trust", f"{source_type}:{source_key}", "no_data")
            return None

        entry = SourceTrustEntry(
            source_type=row["source_type"],
            source_key=row["source_key"],
            total_scans=row["total_scans"],
            injection_detections=row["injection_detections"],
            trust_score=row["trust_score"],
            last_clean_scan=row["last_clean_scan"],
            last_injection=row["last_injection"],
        )

        self._audit(
            "read", "source_trust",
            f"{source_type}:{source_key}",
            f"trust={entry.trust_score:.2f}, scans={entry.total_scans}",
        )

        return entry

    def get_all_sources(self, suspicious_only: bool = False) -> List[SourceTrustEntry]:
        """Get all source trust entries, optionally filtering to suspicious ones."""
        conn = self._get_connection()
        if suspicious_only:
            rows = conn.execute(
                "SELECT * FROM source_trust WHERE trust_score < 0.5 ORDER BY trust_score ASC"
            ).fetchall()
        else:
            rows = conn.execute(
                "SELECT * FROM source_trust ORDER BY trust_score ASC"
            ).fetchall()

        return [
            SourceTrustEntry(
                source_type=r["source_type"],
                source_key=r["source_key"],
                total_scans=r["total_scans"],
                injection_detections=r["injection_detections"],
                trust_score=r["trust_score"],
                last_clean_scan=r["last_clean_scan"],
                last_injection=r["last_injection"],
            )
            for r in rows
        ]

    # =====================================================================
    # Workflow Baselines
    # =====================================================================

    def update_workflow(
        self,
        project_hash: str,
        tool_name: str,
        hour_of_day: Optional[int] = None,
        was_denied: bool = False,
    ) -> None:
        """Update workflow baseline for a project+tool+hour."""
        conn = self._get_connection()
        now = datetime.utcnow().isoformat()

        denied_inc = 1 if was_denied else 0
        conn.execute(
            """
            INSERT INTO workflow_baselines (
                project_hash, tool_name, hour_of_day, invocation_count,
                denied_count, last_updated
            ) VALUES (?, ?, ?, 1, ?, ?)
            ON CONFLICT(project_hash, tool_name, hour_of_day) DO UPDATE SET
                invocation_count = invocation_count + 1,
                denied_count = denied_count + ?,
                last_updated = ?
            """,
            (project_hash, tool_name, hour_of_day, denied_inc, now, denied_inc, now),
        )

    def get_workflow_baseline(
        self, project_hash: str
    ) -> List[WorkflowBaseline]:
        """Get all workflow baselines for a project."""
        conn = self._get_connection()
        rows = conn.execute(
            """
            SELECT * FROM workflow_baselines
            WHERE project_hash = ?
            ORDER BY tool_name, hour_of_day
            """,
            (project_hash,),
        ).fetchall()

        self._audit("read", "workflow_baselines", project_hash, f"count={len(rows)}")

        return [
            WorkflowBaseline(
                project_hash=r["project_hash"],
                tool_name=r["tool_name"],
                hour_of_day=r["hour_of_day"],
                invocation_count=r["invocation_count"],
                denied_count=r["denied_count"],
            )
            for r in rows
        ]

    def get_workflow_tool_baseline(
        self, project_hash: str, tool_name: str
    ) -> Optional[WorkflowBaseline]:
        """Get aggregated baseline for a specific tool in a project."""
        conn = self._get_connection()
        row = conn.execute(
            """
            SELECT project_hash, tool_name, NULL as hour_of_day,
                SUM(invocation_count) as invocation_count,
                SUM(denied_count) as denied_count
            FROM workflow_baselines
            WHERE project_hash = ? AND tool_name = ?
            GROUP BY project_hash, tool_name
            """,
            (project_hash, tool_name),
        ).fetchone()

        if not row:
            return None

        return WorkflowBaseline(
            project_hash=row["project_hash"],
            tool_name=row["tool_name"],
            hour_of_day=None,
            invocation_count=row["invocation_count"],
            denied_count=row["denied_count"],
        )

    # =====================================================================
    # Learned Whitelists
    # =====================================================================

    def _update_learned_whitelist(self, entry: PatternDecisionEntry) -> None:
        """Update learned whitelist suggestion based on a new decision."""
        if not entry.user_response:
            return

        conn = self._get_connection()
        now = datetime.utcnow().isoformat()

        approval_inc = 1 if entry.user_response == "approved" else 0
        denial_inc = 1 if entry.user_response == "denied" else 0

        conn.execute(
            """
            INSERT INTO learned_whitelists (
                pattern_name, tool_name, path_prefix,
                approval_count, denial_count, timestamp
            ) VALUES (?, ?, ?, ?, ?, ?)
            ON CONFLICT(pattern_name, tool_name, path_prefix) DO UPDATE SET
                approval_count = approval_count + ?,
                denial_count = denial_count + ?,
                timestamp = ?
            """,
            (
                entry.pattern_name, entry.tool_name, entry.path_prefix,
                approval_inc, denial_inc, now,
                approval_inc, denial_inc, now,
            ),
        )

        # Recompute confidence and check if suggestion threshold met
        row = conn.execute(
            """
            SELECT approval_count, denial_count FROM learned_whitelists
            WHERE pattern_name = ? AND tool_name = ? AND path_prefix IS ?
            """,
            (entry.pattern_name, entry.tool_name, entry.path_prefix),
        ).fetchone()

        if row:
            total = row["approval_count"] + row["denial_count"]
            if total > 0:
                confidence = row["approval_count"] / total
                suggested_at = now if (
                    confidence >= MIN_APPROVAL_RATIO
                    and total >= MIN_DECISION_THRESHOLD
                ) else None

                conn.execute(
                    """
                    UPDATE learned_whitelists
                    SET confidence = ?, suggested_at = ?
                    WHERE pattern_name = ? AND tool_name = ? AND path_prefix IS ?
                    """,
                    (confidence, suggested_at,
                     entry.pattern_name, entry.tool_name, entry.path_prefix),
                )

    def get_whitelist_suggestions(
        self, pending_only: bool = True
    ) -> List[LearnedWhitelistSuggestion]:
        """Get learned whitelist suggestions.

        Args:
            pending_only: If True, only return unreviewed suggestions
        """
        conn = self._get_connection()

        if pending_only:
            rows = conn.execute(
                """
                SELECT * FROM learned_whitelists
                WHERE suggested_at IS NOT NULL AND human_reviewed = 0
                ORDER BY confidence DESC
                """
            ).fetchall()
        else:
            rows = conn.execute(
                """
                SELECT * FROM learned_whitelists
                WHERE suggested_at IS NOT NULL
                ORDER BY confidence DESC
                """
            ).fetchall()

        self._audit("read", "learned_whitelists", "suggestions", f"count={len(rows)}")

        return [
            LearnedWhitelistSuggestion(
                id=r["id"],
                pattern_name=r["pattern_name"],
                tool_name=r["tool_name"],
                path_prefix=r["path_prefix"],
                approval_count=r["approval_count"],
                denial_count=r["denial_count"],
                confidence=r["confidence"],
                suggested_at=r["suggested_at"],
                human_reviewed=r["human_reviewed"],
            )
            for r in rows
        ]

    def review_whitelist_suggestion(self, suggestion_id: int, accepted: bool) -> bool:
        """Mark a whitelist suggestion as accepted or rejected.

        Returns True if the suggestion was found and updated.
        """
        conn = self._get_connection()
        status = 1 if accepted else -1
        cursor = conn.execute(
            "UPDATE learned_whitelists SET human_reviewed = ? WHERE id = ?",
            (status, suggestion_id),
        )

        action = "accepted" if accepted else "rejected"
        self._audit("write", "learned_whitelists", f"id={suggestion_id}", action)

        return cursor.rowcount > 0

    # =====================================================================
    # Decay Engine
    # =====================================================================

    def apply_decay(self) -> Dict[str, int]:
        """Apply time-based decay to all weighted entries.

        Uses a 30-day half-life: weight = 2^(-days_elapsed/30)

        Returns count of updated rows per table.
        """
        conn = self._get_connection()
        now = datetime.utcnow()
        results = {}

        # Decay pattern decisions
        rows = conn.execute(
            "SELECT id, timestamp, decay_weight FROM pattern_decisions WHERE decay_weight > 0.01"
        ).fetchall()

        updated = 0
        for row in rows:
            try:
                ts = datetime.fromisoformat(row["timestamp"])
                days_elapsed = (now - ts).total_seconds() / 86400
                new_weight = math.pow(2, -days_elapsed / DECAY_HALF_LIFE_DAYS)
                new_weight = max(new_weight, 0.0)  # Floor at 0

                if abs(new_weight - row["decay_weight"]) > 0.001:
                    conn.execute(
                        "UPDATE pattern_decisions SET decay_weight = ? WHERE id = ?",
                        (new_weight, row["id"]),
                    )
                    updated += 1
            except (ValueError, TypeError):
                continue

        results["pattern_decisions"] = updated

        # Decay source trust
        rows = conn.execute(
            "SELECT id, timestamp, decay_weight FROM source_trust WHERE decay_weight > 0.01"
        ).fetchall()

        updated = 0
        for row in rows:
            try:
                ts = datetime.fromisoformat(row["timestamp"])
                days_elapsed = (now - ts).total_seconds() / 86400
                new_weight = math.pow(2, -days_elapsed / DECAY_HALF_LIFE_DAYS)
                new_weight = max(new_weight, 0.0)

                if abs(new_weight - row["decay_weight"]) > 0.001:
                    conn.execute(
                        "UPDATE source_trust SET decay_weight = ? WHERE id = ?",
                        (new_weight, row["id"]),
                    )
                    updated += 1
            except (ValueError, TypeError):
                continue

        results["source_trust"] = updated

        self._audit("decay", "all", None, str(results))
        return results

    # =====================================================================
    # Stats & Export
    # =====================================================================

    def get_stats(self) -> Dict[str, Any]:
        """Get overall memory statistics."""
        conn = self._get_connection()
        stats = {}

        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
                      "learned_whitelists", "memory_audit"):
            row = conn.execute(f"SELECT COUNT(*) as cnt FROM {table}").fetchone()
            stats[table] = row["cnt"]

        # Last decay
        row = conn.execute(
            """
            SELECT timestamp FROM memory_audit
            WHERE operation = 'decay'
            ORDER BY timestamp DESC LIMIT 1
            """
        ).fetchone()
        stats["last_decay"] = row["timestamp"] if row else None

        # DB file size
        try:
            stats["db_size_bytes"] = self.db_path.stat().st_size
        except OSError:
            stats["db_size_bytes"] = 0

        return stats

    def get_pattern_stats(
        self, min_decisions: int = 0, sort_by: str = "count"
    ) -> List[Dict[str, Any]]:
        """Get per-pattern confidence statistics."""
        conn = self._get_connection()
        rows = conn.execute(
            """
            SELECT * FROM pattern_confidence_view
            WHERE total_decisions >= ?
            """,
            (min_decisions,),
        ).fetchall()

        results = [dict(r) for r in rows]

        if sort_by == "approval":
            results.sort(key=lambda r: r.get("approval_ratio", 0), reverse=True)
        elif sort_by == "name":
            results.sort(key=lambda r: r.get("pattern_name", ""))
        else:  # count
            results.sort(key=lambda r: r.get("total_decisions", 0), reverse=True)

        return results

    def get_audit_log(self, limit: int = 50) -> List[Dict[str, Any]]:
        """Get recent audit log entries."""
        conn = self._get_connection()
        rows = conn.execute(
            "SELECT * FROM memory_audit ORDER BY timestamp DESC LIMIT ?",
            (limit,),
        ).fetchall()
        return [dict(r) for r in rows]

    def export_all(self) -> Dict[str, Any]:
        """Export all memory data as a JSON-serializable dict."""
        conn = self._get_connection()
        data = {}

        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
                      "learned_whitelists"):
            rows = conn.execute(f"SELECT * FROM {table}").fetchall()
            data[table] = [dict(r) for r in rows]

        data["stats"] = self.get_stats()
        return data

    def clear_table(self, table_name: str) -> int:
        """Clear all data from a specific table.

        Returns the number of deleted rows.
        """
        valid_tables = {
            "pattern_decisions", "source_trust", "workflow_baselines",
            "learned_whitelists", "memory_audit",
        }
        if table_name not in valid_tables:
            raise ValueError(f"Invalid table: {table_name}. Must be one of {valid_tables}")

        conn = self._get_connection()
        cursor = conn.execute(f"DELETE FROM {table_name}")
        count = cursor.rowcount

        self._audit("clear", table_name, None, f"deleted={count}")
        return count

    def clear_all(self) -> Dict[str, int]:
        """Clear all memory data. Returns counts per table."""
        results = {}
        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
                      "learned_whitelists"):
            results[table] = self.clear_table(table)

        # Clear audit last (so the clear operations are logged first)
        results["memory_audit"] = self.clear_table("memory_audit")
        return results

    # =====================================================================
    # Audit
    # =====================================================================

    def _audit(
        self,
        operation: str,
        table_name: str,
        key_info: Optional[str],
        result: Optional[str],
    ) -> None:
        """Log an operation to the memory audit table."""
        try:
            conn = self._get_connection()
            conn.execute(
                """
                INSERT INTO memory_audit (operation, table_name, key_info, result)
                VALUES (?, ?, ?, ?)
                """,
                (operation, table_name, key_info, result),
            )
        except Exception:
            pass  # Audit logging should never block operations


# =========================================================================
# Helpers
# =========================================================================


def _extract_domain(url: str) -> Optional[str]:
    """Extract domain from a URL."""
    try:
        from urllib.parse import urlparse
        parsed = urlparse(url)
        return parsed.hostname
    except Exception:
        return None


def content_hash(content: str) -> str:
    """Compute SHA-256 hash of content for deduplication."""
    return hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()


def normalize_path_prefix(path: str, depth: int = 3) -> Optional[str]:
    """Normalize a path to a prefix for memory lookups.

    Strips to first `depth` components from the project root.
    Example: /home/user/project/src/lib/utils.py -> src/lib/utils.py
    """
    if not path:
        return None
    try:
        p = Path(path).resolve()
        parts = p.parts
        if len(parts) <= depth:
            return str(p)
        # Return last `depth` components
        return str(Path(*parts[-depth:]))
    except (ValueError, TypeError):
        return None


def hash_project(working_dir: str) -> Optional[str]:
    """Hash a working directory to a project identifier."""
    if not working_dir:
        return None
    return hashlib.sha256(working_dir.encode()).hexdigest()[:16]


# =========================================================================
# Module-level singleton
# =========================================================================

_global_store: Optional[MemoryStore] = None


def get_memory_store(db_path: Optional[Path] = None) -> MemoryStore:
    """Get the global MemoryStore singleton.

    Args:
        db_path: Override path for the database. If None, uses ~/.tweek/memory.db.
    """
    global _global_store
    if db_path:
        # Custom path - return new instance (don't cache)
        return MemoryStore(db_path=db_path)

    if _global_store is None:
        _global_store = MemoryStore()
    return _global_store


def reset_memory_store() -> None:
    """Reset the global singleton (for testing)."""
    global _global_store
    if _global_store is not None:
        _global_store.close()
        _global_store = None