tweek 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tweek/__init__.py +2 -2
- tweek/_keygen.py +53 -0
- tweek/audit.py +288 -0
- tweek/cli.py +5303 -2396
- tweek/cli_model.py +380 -0
- tweek/config/families.yaml +609 -0
- tweek/config/manager.py +42 -5
- tweek/config/patterns.yaml +1510 -8
- tweek/config/tiers.yaml +161 -11
- tweek/diagnostics.py +71 -2
- tweek/hooks/break_glass.py +163 -0
- tweek/hooks/feedback.py +223 -0
- tweek/hooks/overrides.py +531 -0
- tweek/hooks/post_tool_use.py +472 -0
- tweek/hooks/pre_tool_use.py +1024 -62
- tweek/integrations/openclaw.py +443 -0
- tweek/integrations/openclaw_server.py +385 -0
- tweek/licensing.py +14 -54
- tweek/logging/bundle.py +2 -2
- tweek/logging/security_log.py +56 -13
- tweek/mcp/approval.py +57 -16
- tweek/mcp/proxy.py +18 -0
- tweek/mcp/screening.py +5 -5
- tweek/mcp/server.py +4 -1
- tweek/memory/__init__.py +24 -0
- tweek/memory/queries.py +223 -0
- tweek/memory/safety.py +140 -0
- tweek/memory/schemas.py +80 -0
- tweek/memory/store.py +989 -0
- tweek/platform/__init__.py +4 -4
- tweek/plugins/__init__.py +40 -24
- tweek/plugins/base.py +1 -1
- tweek/plugins/detectors/__init__.py +3 -3
- tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
- tweek/plugins/git_discovery.py +16 -4
- tweek/plugins/git_registry.py +8 -2
- tweek/plugins/git_security.py +21 -9
- tweek/plugins/screening/__init__.py +10 -1
- tweek/plugins/screening/heuristic_scorer.py +477 -0
- tweek/plugins/screening/llm_reviewer.py +14 -6
- tweek/plugins/screening/local_model_reviewer.py +161 -0
- tweek/proxy/__init__.py +38 -37
- tweek/proxy/addon.py +22 -3
- tweek/proxy/interceptor.py +1 -0
- tweek/proxy/server.py +4 -2
- tweek/sandbox/__init__.py +11 -0
- tweek/sandbox/docker_bridge.py +143 -0
- tweek/sandbox/executor.py +9 -6
- tweek/sandbox/layers.py +97 -0
- tweek/sandbox/linux.py +1 -0
- tweek/sandbox/project.py +548 -0
- tweek/sandbox/registry.py +149 -0
- tweek/security/__init__.py +9 -0
- tweek/security/language.py +250 -0
- tweek/security/llm_reviewer.py +1146 -60
- tweek/security/local_model.py +331 -0
- tweek/security/local_reviewer.py +146 -0
- tweek/security/model_registry.py +371 -0
- tweek/security/rate_limiter.py +11 -6
- tweek/security/secret_scanner.py +70 -4
- tweek/security/session_analyzer.py +26 -2
- tweek/skill_template/SKILL.md +200 -0
- tweek/skill_template/__init__.py +0 -0
- tweek/skill_template/cli-reference.md +331 -0
- tweek/skill_template/overrides-reference.md +184 -0
- tweek/skill_template/scripts/__init__.py +0 -0
- tweek/skill_template/scripts/check_installed.py +170 -0
- tweek/skills/__init__.py +38 -0
- tweek/skills/config.py +150 -0
- tweek/skills/fingerprints.py +198 -0
- tweek/skills/guard.py +293 -0
- tweek/skills/isolation.py +469 -0
- tweek/skills/scanner.py +715 -0
- tweek/vault/__init__.py +0 -1
- tweek/vault/cross_platform.py +12 -1
- tweek/vault/keychain.py +87 -29
- tweek-0.2.0.dist-info/METADATA +281 -0
- tweek-0.2.0.dist-info/RECORD +121 -0
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/entry_points.txt +8 -1
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/licenses/LICENSE +80 -0
- tweek/integrations/moltbot.py +0 -243
- tweek-0.1.0.dist-info/METADATA +0 -335
- tweek-0.1.0.dist-info/RECORD +0 -85
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/WHEEL +0 -0
- {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/top_level.txt +0 -0
tweek/mcp/approval.py
CHANGED
|
@@ -73,7 +73,7 @@ class ApprovalRequest:
|
|
|
73
73
|
return False
|
|
74
74
|
try:
|
|
75
75
|
ts = datetime.fromisoformat(self.timestamp)
|
|
76
|
-
elapsed = (datetime.
|
|
76
|
+
elapsed = (datetime.now(tz=None) - ts).total_seconds()
|
|
77
77
|
return elapsed >= self.timeout_seconds
|
|
78
78
|
except (ValueError, TypeError):
|
|
79
79
|
return False
|
|
@@ -83,7 +83,7 @@ class ApprovalRequest:
|
|
|
83
83
|
"""Seconds remaining before timeout. Returns 0 if expired."""
|
|
84
84
|
try:
|
|
85
85
|
ts = datetime.fromisoformat(self.timestamp)
|
|
86
|
-
elapsed = (datetime.
|
|
86
|
+
elapsed = (datetime.now(tz=None) - ts).total_seconds()
|
|
87
87
|
remaining = self.timeout_seconds - elapsed
|
|
88
88
|
return max(0.0, remaining)
|
|
89
89
|
except (ValueError, TypeError):
|
|
@@ -261,7 +261,11 @@ class ApprovalQueue:
|
|
|
261
261
|
return [self._row_to_request(row) for row in rows]
|
|
262
262
|
|
|
263
263
|
def get_request(self, request_id: str) -> Optional[ApprovalRequest]:
|
|
264
|
-
"""Get a specific approval request by ID (supports short IDs).
|
|
264
|
+
"""Get a specific approval request by ID (supports short IDs).
|
|
265
|
+
|
|
266
|
+
Raises:
|
|
267
|
+
ValueError: If short ID matches multiple requests (ambiguous).
|
|
268
|
+
"""
|
|
265
269
|
with self._get_connection() as conn:
|
|
266
270
|
# Try exact match first
|
|
267
271
|
row = conn.execute(
|
|
@@ -271,10 +275,18 @@ class ApprovalQueue:
|
|
|
271
275
|
|
|
272
276
|
# If not found, try prefix match (short ID)
|
|
273
277
|
if row is None and len(request_id) < 36:
|
|
274
|
-
|
|
278
|
+
rows = conn.execute(
|
|
275
279
|
"SELECT * FROM approval_requests WHERE id LIKE ?",
|
|
276
280
|
(f"{request_id}%",),
|
|
277
|
-
).
|
|
281
|
+
).fetchall()
|
|
282
|
+
if len(rows) == 1:
|
|
283
|
+
row = rows[0]
|
|
284
|
+
elif len(rows) > 1:
|
|
285
|
+
ids = [r["id"][:12] for r in rows]
|
|
286
|
+
raise ValueError(
|
|
287
|
+
f"Ambiguous short ID '{request_id}' matches {len(rows)} requests: "
|
|
288
|
+
f"{', '.join(ids)}. Use a longer prefix."
|
|
289
|
+
)
|
|
278
290
|
|
|
279
291
|
return self._row_to_request(row) if row else None
|
|
280
292
|
|
|
@@ -300,17 +312,28 @@ class ApprovalQueue:
|
|
|
300
312
|
if status not in (ApprovalStatus.APPROVED, ApprovalStatus.DENIED, ApprovalStatus.EXPIRED):
|
|
301
313
|
raise ValueError(f"Invalid decision status: {status}")
|
|
302
314
|
|
|
303
|
-
# Resolve short IDs
|
|
304
|
-
request = self.get_request(request_id)
|
|
305
|
-
if request is None:
|
|
306
|
-
return False
|
|
307
|
-
if request.status != ApprovalStatus.PENDING:
|
|
308
|
-
return False
|
|
309
|
-
|
|
310
|
-
full_id = request.id
|
|
311
|
-
|
|
312
315
|
def _do_decide():
|
|
313
316
|
with self._get_connection() as conn:
|
|
317
|
+
# Atomic check-and-update: resolve short IDs and update in a
|
|
318
|
+
# single connection to eliminate the TOCTOU race condition.
|
|
319
|
+
# For short IDs, do the LIKE lookup inside the same transaction.
|
|
320
|
+
if len(request_id) < 36:
|
|
321
|
+
rows = conn.execute(
|
|
322
|
+
"SELECT id FROM approval_requests WHERE id LIKE ? AND status = 'pending'",
|
|
323
|
+
(f"{request_id}%",),
|
|
324
|
+
).fetchall()
|
|
325
|
+
if len(rows) == 0:
|
|
326
|
+
return False
|
|
327
|
+
if len(rows) > 1:
|
|
328
|
+
ids = [r["id"][:12] for r in rows]
|
|
329
|
+
raise ValueError(
|
|
330
|
+
f"Ambiguous short ID '{request_id}' matches {len(rows)} requests: "
|
|
331
|
+
f"{', '.join(ids)}. Use a longer prefix."
|
|
332
|
+
)
|
|
333
|
+
full_id = rows[0]["id"]
|
|
334
|
+
else:
|
|
335
|
+
full_id = request_id
|
|
336
|
+
|
|
314
337
|
cursor = conn.execute(
|
|
315
338
|
"""
|
|
316
339
|
UPDATE approval_requests
|
|
@@ -452,5 +475,23 @@ class ApprovalQueue:
|
|
|
452
475
|
redactor = LogRedactor(enabled=True)
|
|
453
476
|
return redactor.redact_dict(arguments)
|
|
454
477
|
except ImportError:
|
|
455
|
-
# If logging module unavailable,
|
|
456
|
-
return arguments
|
|
478
|
+
# If logging module unavailable, do basic redaction to avoid storing raw secrets
|
|
479
|
+
return self._basic_redact(arguments)
|
|
480
|
+
|
|
481
|
+
@staticmethod
|
|
482
|
+
def _basic_redact(arguments: Dict[str, Any]) -> Dict[str, Any]:
|
|
483
|
+
"""Fallback redaction when LogRedactor is unavailable."""
|
|
484
|
+
import re
|
|
485
|
+
sensitive_keys = re.compile(
|
|
486
|
+
r'(?i)(password|secret|token|key|credential|auth|bearer|api.?key)', re.IGNORECASE
|
|
487
|
+
)
|
|
488
|
+
redacted = {}
|
|
489
|
+
for k, v in arguments.items():
|
|
490
|
+
if sensitive_keys.search(k):
|
|
491
|
+
redacted[k] = "***REDACTED***"
|
|
492
|
+
elif isinstance(v, str) and len(v) > 50:
|
|
493
|
+
# Long string values may contain secrets — truncate
|
|
494
|
+
redacted[k] = v[:8] + "***"
|
|
495
|
+
else:
|
|
496
|
+
redacted[k] = v
|
|
497
|
+
return redacted
|
tweek/mcp/proxy.py
CHANGED
|
@@ -12,6 +12,7 @@ Architecture:
|
|
|
12
12
|
Usage:
|
|
13
13
|
tweek mcp proxy # Start proxy on stdio transport
|
|
14
14
|
"""
|
|
15
|
+
from __future__ import annotations
|
|
15
16
|
|
|
16
17
|
import asyncio
|
|
17
18
|
import json
|
|
@@ -470,6 +471,23 @@ class TweekMCPProxy:
|
|
|
470
471
|
text=json.dumps({"result": "empty response from upstream"}),
|
|
471
472
|
)]
|
|
472
473
|
|
|
474
|
+
# Screen output for leaked credentials or sensitive data
|
|
475
|
+
try:
|
|
476
|
+
from tweek.mcp.screening import run_output_scan
|
|
477
|
+
combined_text = "\n".join(tc.text for tc in text_contents)
|
|
478
|
+
scan_result = run_output_scan(combined_text)
|
|
479
|
+
if scan_result.get("blocked"):
|
|
480
|
+
reason = scan_result.get("reason", "Output blocked by security screening")
|
|
481
|
+
return [TextContent(
|
|
482
|
+
type="text",
|
|
483
|
+
text=json.dumps({
|
|
484
|
+
"error": "Output blocked by Tweek security screening",
|
|
485
|
+
"reason": reason,
|
|
486
|
+
}),
|
|
487
|
+
)]
|
|
488
|
+
except Exception:
|
|
489
|
+
pass # Output scanning errors should not block the response
|
|
490
|
+
|
|
473
491
|
return text_contents
|
|
474
492
|
|
|
475
493
|
def _build_context(
|
tweek/mcp/screening.py
CHANGED
|
@@ -122,13 +122,13 @@ def run_mcp_screening(context: ScreeningContext) -> Dict[str, Any]:
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
except ImportError as e:
|
|
125
|
-
logger.
|
|
126
|
-
# Fail
|
|
125
|
+
logger.error(f"Screening modules not available: {e}")
|
|
126
|
+
# Fail closed: missing security modules should block, not bypass
|
|
127
127
|
return {
|
|
128
|
-
"allowed":
|
|
129
|
-
"blocked":
|
|
128
|
+
"allowed": False,
|
|
129
|
+
"blocked": True,
|
|
130
130
|
"should_prompt": False,
|
|
131
|
-
"reason": f"
|
|
131
|
+
"reason": f"Screening unavailable (missing modules: {e}). Blocking for safety.",
|
|
132
132
|
"findings": [],
|
|
133
133
|
}
|
|
134
134
|
except Exception as e:
|
tweek/mcp/server.py
CHANGED
|
@@ -14,6 +14,7 @@ proxy mode: tweek mcp proxy
|
|
|
14
14
|
Usage:
|
|
15
15
|
tweek mcp serve # stdio mode (desktop clients)
|
|
16
16
|
"""
|
|
17
|
+
from __future__ import annotations
|
|
17
18
|
|
|
18
19
|
import json
|
|
19
20
|
import logging
|
|
@@ -233,7 +234,9 @@ class TweekMCPServer:
|
|
|
233
234
|
})
|
|
234
235
|
|
|
235
236
|
except Exception as e:
|
|
236
|
-
|
|
237
|
+
# Don't leak internal details across trust boundary
|
|
238
|
+
logger.error(f"Vault operation failed: {e}")
|
|
239
|
+
return json.dumps({"error": "Vault operation failed"})
|
|
237
240
|
|
|
238
241
|
async def _handle_status(self, arguments: Dict[str, Any]) -> str:
|
|
239
242
|
"""Handle tweek_status tool call."""
|
tweek/memory/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tweek Agentic Memory
|
|
3
|
+
|
|
4
|
+
Persistent, structured memory that enables Tweek to learn from past security
|
|
5
|
+
decisions and make better screening choices over time.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Pattern decision history with time-decay weighting
|
|
9
|
+
- Source trustworthiness tracking (URL/file injection history)
|
|
10
|
+
- Cross-session workflow baselines
|
|
11
|
+
- Learned whitelist suggestions from approval patterns
|
|
12
|
+
|
|
13
|
+
Safety Invariants:
|
|
14
|
+
- CRITICAL+deterministic patterns are immune from memory adjustment
|
|
15
|
+
- Memory can only relax ask -> log (never deny -> anything)
|
|
16
|
+
- Project memory can escalate but never relax global decisions
|
|
17
|
+
- Minimum 10 weighted decisions before any adjustment suggested
|
|
18
|
+
- 30-day half-life for time decay
|
|
19
|
+
- Full audit trail for every memory operation
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from tweek.memory.store import MemoryStore, get_memory_store
|
|
23
|
+
|
|
24
|
+
__all__ = ["MemoryStore", "get_memory_store"]
|
tweek/memory/queries.py
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tweek Memory Query Functions
|
|
3
|
+
|
|
4
|
+
Hook entry points for reading and writing memory during PreToolUse
|
|
5
|
+
and PostToolUse screening. All functions are best-effort and fail
|
|
6
|
+
silently to avoid blocking security screening.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Any, Dict, Optional
|
|
11
|
+
|
|
12
|
+
from tweek.memory.safety import MIN_CONFIDENCE_SCORE
|
|
13
|
+
from tweek.memory.schemas import PatternDecisionEntry
|
|
14
|
+
from tweek.memory.store import (
|
|
15
|
+
MemoryStore,
|
|
16
|
+
content_hash,
|
|
17
|
+
get_memory_store,
|
|
18
|
+
hash_project,
|
|
19
|
+
normalize_path_prefix,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def memory_read_for_pattern(
|
|
24
|
+
pattern_name: str,
|
|
25
|
+
pattern_severity: str,
|
|
26
|
+
pattern_confidence: str,
|
|
27
|
+
tool_name: str,
|
|
28
|
+
path_prefix: Optional[str] = None,
|
|
29
|
+
project_hash: Optional[str] = None,
|
|
30
|
+
current_decision: str = "ask",
|
|
31
|
+
) -> Optional[Dict[str, Any]]:
|
|
32
|
+
"""Read memory for a pattern match to get confidence adjustment.
|
|
33
|
+
|
|
34
|
+
Called from PreToolUse after pattern matching, before enforcement resolution.
|
|
35
|
+
|
|
36
|
+
Returns a dict with 'adjusted_decision' key if memory suggests a change,
|
|
37
|
+
or None if no adjustment suggested.
|
|
38
|
+
"""
|
|
39
|
+
try:
|
|
40
|
+
store = get_memory_store()
|
|
41
|
+
normalized_prefix = normalize_path_prefix(path_prefix) if path_prefix else None
|
|
42
|
+
|
|
43
|
+
adjustment = store.get_confidence_adjustment(
|
|
44
|
+
pattern_name=pattern_name,
|
|
45
|
+
path_prefix=normalized_prefix,
|
|
46
|
+
current_decision=current_decision,
|
|
47
|
+
original_severity=pattern_severity,
|
|
48
|
+
original_confidence=pattern_confidence,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
if adjustment is None:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
if (
|
|
55
|
+
adjustment.adjusted_decision
|
|
56
|
+
and adjustment.confidence_score >= MIN_CONFIDENCE_SCORE
|
|
57
|
+
):
|
|
58
|
+
return {
|
|
59
|
+
"adjusted_decision": adjustment.adjusted_decision,
|
|
60
|
+
"confidence_score": adjustment.confidence_score,
|
|
61
|
+
"approval_ratio": adjustment.approval_ratio,
|
|
62
|
+
"total_decisions": adjustment.total_decisions,
|
|
63
|
+
"pattern_name": pattern_name,
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
return None
|
|
67
|
+
except Exception:
|
|
68
|
+
return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def memory_write_after_decision(
|
|
72
|
+
pattern_name: str,
|
|
73
|
+
pattern_id: Optional[int],
|
|
74
|
+
original_severity: str,
|
|
75
|
+
original_confidence: str,
|
|
76
|
+
decision: str,
|
|
77
|
+
user_response: Optional[str],
|
|
78
|
+
tool_name: str,
|
|
79
|
+
content: str,
|
|
80
|
+
path_prefix: Optional[str] = None,
|
|
81
|
+
project_hash: Optional[str] = None,
|
|
82
|
+
) -> None:
|
|
83
|
+
"""Write a pattern decision to memory.
|
|
84
|
+
|
|
85
|
+
Called from PreToolUse after the decision is made (in all branches:
|
|
86
|
+
deny, ask, log, and allow).
|
|
87
|
+
"""
|
|
88
|
+
try:
|
|
89
|
+
store = get_memory_store()
|
|
90
|
+
normalized_prefix = normalize_path_prefix(path_prefix) if path_prefix else None
|
|
91
|
+
c_hash = content_hash(content) if content else None
|
|
92
|
+
|
|
93
|
+
entry = PatternDecisionEntry(
|
|
94
|
+
pattern_name=pattern_name,
|
|
95
|
+
pattern_id=pattern_id,
|
|
96
|
+
original_severity=original_severity,
|
|
97
|
+
original_confidence=original_confidence,
|
|
98
|
+
decision=decision,
|
|
99
|
+
user_response=user_response,
|
|
100
|
+
tool_name=tool_name,
|
|
101
|
+
content_hash=c_hash,
|
|
102
|
+
path_prefix=normalized_prefix,
|
|
103
|
+
project_hash=project_hash,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
store.record_decision(entry)
|
|
107
|
+
except Exception:
|
|
108
|
+
pass # Memory is best-effort
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def memory_read_source_trust(
|
|
112
|
+
source_type: str,
|
|
113
|
+
source_key: str,
|
|
114
|
+
) -> Optional[Dict[str, Any]]:
|
|
115
|
+
"""Read source trust information.
|
|
116
|
+
|
|
117
|
+
Called from PostToolUse before screen_content().
|
|
118
|
+
|
|
119
|
+
Returns a dict with trust information if available.
|
|
120
|
+
"""
|
|
121
|
+
try:
|
|
122
|
+
store = get_memory_store()
|
|
123
|
+
entry = store.get_source_trust(source_type, source_key)
|
|
124
|
+
|
|
125
|
+
if entry is None:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
"source_type": entry.source_type,
|
|
130
|
+
"source_key": entry.source_key,
|
|
131
|
+
"trust_score": entry.trust_score,
|
|
132
|
+
"total_scans": entry.total_scans,
|
|
133
|
+
"injection_detections": entry.injection_detections,
|
|
134
|
+
"last_injection": entry.last_injection,
|
|
135
|
+
}
|
|
136
|
+
except Exception:
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def memory_write_source_scan(
|
|
141
|
+
source_type: str,
|
|
142
|
+
source_key: str,
|
|
143
|
+
had_injection: bool,
|
|
144
|
+
) -> None:
|
|
145
|
+
"""Record a source scan result in memory.
|
|
146
|
+
|
|
147
|
+
Called from PostToolUse after screen_content() completes.
|
|
148
|
+
"""
|
|
149
|
+
try:
|
|
150
|
+
store = get_memory_store()
|
|
151
|
+
store.record_source_scan(source_type, source_key, had_injection)
|
|
152
|
+
|
|
153
|
+
# Also record domain-level trust for URLs
|
|
154
|
+
if source_type == "url":
|
|
155
|
+
try:
|
|
156
|
+
from urllib.parse import urlparse
|
|
157
|
+
domain = urlparse(source_key).hostname
|
|
158
|
+
if domain:
|
|
159
|
+
store.record_source_scan("domain", domain, had_injection)
|
|
160
|
+
except Exception:
|
|
161
|
+
pass
|
|
162
|
+
except Exception:
|
|
163
|
+
pass # Memory is best-effort
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def memory_update_workflow(
|
|
167
|
+
project_hash: str,
|
|
168
|
+
tool_name: str,
|
|
169
|
+
was_denied: bool = False,
|
|
170
|
+
) -> None:
|
|
171
|
+
"""Update workflow baseline for a project.
|
|
172
|
+
|
|
173
|
+
Called from PreToolUse at the end of processing.
|
|
174
|
+
"""
|
|
175
|
+
try:
|
|
176
|
+
store = get_memory_store()
|
|
177
|
+
hour = datetime.utcnow().hour
|
|
178
|
+
store.update_workflow(
|
|
179
|
+
project_hash=project_hash,
|
|
180
|
+
tool_name=tool_name,
|
|
181
|
+
hour_of_day=hour,
|
|
182
|
+
was_denied=was_denied,
|
|
183
|
+
)
|
|
184
|
+
except Exception:
|
|
185
|
+
pass # Memory is best-effort
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def memory_get_workflow_baseline(
|
|
189
|
+
project_hash: str,
|
|
190
|
+
) -> Optional[Dict[str, Any]]:
|
|
191
|
+
"""Get workflow baseline for cross-session comparison.
|
|
192
|
+
|
|
193
|
+
Called from session_analyzer to compare current behavior against baselines.
|
|
194
|
+
"""
|
|
195
|
+
try:
|
|
196
|
+
store = get_memory_store()
|
|
197
|
+
baselines = store.get_workflow_baseline(project_hash)
|
|
198
|
+
|
|
199
|
+
if not baselines:
|
|
200
|
+
return None
|
|
201
|
+
|
|
202
|
+
# Aggregate into a summary
|
|
203
|
+
tool_counts = {}
|
|
204
|
+
total_invocations = 0
|
|
205
|
+
total_denials = 0
|
|
206
|
+
|
|
207
|
+
for b in baselines:
|
|
208
|
+
if b.tool_name not in tool_counts:
|
|
209
|
+
tool_counts[b.tool_name] = {"invocations": 0, "denials": 0}
|
|
210
|
+
tool_counts[b.tool_name]["invocations"] += b.invocation_count
|
|
211
|
+
tool_counts[b.tool_name]["denials"] += b.denied_count
|
|
212
|
+
total_invocations += b.invocation_count
|
|
213
|
+
total_denials += b.denied_count
|
|
214
|
+
|
|
215
|
+
return {
|
|
216
|
+
"project_hash": project_hash,
|
|
217
|
+
"tool_counts": tool_counts,
|
|
218
|
+
"total_invocations": total_invocations,
|
|
219
|
+
"total_denials": total_denials,
|
|
220
|
+
"denial_ratio": total_denials / max(total_invocations, 1),
|
|
221
|
+
}
|
|
222
|
+
except Exception:
|
|
223
|
+
return None
|
tweek/memory/safety.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tweek Memory Safety Module
|
|
3
|
+
|
|
4
|
+
Enforces non-negotiable safety invariants for memory-based adjustments.
|
|
5
|
+
|
|
6
|
+
Rules:
|
|
7
|
+
1. CRITICAL+deterministic patterns are immune - memory NEVER adjusts them
|
|
8
|
+
2. One-step max relaxation: ask -> log only (never deny -> anything)
|
|
9
|
+
3. Additive-only project merge: project can escalate, never relax
|
|
10
|
+
4. Minimum threshold: 10+ weighted decisions before any adjustment
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
# Patterns that are immune from any memory-based adjustment.
|
|
16
|
+
# These are CRITICAL+deterministic patterns that should always deny.
|
|
17
|
+
IMMUNE_SEVERITIES = frozenset({"critical"})
|
|
18
|
+
IMMUNE_CONFIDENCES = frozenset({"deterministic"})
|
|
19
|
+
|
|
20
|
+
# Decision hierarchy: deny > ask > log > allow
|
|
21
|
+
DECISION_RANK = {"deny": 3, "ask": 2, "log": 1, "allow": 0}
|
|
22
|
+
|
|
23
|
+
# Maximum relaxation: current_decision -> max allowed relaxation target
|
|
24
|
+
# deny -> NOTHING (immune)
|
|
25
|
+
# ask -> log (one step down)
|
|
26
|
+
# log -> log (already minimum observable)
|
|
27
|
+
MAX_RELAXATION = {
|
|
28
|
+
"deny": "deny", # Never relax deny
|
|
29
|
+
"ask": "log", # Can relax to log
|
|
30
|
+
"log": "log", # Already at minimum
|
|
31
|
+
"allow": "allow", # Already at minimum
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
# Minimum weighted decisions before memory can suggest adjustments
|
|
35
|
+
MIN_DECISION_THRESHOLD = 10
|
|
36
|
+
|
|
37
|
+
# Minimum approval ratio to suggest relaxation
|
|
38
|
+
MIN_APPROVAL_RATIO = 0.90 # 90% approval rate
|
|
39
|
+
|
|
40
|
+
# Minimum confidence score to actually apply an adjustment
|
|
41
|
+
MIN_CONFIDENCE_SCORE = 0.80
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def is_immune_pattern(severity: str, confidence: str) -> bool:
|
|
45
|
+
"""Check if a pattern is immune from memory adjustment.
|
|
46
|
+
|
|
47
|
+
CRITICAL+deterministic patterns are NEVER adjusted by memory.
|
|
48
|
+
This is a hard safety invariant enforced at every layer.
|
|
49
|
+
"""
|
|
50
|
+
return (
|
|
51
|
+
severity.lower() in IMMUNE_SEVERITIES
|
|
52
|
+
and confidence.lower() in IMMUNE_CONFIDENCES
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_max_relaxation(current_decision: str) -> str:
|
|
57
|
+
"""Get the maximum allowed relaxation target for a decision.
|
|
58
|
+
|
|
59
|
+
Returns the most relaxed decision memory is allowed to suggest.
|
|
60
|
+
"""
|
|
61
|
+
return MAX_RELAXATION.get(current_decision, current_decision)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def validate_memory_adjustment(
|
|
65
|
+
pattern_name: str,
|
|
66
|
+
original_severity: str,
|
|
67
|
+
original_confidence: str,
|
|
68
|
+
suggested_decision: str,
|
|
69
|
+
current_decision: str,
|
|
70
|
+
) -> str:
|
|
71
|
+
"""Validate and potentially apply a memory-suggested decision adjustment.
|
|
72
|
+
|
|
73
|
+
Returns the final decision after safety validation. This is the last
|
|
74
|
+
gate before a memory adjustment takes effect.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
pattern_name: The pattern being evaluated
|
|
78
|
+
original_severity: Pattern's severity level
|
|
79
|
+
original_confidence: Pattern's confidence level
|
|
80
|
+
suggested_decision: What memory suggests
|
|
81
|
+
current_decision: The decision from enforcement policy
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
The validated decision (may be same as current if adjustment rejected)
|
|
85
|
+
"""
|
|
86
|
+
# Rule 1: CRITICAL+deterministic are immune
|
|
87
|
+
if is_immune_pattern(original_severity, original_confidence):
|
|
88
|
+
return current_decision
|
|
89
|
+
|
|
90
|
+
# Rule 2: deny is never relaxed by memory
|
|
91
|
+
if current_decision == "deny":
|
|
92
|
+
return current_decision
|
|
93
|
+
|
|
94
|
+
# Rule 3: Can only relax, never escalate via memory
|
|
95
|
+
# Memory is for reducing noise, not adding blocks
|
|
96
|
+
suggested_rank = DECISION_RANK.get(suggested_decision, 2)
|
|
97
|
+
current_rank = DECISION_RANK.get(current_decision, 2)
|
|
98
|
+
if suggested_rank >= current_rank:
|
|
99
|
+
# Suggested is same or stricter - no change needed
|
|
100
|
+
return current_decision
|
|
101
|
+
|
|
102
|
+
# Rule 4: Maximum one-step relaxation
|
|
103
|
+
max_relaxation = get_max_relaxation(current_decision)
|
|
104
|
+
max_rank = DECISION_RANK.get(max_relaxation, 2)
|
|
105
|
+
if suggested_rank < max_rank:
|
|
106
|
+
# Suggested goes beyond max relaxation - clamp to max
|
|
107
|
+
return max_relaxation
|
|
108
|
+
|
|
109
|
+
return suggested_decision
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def compute_suggested_decision(
|
|
113
|
+
current_decision: str,
|
|
114
|
+
approval_ratio: float,
|
|
115
|
+
total_weighted_decisions: float,
|
|
116
|
+
original_severity: str,
|
|
117
|
+
original_confidence: str,
|
|
118
|
+
) -> Optional[str]:
|
|
119
|
+
"""Compute what decision memory would suggest, if any.
|
|
120
|
+
|
|
121
|
+
Returns None if memory has no suggestion (insufficient data or
|
|
122
|
+
pattern is immune).
|
|
123
|
+
"""
|
|
124
|
+
# Immune patterns get no suggestions
|
|
125
|
+
if is_immune_pattern(original_severity, original_confidence):
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
# Insufficient data
|
|
129
|
+
if total_weighted_decisions < MIN_DECISION_THRESHOLD:
|
|
130
|
+
return None
|
|
131
|
+
|
|
132
|
+
# deny is never relaxed
|
|
133
|
+
if current_decision == "deny":
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
# Only suggest relaxation if approval ratio is very high
|
|
137
|
+
if approval_ratio >= MIN_APPROVAL_RATIO and current_decision == "ask":
|
|
138
|
+
return "log"
|
|
139
|
+
|
|
140
|
+
return None
|
tweek/memory/schemas.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tweek Memory Data Schemas
|
|
3
|
+
|
|
4
|
+
Dataclasses for structured memory entries and query results.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class PatternDecisionEntry:
|
|
13
|
+
"""A single pattern decision record."""
|
|
14
|
+
|
|
15
|
+
pattern_name: str
|
|
16
|
+
pattern_id: Optional[int]
|
|
17
|
+
original_severity: str
|
|
18
|
+
original_confidence: str
|
|
19
|
+
decision: str # deny/ask/log/allow
|
|
20
|
+
user_response: Optional[str] # approved/denied/null
|
|
21
|
+
tool_name: str
|
|
22
|
+
content_hash: Optional[str]
|
|
23
|
+
path_prefix: Optional[str]
|
|
24
|
+
project_hash: Optional[str]
|
|
25
|
+
timestamp: Optional[str] = None
|
|
26
|
+
decay_weight: float = 1.0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
|
|
30
|
+
class ConfidenceAdjustment:
|
|
31
|
+
"""Result of a memory confidence query for a pattern."""
|
|
32
|
+
|
|
33
|
+
pattern_name: str
|
|
34
|
+
path_prefix: Optional[str]
|
|
35
|
+
total_decisions: int
|
|
36
|
+
weighted_approvals: float
|
|
37
|
+
weighted_denials: float
|
|
38
|
+
approval_ratio: float
|
|
39
|
+
last_decision: Optional[str]
|
|
40
|
+
adjusted_decision: Optional[str] = None # suggested decision override
|
|
41
|
+
confidence_score: float = 0.0 # 0.0-1.0 how confident the suggestion is
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
|
|
45
|
+
class SourceTrustEntry:
|
|
46
|
+
"""Trust score for a URL, file, or domain."""
|
|
47
|
+
|
|
48
|
+
source_type: str # url/file/domain
|
|
49
|
+
source_key: str
|
|
50
|
+
total_scans: int = 0
|
|
51
|
+
injection_detections: int = 0
|
|
52
|
+
trust_score: float = 0.5 # 0.0=bad, 1.0=good
|
|
53
|
+
last_clean_scan: Optional[str] = None
|
|
54
|
+
last_injection: Optional[str] = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class WorkflowBaseline:
|
|
59
|
+
"""Baseline tool usage pattern for a project."""
|
|
60
|
+
|
|
61
|
+
project_hash: str
|
|
62
|
+
tool_name: str
|
|
63
|
+
hour_of_day: Optional[int]
|
|
64
|
+
invocation_count: int = 0
|
|
65
|
+
denied_count: int = 0
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
|
|
69
|
+
class LearnedWhitelistSuggestion:
|
|
70
|
+
"""A suggested whitelist entry derived from approval patterns."""
|
|
71
|
+
|
|
72
|
+
id: int
|
|
73
|
+
pattern_name: str
|
|
74
|
+
tool_name: Optional[str]
|
|
75
|
+
path_prefix: Optional[str]
|
|
76
|
+
approval_count: int = 0
|
|
77
|
+
denial_count: int = 0
|
|
78
|
+
confidence: float = 0.0
|
|
79
|
+
suggested_at: Optional[str] = None
|
|
80
|
+
human_reviewed: int = 0 # 0=pending, 1=accepted, -1=rejected
|