tweek 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tweek/__init__.py +1 -1
- tweek/cli_core.py +23 -6
- tweek/cli_install.py +361 -91
- tweek/cli_uninstall.py +119 -36
- tweek/config/families.yaml +13 -0
- tweek/config/models.py +31 -3
- tweek/config/patterns.yaml +126 -2
- tweek/diagnostics.py +124 -1
- tweek/hooks/break_glass.py +70 -47
- tweek/hooks/overrides.py +19 -1
- tweek/hooks/post_tool_use.py +6 -2
- tweek/hooks/pre_tool_use.py +19 -2
- tweek/hooks/wrapper_post_tool_use.py +121 -0
- tweek/hooks/wrapper_pre_tool_use.py +121 -0
- tweek/integrations/openclaw.py +70 -60
- tweek/integrations/openclaw_detection.py +140 -0
- tweek/integrations/openclaw_server.py +359 -86
- tweek/logging/security_log.py +22 -0
- tweek/memory/safety.py +7 -3
- tweek/memory/store.py +31 -10
- tweek/plugins/base.py +9 -1
- tweek/plugins/detectors/openclaw.py +31 -92
- tweek/plugins/screening/heuristic_scorer.py +12 -1
- tweek/plugins/screening/local_model_reviewer.py +9 -0
- tweek/security/language.py +2 -1
- tweek/security/llm_reviewer.py +45 -18
- tweek/security/local_model.py +21 -0
- tweek/security/model_registry.py +2 -2
- tweek/security/rate_limiter.py +99 -1
- tweek/skills/guard.py +30 -7
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/METADATA +1 -1
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/RECORD +37 -34
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/WHEEL +0 -0
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/entry_points.txt +0 -0
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/licenses/NOTICE +0 -0
- {tweek-0.4.1.dist-info → tweek-0.4.2.dist-info}/top_level.txt +0 -0
tweek/memory/store.py
CHANGED
|
@@ -26,6 +26,7 @@ from tweek.memory.schemas import (
|
|
|
26
26
|
from tweek.memory.safety import (
|
|
27
27
|
MIN_APPROVAL_RATIO,
|
|
28
28
|
MIN_CONFIDENCE_SCORE,
|
|
29
|
+
MIN_DECISION_SPAN_HOURS,
|
|
29
30
|
MIN_DECISION_THRESHOLD,
|
|
30
31
|
SCOPED_THRESHOLDS,
|
|
31
32
|
compute_suggested_decision,
|
|
@@ -36,6 +37,12 @@ from tweek.memory.safety import (
|
|
|
36
37
|
# Half-life in days for time decay
|
|
37
38
|
DECAY_HALF_LIFE_DAYS = 30
|
|
38
39
|
|
|
40
|
+
# Valid table names for dynamic SQL (used by get_stats, export_all, clear_table)
|
|
41
|
+
_VALID_TABLES = frozenset({
|
|
42
|
+
"pattern_decisions", "source_trust", "workflow_baselines",
|
|
43
|
+
"learned_whitelists", "memory_audit",
|
|
44
|
+
})
|
|
45
|
+
|
|
39
46
|
# Default global memory DB path
|
|
40
47
|
GLOBAL_MEMORY_PATH = Path.home() / ".tweek" / "memory.db"
|
|
41
48
|
|
|
@@ -339,6 +346,7 @@ class MemoryStore:
|
|
|
339
346
|
SUM(CASE WHEN user_response = 'approved' THEN decay_weight ELSE 0 END)
|
|
340
347
|
/ SUM(decay_weight)
|
|
341
348
|
ELSE 0.5 END as approval_ratio,
|
|
349
|
+
MIN(timestamp) as first_decision,
|
|
342
350
|
MAX(timestamp) as last_decision
|
|
343
351
|
FROM pattern_decisions
|
|
344
352
|
WHERE {where_clause} AND decay_weight > 0.01
|
|
@@ -360,6 +368,23 @@ class MemoryStore:
|
|
|
360
368
|
if total_weighted < threshold:
|
|
361
369
|
continue
|
|
362
370
|
|
|
371
|
+
# Temporal spread: decisions must span MIN_DECISION_SPAN_HOURS
|
|
372
|
+
# to prevent rapid-fire approval bypasses
|
|
373
|
+
first_ts = row["first_decision"]
|
|
374
|
+
last_ts = row["last_decision"]
|
|
375
|
+
if first_ts and last_ts and first_ts != last_ts:
|
|
376
|
+
try:
|
|
377
|
+
t0 = datetime.fromisoformat(first_ts)
|
|
378
|
+
t1 = datetime.fromisoformat(last_ts)
|
|
379
|
+
span_hours = (t1 - t0).total_seconds() / 3600
|
|
380
|
+
if span_hours < MIN_DECISION_SPAN_HOURS:
|
|
381
|
+
continue
|
|
382
|
+
except (ValueError, TypeError):
|
|
383
|
+
pass # Malformed timestamps — skip check, don't block
|
|
384
|
+
elif total > 1:
|
|
385
|
+
# Multiple decisions with same timestamp — too rapid
|
|
386
|
+
continue
|
|
387
|
+
|
|
363
388
|
# Compute suggested decision with scope-specific threshold
|
|
364
389
|
suggested = compute_suggested_decision(
|
|
365
390
|
current_decision=current_decision,
|
|
@@ -818,8 +843,8 @@ class MemoryStore:
|
|
|
818
843
|
conn = self._get_connection()
|
|
819
844
|
stats = {}
|
|
820
845
|
|
|
821
|
-
for table in
|
|
822
|
-
|
|
846
|
+
for table in _VALID_TABLES:
|
|
847
|
+
# table names are from a frozen constant, safe for interpolation
|
|
823
848
|
row = conn.execute(f"SELECT COUNT(*) as cnt FROM {table}").fetchone()
|
|
824
849
|
stats[table] = row["cnt"]
|
|
825
850
|
|
|
@@ -879,8 +904,8 @@ class MemoryStore:
|
|
|
879
904
|
conn = self._get_connection()
|
|
880
905
|
data = {}
|
|
881
906
|
|
|
882
|
-
for table in (
|
|
883
|
-
|
|
907
|
+
for table in sorted(_VALID_TABLES - {"memory_audit"}):
|
|
908
|
+
# table names are from a frozen constant, safe for interpolation
|
|
884
909
|
rows = conn.execute(f"SELECT * FROM {table}").fetchall()
|
|
885
910
|
data[table] = [dict(r) for r in rows]
|
|
886
911
|
|
|
@@ -892,12 +917,8 @@ class MemoryStore:
|
|
|
892
917
|
|
|
893
918
|
Returns the number of deleted rows.
|
|
894
919
|
"""
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
"learned_whitelists", "memory_audit",
|
|
898
|
-
}
|
|
899
|
-
if table_name not in valid_tables:
|
|
900
|
-
raise ValueError(f"Invalid table: {table_name}. Must be one of {valid_tables}")
|
|
920
|
+
if table_name not in _VALID_TABLES:
|
|
921
|
+
raise ValueError(f"Invalid table: {table_name}. Must be one of {_VALID_TABLES}")
|
|
901
922
|
|
|
902
923
|
conn = self._get_connection()
|
|
903
924
|
cursor = conn.execute(f"DELETE FROM {table_name}")
|
tweek/plugins/base.py
CHANGED
|
@@ -59,11 +59,19 @@ class ReDoSProtection:
|
|
|
59
59
|
# Dangerous pattern indicators (simple heuristics)
|
|
60
60
|
# These are common patterns that can cause exponential backtracking
|
|
61
61
|
DANGEROUS_PATTERNS = [
|
|
62
|
-
# Nested quantifiers
|
|
62
|
+
# Nested quantifiers with dot
|
|
63
63
|
r'\(\.\*\)\+', # (.*)+
|
|
64
64
|
r'\(\.\+\)\+', # (.+)+
|
|
65
65
|
r'\(\.\*\)\*', # (.*)*
|
|
66
66
|
r'\(\.\+\)\*', # (.+)*
|
|
67
|
+
# Nested quantifiers with character classes
|
|
68
|
+
r'\(\[a-z[^\]]*\]\+\)\+', # ([a-z]+)+
|
|
69
|
+
r'\(\\w\+\)\+', # (\w+)+
|
|
70
|
+
r'\(\\d\+\)\+', # (\d+)+
|
|
71
|
+
r'\(\\s\+\)\+', # (\s+)+
|
|
72
|
+
# Multi-char groups with nested quantifiers
|
|
73
|
+
r'\(\.\{2,\}?\)\+', # (.{2,})+
|
|
74
|
+
r'\([^)]+\{[0-9,]+\}\)\+', # (x{n,m})+
|
|
67
75
|
# Overlapping alternation with quantifiers
|
|
68
76
|
r'\([^)]*\|[^)]*\)\+', # (a|a)+
|
|
69
77
|
r'\([^)]*\|[^)]*\)\*', # (a|a)*
|
|
@@ -9,11 +9,17 @@ Detects OpenClaw AI personal assistant:
|
|
|
9
9
|
- Potential proxy conflicts
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
import os
|
|
13
|
-
import subprocess
|
|
14
12
|
import json
|
|
15
13
|
from pathlib import Path
|
|
16
|
-
from typing import
|
|
14
|
+
from typing import List, Dict, Any
|
|
15
|
+
|
|
16
|
+
from tweek.integrations.openclaw_detection import (
|
|
17
|
+
OPENCLAW_CONFIG,
|
|
18
|
+
OPENCLAW_DEFAULT_PORT,
|
|
19
|
+
check_gateway_active,
|
|
20
|
+
check_npm_installation,
|
|
21
|
+
check_running_process,
|
|
22
|
+
)
|
|
17
23
|
from tweek.plugins.base import ToolDetectorPlugin, DetectionResult
|
|
18
24
|
|
|
19
25
|
|
|
@@ -33,8 +39,8 @@ class OpenClawDetector(ToolDetectorPlugin):
|
|
|
33
39
|
AUTHOR = "Tweek"
|
|
34
40
|
REQUIRES_LICENSE = "free"
|
|
35
41
|
TAGS = ["detector", "openclaw", "assistant"]
|
|
42
|
+
DEFAULT_PORT = OPENCLAW_DEFAULT_PORT
|
|
36
43
|
|
|
37
|
-
DEFAULT_PORT = 18789
|
|
38
44
|
CONFIG_LOCATIONS = [
|
|
39
45
|
Path.home() / ".openclaw" / "openclaw.json",
|
|
40
46
|
]
|
|
@@ -44,15 +50,13 @@ class OpenClawDetector(ToolDetectorPlugin):
|
|
|
44
50
|
return "openclaw"
|
|
45
51
|
|
|
46
52
|
def detect(self) -> DetectionResult:
|
|
47
|
-
"""
|
|
48
|
-
Detect OpenClaw installation and status.
|
|
49
|
-
"""
|
|
53
|
+
"""Detect OpenClaw installation and status."""
|
|
50
54
|
result = DetectionResult(
|
|
51
55
|
detected=False,
|
|
52
56
|
tool_name=self.name,
|
|
53
57
|
)
|
|
54
58
|
|
|
55
|
-
# Check npm global installation
|
|
59
|
+
# Check npm global installation (via wrapper for testability)
|
|
56
60
|
npm_info = self._check_npm_installation()
|
|
57
61
|
if npm_info:
|
|
58
62
|
result.detected = True
|
|
@@ -69,16 +73,16 @@ class OpenClawDetector(ToolDetectorPlugin):
|
|
|
69
73
|
try:
|
|
70
74
|
with open(config_path) as f:
|
|
71
75
|
config = json.load(f)
|
|
72
|
-
result.port = config.get("gateway", {}).get("port",
|
|
76
|
+
result.port = config.get("gateway", {}).get("port", OPENCLAW_DEFAULT_PORT)
|
|
73
77
|
except (json.JSONDecodeError, IOError):
|
|
74
|
-
result.port =
|
|
78
|
+
result.port = OPENCLAW_DEFAULT_PORT
|
|
75
79
|
|
|
76
80
|
# Check for home directory existence
|
|
77
81
|
openclaw_home = Path.home() / ".openclaw"
|
|
78
82
|
if openclaw_home.exists():
|
|
79
83
|
result.detected = True
|
|
80
84
|
|
|
81
|
-
# Check for running process
|
|
85
|
+
# Check for running process (via wrapper for testability)
|
|
82
86
|
process_info = self._check_running_process()
|
|
83
87
|
if process_info:
|
|
84
88
|
result.detected = True
|
|
@@ -87,99 +91,33 @@ class OpenClawDetector(ToolDetectorPlugin):
|
|
|
87
91
|
if process_info.get("port"):
|
|
88
92
|
result.port = process_info["port"]
|
|
89
93
|
|
|
90
|
-
# Check if gateway is active
|
|
94
|
+
# Check if gateway is active (via wrapper for testability)
|
|
91
95
|
if result.port:
|
|
92
96
|
result.metadata["gateway_active"] = self._check_gateway_active(result.port)
|
|
93
97
|
|
|
94
98
|
return result
|
|
95
99
|
|
|
96
|
-
def
|
|
97
|
-
"""Check if openclaw is installed via npm."""
|
|
98
|
-
try:
|
|
99
|
-
# Try npm list -g
|
|
100
|
-
proc = subprocess.run(
|
|
101
|
-
["npm", "list", "-g", "openclaw", "--json"],
|
|
102
|
-
capture_output=True,
|
|
103
|
-
text=True,
|
|
104
|
-
timeout=10,
|
|
105
|
-
)
|
|
106
|
-
if proc.returncode == 0:
|
|
107
|
-
data = json.loads(proc.stdout)
|
|
108
|
-
deps = data.get("dependencies", {})
|
|
109
|
-
if "openclaw" in deps:
|
|
110
|
-
return {
|
|
111
|
-
"version": deps["openclaw"].get("version", "unknown"),
|
|
112
|
-
"path": data.get("path", ""),
|
|
113
|
-
}
|
|
114
|
-
except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
|
|
115
|
-
pass
|
|
116
|
-
|
|
117
|
-
# Try which/where
|
|
118
|
-
try:
|
|
119
|
-
proc = subprocess.run(
|
|
120
|
-
["which", "openclaw"] if os.name != "nt" else ["where", "openclaw"],
|
|
121
|
-
capture_output=True,
|
|
122
|
-
text=True,
|
|
123
|
-
timeout=5,
|
|
124
|
-
)
|
|
125
|
-
if proc.returncode == 0 and proc.stdout.strip():
|
|
126
|
-
return {"path": proc.stdout.strip().split("\n")[0]}
|
|
127
|
-
except (subprocess.TimeoutExpired, FileNotFoundError):
|
|
128
|
-
pass
|
|
129
|
-
|
|
130
|
-
return None
|
|
131
|
-
|
|
132
|
-
def _find_config(self) -> Optional[Path]:
|
|
100
|
+
def _find_config(self):
|
|
133
101
|
"""Find OpenClaw config file."""
|
|
134
102
|
for path in self.CONFIG_LOCATIONS:
|
|
135
103
|
if path.exists():
|
|
136
104
|
return path
|
|
137
105
|
return None
|
|
138
106
|
|
|
139
|
-
def
|
|
140
|
-
"""Check
|
|
141
|
-
|
|
142
|
-
if os.name == "nt":
|
|
143
|
-
# Windows
|
|
144
|
-
proc = subprocess.run(
|
|
145
|
-
["tasklist", "/FI", "IMAGENAME eq node.exe", "/FO", "CSV"],
|
|
146
|
-
capture_output=True,
|
|
147
|
-
text=True,
|
|
148
|
-
timeout=10,
|
|
149
|
-
)
|
|
150
|
-
if "openclaw" in proc.stdout.lower():
|
|
151
|
-
return {"running": True}
|
|
152
|
-
else:
|
|
153
|
-
# Unix-like
|
|
154
|
-
proc = subprocess.run(
|
|
155
|
-
["pgrep", "-f", "openclaw"],
|
|
156
|
-
capture_output=True,
|
|
157
|
-
text=True,
|
|
158
|
-
timeout=10,
|
|
159
|
-
)
|
|
160
|
-
if proc.returncode == 0 and proc.stdout.strip():
|
|
161
|
-
pids = proc.stdout.strip().split("\n")
|
|
162
|
-
return {"pid": pids[0]}
|
|
163
|
-
|
|
164
|
-
# Also check for node process with openclaw
|
|
165
|
-
proc = subprocess.run(
|
|
166
|
-
["pgrep", "-af", "node.*openclaw"],
|
|
167
|
-
capture_output=True,
|
|
168
|
-
text=True,
|
|
169
|
-
timeout=10,
|
|
170
|
-
)
|
|
171
|
-
if proc.returncode == 0 and proc.stdout.strip():
|
|
172
|
-
return {"running": True}
|
|
107
|
+
def _check_npm_installation(self) -> dict | None:
|
|
108
|
+
"""Check npm global installation (wrapper for shared detection)."""
|
|
109
|
+
return check_npm_installation()
|
|
173
110
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
return None
|
|
111
|
+
def _check_running_process(self) -> dict | None:
|
|
112
|
+
"""Check for running openclaw process (wrapper for shared detection)."""
|
|
113
|
+
return check_running_process()
|
|
178
114
|
|
|
179
|
-
def _check_gateway_active(self, port: int) -> bool:
|
|
180
|
-
"""Check if
|
|
115
|
+
def _check_gateway_active(self, port: int | None = None) -> bool:
|
|
116
|
+
"""Check if gateway is active on the given port."""
|
|
117
|
+
import socket
|
|
118
|
+
if port is None:
|
|
119
|
+
port = self.DEFAULT_PORT
|
|
181
120
|
try:
|
|
182
|
-
import socket
|
|
183
121
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
184
122
|
sock.settimeout(1)
|
|
185
123
|
result = sock.connect_ex(("127.0.0.1", port))
|
|
@@ -197,12 +135,13 @@ class OpenClawDetector(ToolDetectorPlugin):
|
|
|
197
135
|
if result.metadata.get("gateway_active"):
|
|
198
136
|
conflicts.append(
|
|
199
137
|
f"OpenClaw gateway is active on port {result.port}. "
|
|
200
|
-
"
|
|
138
|
+
"Both OpenClaw and Tweek will screen tool calls; "
|
|
139
|
+
"execution order depends on plugin configuration."
|
|
201
140
|
)
|
|
202
141
|
elif result.running:
|
|
203
142
|
conflicts.append(
|
|
204
143
|
"OpenClaw process is running. Gateway may start and "
|
|
205
|
-
"
|
|
144
|
+
"begin screening tool calls alongside Tweek."
|
|
206
145
|
)
|
|
207
146
|
|
|
208
147
|
return conflicts
|
|
@@ -70,6 +70,10 @@ _BENIGN_PATTERNS = [
|
|
|
70
70
|
]
|
|
71
71
|
]
|
|
72
72
|
|
|
73
|
+
# Command chaining operators -- presence means a "benign" prefix does not
|
|
74
|
+
# guarantee the entire command is benign (Finding F7 fix).
|
|
75
|
+
_CHAIN_OPERATORS_RE = re.compile(r"\s*(?:&&|\|\||;)\s*")
|
|
76
|
+
|
|
73
77
|
# Shell expansion patterns
|
|
74
78
|
_SHELL_EXPANSION_RE = re.compile(r"\$\(|\$\{|`[^`]+`|\beval\s|\bexec\s|\bsource\s")
|
|
75
79
|
|
|
@@ -212,8 +216,15 @@ class HeuristicScorerPlugin(ScreeningPlugin):
|
|
|
212
216
|
return re.split(r"[\s|;&()]+", content.lower())
|
|
213
217
|
|
|
214
218
|
def _is_benign(self, content: str) -> Optional[str]:
|
|
215
|
-
"""Check if content matches a known-benign pattern.
|
|
219
|
+
"""Check if content matches a known-benign pattern.
|
|
220
|
+
|
|
221
|
+
Returns None (not benign) if command chaining operators are detected,
|
|
222
|
+
since a benign prefix (e.g. 'git commit') does not make the entire
|
|
223
|
+
chained command benign (e.g. 'git commit && curl evil.com').
|
|
224
|
+
"""
|
|
216
225
|
stripped = content.strip()
|
|
226
|
+
if _CHAIN_OPERATORS_RE.search(stripped):
|
|
227
|
+
return None
|
|
217
228
|
for pattern in _BENIGN_PATTERNS:
|
|
218
229
|
if pattern.match(stripped):
|
|
219
230
|
return pattern.pattern
|
|
@@ -91,6 +91,15 @@ class LocalModelReviewerPlugin(ScreeningPlugin):
|
|
|
91
91
|
reason=f"Local model inference error: {e}",
|
|
92
92
|
)
|
|
93
93
|
|
|
94
|
+
# F6: Force cloud LLM escalation for dangerous-tier commands.
|
|
95
|
+
# A poisoned local model could produce high-confidence false negatives.
|
|
96
|
+
# When always_escalate_dangerous is enabled, override the local model's
|
|
97
|
+
# should_escalate to True for dangerous-tier commands.
|
|
98
|
+
tier = context.get("tier", "default")
|
|
99
|
+
always_escalate = (self._config or {}).get("always_escalate_dangerous", True)
|
|
100
|
+
if tier == "dangerous" and always_escalate and not result.should_escalate:
|
|
101
|
+
result.should_escalate = True
|
|
102
|
+
|
|
94
103
|
# Map risk levels to screening result
|
|
95
104
|
risk_severity_map = {
|
|
96
105
|
"safe": Severity.LOW,
|
tweek/security/language.py
CHANGED
|
@@ -229,7 +229,8 @@ def detect_non_english(content: str, min_confidence: float = 0.3) -> LanguageDet
|
|
|
229
229
|
)
|
|
230
230
|
extended_ratio = extended_count / max(total_alpha, 1)
|
|
231
231
|
|
|
232
|
-
|
|
232
|
+
_EXTENDED_LATIN_THRESHOLD = 0.12 # 12%+ accented characters suggests non-English
|
|
233
|
+
if extended_ratio >= _EXTENDED_LATIN_THRESHOLD:
|
|
233
234
|
detected_scripts.add("LATIN_EXTENDED")
|
|
234
235
|
confidence = min(1.0, extended_ratio * 5)
|
|
235
236
|
|
tweek/security/llm_reviewer.py
CHANGED
|
@@ -1138,6 +1138,7 @@ Do not include any other text or explanation."""
|
|
|
1138
1138
|
api_key_env: Optional[str] = None,
|
|
1139
1139
|
local_config: Optional[Dict[str, Any]] = None,
|
|
1140
1140
|
fallback_config: Optional[Dict[str, Any]] = None,
|
|
1141
|
+
fail_mode: str = "open",
|
|
1141
1142
|
):
|
|
1142
1143
|
"""Initialize the LLM reviewer.
|
|
1143
1144
|
|
|
@@ -1151,8 +1152,10 @@ Do not include any other text or explanation."""
|
|
|
1151
1152
|
api_key_env: Override which env var to read for the API key
|
|
1152
1153
|
local_config: Config for local LLM server detection (Ollama/LM Studio)
|
|
1153
1154
|
fallback_config: Config for fallback chain behavior
|
|
1155
|
+
fail_mode: Behavior when LLM unavailable: "open", "closed", or "escalate"
|
|
1154
1156
|
"""
|
|
1155
1157
|
self.timeout = timeout
|
|
1158
|
+
self._fail_mode = fail_mode
|
|
1156
1159
|
self._provider_instance: Optional[ReviewProvider] = None
|
|
1157
1160
|
|
|
1158
1161
|
if enabled:
|
|
@@ -1309,40 +1312,61 @@ Do not include any other text or explanation."""
|
|
|
1309
1312
|
)
|
|
1310
1313
|
|
|
1311
1314
|
except ReviewProviderError as e:
|
|
1312
|
-
# Infrastructure errors (auth, network, rate limit, timeout) should
|
|
1313
|
-
# NOT block the user with a scary dialog. Pattern matching is the
|
|
1314
|
-
# primary defense; LLM review is a supplementary layer. Gracefully
|
|
1315
|
-
# degrade and let pattern matching handle it.
|
|
1316
1315
|
import sys
|
|
1317
1316
|
error_type = "timeout" if e.is_timeout else "provider_error"
|
|
1318
1317
|
print(
|
|
1319
1318
|
f"tweek: LLM review unavailable ({self.provider_name}): {e}",
|
|
1320
1319
|
file=sys.stderr,
|
|
1321
1320
|
)
|
|
1322
|
-
return
|
|
1323
|
-
risk_level=RiskLevel.SAFE,
|
|
1324
|
-
reason=f"LLM review unavailable ({self.provider_name}): {e}",
|
|
1325
|
-
confidence=0.0,
|
|
1326
|
-
details={"error": error_type, "provider": self.provider_name,
|
|
1327
|
-
"graceful_degradation": True},
|
|
1328
|
-
should_prompt=False
|
|
1329
|
-
)
|
|
1321
|
+
return self._build_fail_result(error_type, str(e))
|
|
1330
1322
|
|
|
1331
1323
|
except Exception as e:
|
|
1332
|
-
# Unexpected error — also degrade gracefully. Pattern matching
|
|
1333
|
-
# already ran; don't punish the user for an LLM config issue.
|
|
1334
1324
|
import sys
|
|
1335
1325
|
print(
|
|
1336
1326
|
f"tweek: LLM review error: {e}",
|
|
1337
1327
|
file=sys.stderr,
|
|
1338
1328
|
)
|
|
1329
|
+
return self._build_fail_result("unexpected_error", str(e))
|
|
1330
|
+
|
|
1331
|
+
def _build_fail_result(self, error_type: str, error_msg: str) -> LLMReviewResult:
|
|
1332
|
+
"""Build an LLMReviewResult based on the configured fail_mode.
|
|
1333
|
+
|
|
1334
|
+
Args:
|
|
1335
|
+
error_type: Type of error (timeout, provider_error, unexpected_error)
|
|
1336
|
+
error_msg: Human-readable error message
|
|
1337
|
+
|
|
1338
|
+
Returns:
|
|
1339
|
+
LLMReviewResult configured per self._fail_mode:
|
|
1340
|
+
- "open": SAFE, should_prompt=False (default, backward compatible)
|
|
1341
|
+
- "closed": DANGEROUS, should_prompt=True (hard block)
|
|
1342
|
+
- "escalate": SUSPICIOUS, should_prompt=True (ask user)
|
|
1343
|
+
"""
|
|
1344
|
+
if self._fail_mode == "closed":
|
|
1345
|
+
return LLMReviewResult(
|
|
1346
|
+
risk_level=RiskLevel.DANGEROUS,
|
|
1347
|
+
reason=f"LLM review unavailable; fail-closed policy active ({error_msg})",
|
|
1348
|
+
confidence=0.0,
|
|
1349
|
+
details={"error": error_type, "provider": self.provider_name,
|
|
1350
|
+
"fail_mode": "closed"},
|
|
1351
|
+
should_prompt=True,
|
|
1352
|
+
)
|
|
1353
|
+
elif self._fail_mode == "escalate":
|
|
1354
|
+
return LLMReviewResult(
|
|
1355
|
+
risk_level=RiskLevel.SUSPICIOUS,
|
|
1356
|
+
reason=f"LLM review unavailable; escalating to user ({error_msg})",
|
|
1357
|
+
confidence=0.0,
|
|
1358
|
+
details={"error": error_type, "provider": self.provider_name,
|
|
1359
|
+
"fail_mode": "escalate"},
|
|
1360
|
+
should_prompt=True,
|
|
1361
|
+
)
|
|
1362
|
+
else: # "open" (default, backward compatible)
|
|
1339
1363
|
return LLMReviewResult(
|
|
1340
1364
|
risk_level=RiskLevel.SAFE,
|
|
1341
|
-
reason=f"LLM review unavailable (
|
|
1365
|
+
reason=f"LLM review unavailable ({self.provider_name}): {error_msg}",
|
|
1342
1366
|
confidence=0.0,
|
|
1343
|
-
details={"error":
|
|
1344
|
-
"graceful_degradation": True},
|
|
1345
|
-
should_prompt=False
|
|
1367
|
+
details={"error": error_type, "provider": self.provider_name,
|
|
1368
|
+
"graceful_degradation": True, "fail_mode": "open"},
|
|
1369
|
+
should_prompt=False,
|
|
1346
1370
|
)
|
|
1347
1371
|
|
|
1348
1372
|
# Translation prompt for non-English skill/content audit
|
|
@@ -1464,6 +1488,7 @@ def get_llm_reviewer(
|
|
|
1464
1488
|
# Load local/fallback config from tiers.yaml
|
|
1465
1489
|
local_config = None
|
|
1466
1490
|
fallback_config = None
|
|
1491
|
+
fail_mode = "open"
|
|
1467
1492
|
try:
|
|
1468
1493
|
import yaml
|
|
1469
1494
|
tiers_path = Path(__file__).parent.parent / "config" / "tiers.yaml"
|
|
@@ -1484,6 +1509,7 @@ def get_llm_reviewer(
|
|
|
1484
1509
|
api_key_env = llm_cfg.get("api_key_env")
|
|
1485
1510
|
if enabled:
|
|
1486
1511
|
enabled = llm_cfg.get("enabled", True)
|
|
1512
|
+
fail_mode = llm_cfg.get("fail_mode", "open")
|
|
1487
1513
|
except Exception:
|
|
1488
1514
|
pass # Config loading is best-effort
|
|
1489
1515
|
|
|
@@ -1495,6 +1521,7 @@ def get_llm_reviewer(
|
|
|
1495
1521
|
api_key_env=api_key_env,
|
|
1496
1522
|
local_config=local_config,
|
|
1497
1523
|
fallback_config=fallback_config,
|
|
1524
|
+
fail_mode=fail_mode,
|
|
1498
1525
|
)
|
|
1499
1526
|
return _llm_reviewer
|
|
1500
1527
|
|
tweek/security/local_model.py
CHANGED
|
@@ -88,6 +88,7 @@ class LocalModelInference:
|
|
|
88
88
|
self._tokenizer: Optional[object] = None # Tokenizer
|
|
89
89
|
self._lock = threading.Lock()
|
|
90
90
|
self._loaded = False
|
|
91
|
+
self._integrity_verified = False
|
|
91
92
|
|
|
92
93
|
# Load metadata
|
|
93
94
|
self._label_map: Dict[int, str] = {}
|
|
@@ -176,6 +177,26 @@ class LocalModelInference:
|
|
|
176
177
|
# Load metadata
|
|
177
178
|
self._load_metadata()
|
|
178
179
|
|
|
180
|
+
# Verify model file integrity (SHA-256 checksums)
|
|
181
|
+
if not self._integrity_verified:
|
|
182
|
+
try:
|
|
183
|
+
from tweek.security.model_registry import verify_model_hashes
|
|
184
|
+
hash_results = verify_model_hashes(self._model_name)
|
|
185
|
+
mismatched = [
|
|
186
|
+
f for f, status in hash_results.items()
|
|
187
|
+
if status == "mismatch"
|
|
188
|
+
]
|
|
189
|
+
if mismatched:
|
|
190
|
+
raise RuntimeError(
|
|
191
|
+
f"Model integrity check failed for: "
|
|
192
|
+
f"{', '.join(mismatched)}. "
|
|
193
|
+
f"Files may be corrupted or tampered with. "
|
|
194
|
+
f"Run 'tweek model download --force' to re-download."
|
|
195
|
+
)
|
|
196
|
+
self._integrity_verified = True
|
|
197
|
+
except ImportError:
|
|
198
|
+
pass # model_registry not available; skip verification
|
|
199
|
+
|
|
179
200
|
self._loaded = True
|
|
180
201
|
|
|
181
202
|
def is_loaded(self) -> bool:
|
tweek/security/model_registry.py
CHANGED
|
@@ -377,6 +377,8 @@ def verify_model(name: str) -> Dict[str, bool]:
|
|
|
377
377
|
|
|
378
378
|
status["model_meta.yaml"] = (model_dir / "model_meta.yaml").exists()
|
|
379
379
|
|
|
380
|
+
return status
|
|
381
|
+
|
|
380
382
|
|
|
381
383
|
def verify_model_hashes(name: str) -> Dict[str, Optional[str]]:
|
|
382
384
|
"""Verify SHA-256 integrity of an installed model's files.
|
|
@@ -413,8 +415,6 @@ def verify_model_hashes(name: str) -> Dict[str, Optional[str]]:
|
|
|
413
415
|
|
|
414
416
|
return results
|
|
415
417
|
|
|
416
|
-
return status
|
|
417
|
-
|
|
418
418
|
|
|
419
419
|
def get_model_size(name: str) -> Optional[int]:
|
|
420
420
|
"""Get the total size of an installed model in bytes.
|
tweek/security/rate_limiter.py
CHANGED
|
@@ -297,6 +297,101 @@ class CircuitBreaker:
|
|
|
297
297
|
return metrics
|
|
298
298
|
|
|
299
299
|
|
|
300
|
+
class PersistentCircuitBreaker(CircuitBreaker):
|
|
301
|
+
"""Circuit breaker with JSON file persistence across process invocations.
|
|
302
|
+
|
|
303
|
+
Uses fcntl.flock for safe concurrent access. Falls back to fresh
|
|
304
|
+
in-memory state if the persistence file is corrupted or inaccessible.
|
|
305
|
+
"""
|
|
306
|
+
|
|
307
|
+
def __init__(
|
|
308
|
+
self,
|
|
309
|
+
config: Optional[CircuitBreakerConfig] = None,
|
|
310
|
+
state_path: Optional[Path] = None,
|
|
311
|
+
):
|
|
312
|
+
super().__init__(config)
|
|
313
|
+
self._state_path = state_path or (Path.home() / ".tweek" / ".circuit_breaker.json")
|
|
314
|
+
|
|
315
|
+
def _load_states(self) -> None:
|
|
316
|
+
"""Load persisted states from JSON file under flock."""
|
|
317
|
+
import fcntl
|
|
318
|
+
try:
|
|
319
|
+
self._state_path.parent.mkdir(parents=True, exist_ok=True)
|
|
320
|
+
if not self._state_path.exists():
|
|
321
|
+
return
|
|
322
|
+
with open(self._state_path, "r") as f:
|
|
323
|
+
fcntl.flock(f, fcntl.LOCK_SH)
|
|
324
|
+
try:
|
|
325
|
+
raw = json.load(f)
|
|
326
|
+
finally:
|
|
327
|
+
fcntl.flock(f, fcntl.LOCK_UN)
|
|
328
|
+
for key, data in raw.items():
|
|
329
|
+
self._states[key] = CircuitBreakerState(
|
|
330
|
+
state=CircuitState(data.get("state", "closed")),
|
|
331
|
+
failure_count=data.get("failure_count", 0),
|
|
332
|
+
success_count=data.get("success_count", 0),
|
|
333
|
+
last_failure_time=(
|
|
334
|
+
datetime.fromisoformat(data["last_failure_time"])
|
|
335
|
+
if data.get("last_failure_time") else None
|
|
336
|
+
),
|
|
337
|
+
last_state_change=(
|
|
338
|
+
datetime.fromisoformat(data["last_state_change"])
|
|
339
|
+
if data.get("last_state_change") else None
|
|
340
|
+
),
|
|
341
|
+
half_open_requests=data.get("half_open_requests", 0),
|
|
342
|
+
)
|
|
343
|
+
except (json.JSONDecodeError, OSError, KeyError, ValueError):
|
|
344
|
+
pass # Corrupt file — start fresh
|
|
345
|
+
|
|
346
|
+
def _save_states(self) -> None:
|
|
347
|
+
"""Persist current states to JSON file under flock."""
|
|
348
|
+
import fcntl
|
|
349
|
+
try:
|
|
350
|
+
self._state_path.parent.mkdir(parents=True, exist_ok=True)
|
|
351
|
+
serializable = {}
|
|
352
|
+
for key, state in self._states.items():
|
|
353
|
+
serializable[key] = {
|
|
354
|
+
"state": state.state.value,
|
|
355
|
+
"failure_count": state.failure_count,
|
|
356
|
+
"success_count": state.success_count,
|
|
357
|
+
"last_failure_time": (
|
|
358
|
+
state.last_failure_time.isoformat()
|
|
359
|
+
if state.last_failure_time else None
|
|
360
|
+
),
|
|
361
|
+
"last_state_change": (
|
|
362
|
+
state.last_state_change.isoformat()
|
|
363
|
+
if state.last_state_change else None
|
|
364
|
+
),
|
|
365
|
+
"half_open_requests": state.half_open_requests,
|
|
366
|
+
}
|
|
367
|
+
with open(self._state_path, "w") as f:
|
|
368
|
+
fcntl.flock(f, fcntl.LOCK_EX)
|
|
369
|
+
try:
|
|
370
|
+
json.dump(serializable, f)
|
|
371
|
+
finally:
|
|
372
|
+
fcntl.flock(f, fcntl.LOCK_UN)
|
|
373
|
+
except OSError:
|
|
374
|
+
pass # Best-effort persistence
|
|
375
|
+
|
|
376
|
+
def record_success(self, key: str = "default") -> CircuitState:
|
|
377
|
+
self._load_states()
|
|
378
|
+
result = super().record_success(key)
|
|
379
|
+
self._save_states()
|
|
380
|
+
return result
|
|
381
|
+
|
|
382
|
+
def record_failure(self, key: str = "default") -> CircuitState:
|
|
383
|
+
self._load_states()
|
|
384
|
+
result = super().record_failure(key)
|
|
385
|
+
self._save_states()
|
|
386
|
+
return result
|
|
387
|
+
|
|
388
|
+
def can_execute(self, key: str = "default") -> Tuple[bool, CircuitState, Optional[int]]:
|
|
389
|
+
self._load_states()
|
|
390
|
+
result = super().can_execute(key)
|
|
391
|
+
self._save_states()
|
|
392
|
+
return result
|
|
393
|
+
|
|
394
|
+
|
|
300
395
|
class RateLimiter:
|
|
301
396
|
"""
|
|
302
397
|
Rate limiter for detecting resource theft and abuse patterns.
|
|
@@ -459,11 +554,14 @@ class RateLimiter:
|
|
|
459
554
|
RateLimitResult with allowed status and any violations
|
|
460
555
|
"""
|
|
461
556
|
if not session_id:
|
|
462
|
-
# No session ID - generate unique one per
|
|
557
|
+
# No session ID - generate unique one per invocation.
|
|
558
|
+
# os.urandom(16) adds 128 bits of entropy so each call is unique
|
|
559
|
+
# even with identical PID/CWD.
|
|
463
560
|
import os as _os
|
|
464
561
|
import uuid as _uuid
|
|
465
562
|
session_id = hashlib.sha256(
|
|
466
563
|
f"tweek-{_os.getpid()}-{_os.getcwd()}-{_uuid.getnode()}".encode()
|
|
564
|
+
+ _os.urandom(16)
|
|
467
565
|
).hexdigest()[:16]
|
|
468
566
|
|
|
469
567
|
# Check circuit breaker first
|