tweek 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tweek/__init__.py +2 -2
- tweek/_keygen.py +53 -0
- tweek/audit.py +288 -0
- tweek/cli.py +5398 -2392
- tweek/cli_model.py +380 -0
- tweek/config/families.yaml +609 -0
- tweek/config/manager.py +42 -5
- tweek/config/patterns.yaml +1510 -8
- tweek/config/tiers.yaml +161 -11
- tweek/diagnostics.py +71 -2
- tweek/hooks/break_glass.py +163 -0
- tweek/hooks/feedback.py +223 -0
- tweek/hooks/overrides.py +531 -0
- tweek/hooks/post_tool_use.py +472 -0
- tweek/hooks/pre_tool_use.py +1024 -62
- tweek/integrations/openclaw.py +443 -0
- tweek/integrations/openclaw_server.py +385 -0
- tweek/licensing.py +14 -54
- tweek/logging/bundle.py +2 -2
- tweek/logging/security_log.py +56 -13
- tweek/mcp/approval.py +57 -16
- tweek/mcp/proxy.py +18 -0
- tweek/mcp/screening.py +5 -5
- tweek/mcp/server.py +4 -1
- tweek/memory/__init__.py +24 -0
- tweek/memory/queries.py +223 -0
- tweek/memory/safety.py +140 -0
- tweek/memory/schemas.py +80 -0
- tweek/memory/store.py +989 -0
- tweek/platform/__init__.py +4 -4
- tweek/plugins/__init__.py +40 -24
- tweek/plugins/base.py +1 -1
- tweek/plugins/detectors/__init__.py +3 -3
- tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
- tweek/plugins/git_discovery.py +16 -4
- tweek/plugins/git_registry.py +8 -2
- tweek/plugins/git_security.py +21 -9
- tweek/plugins/screening/__init__.py +10 -1
- tweek/plugins/screening/heuristic_scorer.py +477 -0
- tweek/plugins/screening/llm_reviewer.py +14 -6
- tweek/plugins/screening/local_model_reviewer.py +161 -0
- tweek/proxy/__init__.py +38 -37
- tweek/proxy/addon.py +22 -3
- tweek/proxy/interceptor.py +1 -0
- tweek/proxy/server.py +4 -2
- tweek/sandbox/__init__.py +11 -0
- tweek/sandbox/docker_bridge.py +143 -0
- tweek/sandbox/executor.py +9 -6
- tweek/sandbox/layers.py +97 -0
- tweek/sandbox/linux.py +1 -0
- tweek/sandbox/project.py +548 -0
- tweek/sandbox/registry.py +149 -0
- tweek/security/__init__.py +9 -0
- tweek/security/language.py +250 -0
- tweek/security/llm_reviewer.py +1146 -60
- tweek/security/local_model.py +331 -0
- tweek/security/local_reviewer.py +146 -0
- tweek/security/model_registry.py +371 -0
- tweek/security/rate_limiter.py +11 -6
- tweek/security/secret_scanner.py +70 -4
- tweek/security/session_analyzer.py +26 -2
- tweek/skill_template/SKILL.md +200 -0
- tweek/skill_template/__init__.py +0 -0
- tweek/skill_template/cli-reference.md +331 -0
- tweek/skill_template/overrides-reference.md +184 -0
- tweek/skill_template/scripts/__init__.py +0 -0
- tweek/skill_template/scripts/check_installed.py +170 -0
- tweek/skills/__init__.py +38 -0
- tweek/skills/config.py +150 -0
- tweek/skills/fingerprints.py +198 -0
- tweek/skills/guard.py +293 -0
- tweek/skills/isolation.py +469 -0
- tweek/skills/scanner.py +715 -0
- tweek/vault/__init__.py +0 -1
- tweek/vault/cross_platform.py +12 -1
- tweek/vault/keychain.py +87 -29
- tweek-0.2.1.dist-info/METADATA +281 -0
- tweek-0.2.1.dist-info/RECORD +122 -0
- {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/entry_points.txt +8 -1
- {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/licenses/LICENSE +80 -0
- tweek-0.2.1.dist-info/top_level.txt +2 -0
- tweek-openclaw-plugin/node_modules/flatted/python/flatted.py +149 -0
- tweek/integrations/moltbot.py +0 -243
- tweek-0.1.0.dist-info/METADATA +0 -335
- tweek-0.1.0.dist-info/RECORD +0 -85
- tweek-0.1.0.dist-info/top_level.txt +0 -1
- {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tweek Heuristic Scorer Screening Plugin
|
|
3
|
+
|
|
4
|
+
Lightweight signal-based scoring for confidence-gated LLM escalation.
|
|
5
|
+
Runs between Layer 2 (regex) and Layer 3 (LLM) to detect novel attack
|
|
6
|
+
variants that don't match any of the 259 regex patterns but exhibit
|
|
7
|
+
suspicious characteristics.
|
|
8
|
+
|
|
9
|
+
Scoring signals (all local, no network, no LLM):
|
|
10
|
+
- Sensitive path tokens (from pattern family definitions)
|
|
11
|
+
- Exfiltration verbs
|
|
12
|
+
- Encoding/obfuscation tools
|
|
13
|
+
- Shell expansion/eval constructs
|
|
14
|
+
- Pipe chain complexity
|
|
15
|
+
- Combination bonuses (multiplicative)
|
|
16
|
+
- Known-benign dampening
|
|
17
|
+
|
|
18
|
+
FREE feature - available to all users.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Optional, Dict, Any, List, Set, Tuple
|
|
24
|
+
import re
|
|
25
|
+
import yaml
|
|
26
|
+
|
|
27
|
+
from tweek.plugins.base import (
|
|
28
|
+
ScreeningPlugin,
|
|
29
|
+
ScreeningResult,
|
|
30
|
+
Finding,
|
|
31
|
+
Severity,
|
|
32
|
+
ActionType,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class HeuristicScore:
    """Result of heuristic scoring."""

    # Final aggregate score, clamped to [0.0, 1.0] by the scorer.
    total_score: float
    # One dict per fired signal: {"name": ..., "weight": ..., "matched": [...]}.
    signals: List[Dict[str, Any]]
    # Per-family token-hit ratio (hits / family token count), rounded to 3 places.
    family_scores: Dict[str, float]
    # Escalation cutoff; the plugin overrides this with its configured threshold.
    threshold: float = 0.4
    # True when a known-benign command pattern reduced the score.
    dampened: bool = False
    # Human-readable description of which benign pattern matched, if any.
    dampening_reason: Optional[str] = None

    @property
    def should_escalate(self) -> bool:
        """Return True when the score meets the threshold for LLM escalation."""
        return self.total_score >= self.threshold
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Pre-compiled benign command patterns
# Each pattern anchors at the start of the (stripped) command; a match
# triggers score dampening in HeuristicScorerPlugin._is_benign.
_BENIGN_PATTERNS = [
    re.compile(p, re.IGNORECASE)
    for p in [
        r"^git\s+(commit|push|pull|fetch|clone|checkout|branch|merge|log|diff|status|add|stash|rebase|tag|remote|init)\b",
        r"^npm\s+(install|test|run|build|start|ci|audit|outdated|ls|init)\b",
        r"^yarn\s+(install|add|remove|build|test|start|dev)\b",
        r"^pip3?\s+(install|list|show|freeze|check)\b",
        r"^python[23]?\s+(-m\s+)?(pytest|unittest|pip|venv|http\.server|json\.tool)\b",
        r"^(ls|pwd|cd|echo|mkdir|touch|date|which|type|man|help)\b",
        r"^cargo\s+(build|test|run|check|fmt|clippy|doc|bench)\b",
        r"^make(\s+|$)",
        r"^docker\s+(build|run|compose|ps|images|logs|stop|start)\b",
        r"^go\s+(build|test|run|mod|fmt|vet|generate)\b",
        r"^rustc\b",
        r"^gcc\b|^g\+\+\b|^clang\b",
        r"^cat\s+\S+\.(py|js|ts|rs|go|java|c|cpp|h|rb|sh|md|txt|json|yaml|yml|toml|cfg|ini|html|css|xml|sql)\b",
        r"^(ruff|black|prettier|eslint|flake8|mypy|pylint)\b",
    ]
]

# Shell expansion patterns
# Matches command substitution ($(...), backticks), ${...} expansion,
# and eval/exec/source invocations.
_SHELL_EXPANSION_RE = re.compile(r"\$\(|\$\{|`[^`]+`|\beval\s|\bexec\s|\bsource\s")

# Redirect to external patterns
# Matches bash network pseudo-devices (/dev/tcp, /dev/udp), fd redirects,
# and process substitution into network clients.
_REDIRECT_EXTERNAL_RE = re.compile(r"/dev/tcp/|/dev/udp/|>\s*&\d|>\(\s*(curl|wget|nc|ncat)\b")

# Env var with secret name
# Matches $VAR / ${VAR} references whose name starts with a well-known
# secret-bearing prefix (case-insensitive).
_SECRET_ENV_RE = re.compile(
    r"\$\{?(API_KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|PRIVATE_KEY|AUTH|"
    r"AWS_SECRET|GITHUB_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY|"
    r"DATABASE_URL|DB_PASSWORD|STRIPE_KEY|SENDGRID|TWILIO)[A-Z_]*\}?",
    re.IGNORECASE,
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class HeuristicScorerPlugin(ScreeningPlugin):
    """
    Heuristic scorer screening plugin.

    Uses cheap local signals to score commands for suspicious
    characteristics. When the score exceeds a threshold, recommends
    LLM escalation regardless of the tool's base tier.

    FREE feature - available to all users.
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Lightweight heuristic scoring for confidence-gated LLM escalation"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "free"
    TAGS = ["screening", "heuristic", "escalation"]

    # --- Signal weights ---
    WEIGHT_SENSITIVE_PATH = 0.25
    WEIGHT_EXFIL_VERB = 0.20
    WEIGHT_ENCODING_TOOL = 0.10
    WEIGHT_SHELL_EXPANSION = 0.15
    WEIGHT_PIPE_COMPLEXITY = 0.05  # per pipe beyond first
    WEIGHT_REDIRECT_EXTERNAL = 0.20
    WEIGHT_SECRET_ENV_VAR = 0.15
    WEIGHT_EXFIL_TARGET = 0.30

    # Combination bonuses (multiplicative)
    COMBO_EXFIL_PLUS_SENSITIVE = 1.5
    COMBO_ENCODING_PLUS_EXFIL = 1.3
    COMBO_EXPANSION_PLUS_EXFIL = 1.4

    # Known-benign dampening factor
    BENIGN_DAMPENING = 0.8  # score *= (1 - 0.8) = 0.2

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize with optional config keys: threshold, enabled,
        log_all_scores, families_path."""
        super().__init__(config)
        # Family data and the derived signal indices are loaded lazily on
        # first use (see _load_families), keeping construction cheap.
        self._families: Optional[Dict] = None
        self._sensitive_paths: Optional[Set[str]] = None
        self._exfil_verbs: Optional[Set[str]] = None
        self._exfil_targets: Optional[Set[str]] = None
        self._encoding_tools: Optional[Set[str]] = None
        self._threshold: float = self._config.get("threshold", 0.4)
        self._enabled: bool = self._config.get("enabled", True)
        self._log_all: bool = self._config.get("log_all_scores", False)

    @property
    def name(self) -> str:
        """Plugin registry identifier."""
        return "heuristic_scorer"

    def _load_families(self) -> Dict:
        """Load family definitions from YAML and build signal indices."""
        # Memoized: subsequent calls return the cached mapping.
        if self._families is not None:
            return self._families

        # Try config path, then user path, then bundled
        bundled = Path(__file__).parent.parent.parent / "config" / "families.yaml"
        user_path = Path.home() / ".tweek" / "patterns" / "families.yaml"

        path = None
        if self._config.get("families_path"):
            path = Path(self._config["families_path"])
        elif user_path.exists():
            path = user_path
        elif bundled.exists():
            path = bundled

        if path and path.exists():
            try:
                with open(path) as f:
                    data = yaml.safe_load(f) or {}
                self._families = data.get("families", {})
            except (yaml.YAMLError, OSError):
                # Fail open with empty families; baseline signals below
                # still provide coverage.
                self._families = {}
        else:
            self._families = {}

        self._build_signal_indices()
        return self._families

    def _build_signal_indices(self) -> None:
        """Build lookup sets from all family heuristic_signals."""
        self._sensitive_paths = set()
        self._exfil_verbs = set()
        self._exfil_targets = set()
        self._encoding_tools = set()

        for family_def in (self._families or {}).values():
            signals = family_def.get("heuristic_signals", {})

            # Sensitive paths from credential_theft, persistence, etc.
            for key in ("sensitive_paths", "persistence_paths", "priv_paths"):
                for token in signals.get(key, []):
                    self._sensitive_paths.add(token.lower())

            # Exfil verbs
            for token in signals.get("exfil_verbs", []):
                self._exfil_verbs.add(token.lower())

            # Exfil targets
            for token in signals.get("exfil_targets", []):
                self._exfil_targets.add(token.lower())

            # Encoding tools
            for token in signals.get("encoding_tools", []):
                self._encoding_tools.add(token.lower())

        # Add some baseline signals if families didn't provide any
        if not self._sensitive_paths:
            self._sensitive_paths = {
                ".ssh", ".aws", ".env", ".gnupg", ".kube", ".netrc",
                "id_rsa", "id_ed25519", "credentials", "keychain",
            }
        if not self._exfil_verbs:
            self._exfil_verbs = {
                "curl", "wget", "nc", "ncat", "netcat", "socat",
                "scp", "rsync", "ftp",
            }
        if not self._encoding_tools:
            self._encoding_tools = {"base64", "xxd", "openssl", "gzip"}

    def _tokenize(self, content: str) -> List[str]:
        """Split content into tokens for signal matching."""
        # Split on whitespace, pipes, semicolons, ampersands, parentheses
        return re.split(r"[\s|;&()]+", content.lower())

    def _is_benign(self, content: str) -> Optional[str]:
        """Check if content matches a known-benign pattern.

        Returns the matching pattern string, or None.
        """
        stripped = content.strip()
        for pattern in _BENIGN_PATTERNS:
            if pattern.match(stripped):
                return pattern.pattern
        return None

    def _score_content(self, content: str) -> HeuristicScore:
        """Score content against heuristic signals."""
        self._load_families()

        content_lower = content.lower()
        tokens = self._tokenize(content)
        token_set = set(tokens)

        signals: List[Dict[str, Any]] = []
        family_scores: Dict[str, float] = {}
        score = 0.0

        # Track which signal categories fired (for combination bonuses)
        has_sensitive_path = False
        has_exfil_verb = False
        has_encoding_tool = False
        has_shell_expansion = False

        # 1. Sensitive path scan (substring match; each path counted once)
        matched_paths = set()
        for path_token in self._sensitive_paths:
            if path_token in content_lower and path_token not in matched_paths:
                matched_paths.add(path_token)
                has_sensitive_path = True
        if matched_paths:
            score += self.WEIGHT_SENSITIVE_PATH
            signals.append({
                "name": "sensitive_path",
                "weight": self.WEIGHT_SENSITIVE_PATH,
                "matched": list(matched_paths)[:5],
            })

        # 2. Exfiltration verb scan (exact token match)
        matched_verbs = token_set & self._exfil_verbs
        if matched_verbs:
            score += self.WEIGHT_EXFIL_VERB
            has_exfil_verb = True
            signals.append({
                "name": "exfil_verb",
                "weight": self.WEIGHT_EXFIL_VERB,
                "matched": list(matched_verbs)[:5],
            })

        # 3. Exfil target scan (substring match)
        matched_targets = set()
        for target in self._exfil_targets:
            if target in content_lower:
                matched_targets.add(target)
        if matched_targets:
            score += self.WEIGHT_EXFIL_TARGET
            has_exfil_verb = True  # treat target as exfil signal too
            signals.append({
                "name": "exfil_target",
                "weight": self.WEIGHT_EXFIL_TARGET,
                "matched": list(matched_targets)[:5],
            })

        # 4. Encoding tool scan (exact token match)
        matched_encoding = token_set & self._encoding_tools
        if matched_encoding:
            score += self.WEIGHT_ENCODING_TOOL
            has_encoding_tool = True
            signals.append({
                "name": "encoding_tool",
                "weight": self.WEIGHT_ENCODING_TOOL,
                "matched": list(matched_encoding),
            })

        # 5. Shell expansion scan
        expansion_match = _SHELL_EXPANSION_RE.search(content)
        if expansion_match:
            score += self.WEIGHT_SHELL_EXPANSION
            has_shell_expansion = True
            signals.append({
                "name": "shell_expansion",
                "weight": self.WEIGHT_SHELL_EXPANSION,
                "matched": [expansion_match.group()[:30]],
            })

        # 6. Pipe chain complexity
        # NOTE(review): raw "|" count also counts "||" as two pipes —
        # acceptable for a heuristic, but worth confirming intent.
        pipe_count = content.count("|")
        if pipe_count > 1:
            pipe_score = self.WEIGHT_PIPE_COMPLEXITY * (pipe_count - 1)
            score += pipe_score
            signals.append({
                "name": "pipe_complexity",
                "weight": pipe_score,
                "matched": [f"{pipe_count} pipes"],
            })

        # 7. Redirect to external
        if _REDIRECT_EXTERNAL_RE.search(content):
            score += self.WEIGHT_REDIRECT_EXTERNAL
            signals.append({
                "name": "redirect_external",
                "weight": self.WEIGHT_REDIRECT_EXTERNAL,
                "matched": ["external redirect"],
            })

        # 8. Secret env var access
        env_match = _SECRET_ENV_RE.search(content)
        if env_match:
            score += self.WEIGHT_SECRET_ENV_VAR
            signals.append({
                "name": "secret_env_var",
                "weight": self.WEIGHT_SECRET_ENV_VAR,
                "matched": [env_match.group()[:30]],
            })

        # 9. Combination bonuses (multiplicative; applied after all
        # additive signals, so ordering matters)
        if has_exfil_verb and has_sensitive_path:
            score *= self.COMBO_EXFIL_PLUS_SENSITIVE
            signals.append({
                "name": "combo_exfil_sensitive",
                "weight": self.COMBO_EXFIL_PLUS_SENSITIVE,
                "matched": ["multiplicative"],
            })
        if has_encoding_tool and has_exfil_verb:
            score *= self.COMBO_ENCODING_PLUS_EXFIL
            signals.append({
                "name": "combo_encoding_exfil",
                "weight": self.COMBO_ENCODING_PLUS_EXFIL,
                "matched": ["multiplicative"],
            })
        if has_shell_expansion and has_exfil_verb:
            score *= self.COMBO_EXPANSION_PLUS_EXFIL
            signals.append({
                "name": "combo_expansion_exfil",
                "weight": self.COMBO_EXPANSION_PLUS_EXFIL,
                "matched": ["multiplicative"],
            })

        # 10. Per-family sub-scores (informational; do not affect `score`)
        for family_name, family_def in (self._families or {}).items():
            fam_signals = family_def.get("heuristic_signals", {})
            fam_score = 0.0
            all_family_tokens = set()
            for token_list in fam_signals.values():
                if isinstance(token_list, list):
                    for t in token_list:
                        all_family_tokens.add(t.lower())

            hits = 0
            for ft in all_family_tokens:
                if ft in content_lower:
                    hits += 1
            if all_family_tokens:
                fam_score = hits / len(all_family_tokens)
                family_scores[family_name] = round(fam_score, 3)

        # 11. Known-benign dampening
        dampened = False
        dampening_reason = None
        benign_match = self._is_benign(content)
        if benign_match and score > 0:
            score *= (1.0 - self.BENIGN_DAMPENING)
            dampened = True
            dampening_reason = f"Benign pattern: {benign_match[:50]}"
            signals.append({
                "name": "benign_dampening",
                "weight": -(self.BENIGN_DAMPENING),
                "matched": [dampening_reason],
            })

        # 12. Clamp
        score = max(0.0, min(1.0, score))

        return HeuristicScore(
            total_score=round(score, 4),
            signals=signals,
            family_scores=family_scores,
            threshold=self._threshold,
            dampened=dampened,
            dampening_reason=dampening_reason,
        )

    def screen(
        self,
        tool_name: str,
        content: str,
        context: Dict[str, Any],
    ) -> ScreeningResult:
        """
        Score content and return escalation recommendation.

        Unlike other screening plugins, this does not make a final
        allow/block decision. It returns a score and an escalation
        recommendation in the details dict.
        """
        if not self._enabled:
            return ScreeningResult(
                allowed=True,
                plugin_name=self.name,
                reason="Heuristic scorer disabled",
                risk_level="safe",
                confidence=0.0,
                details={"heuristic_score": 0.0, "should_escalate": False},
                findings=[],
            )

        score = self._score_content(content)

        # Below threshold: allow without escalation, but surface the
        # score and signal breakdown for logging/diagnostics.
        if score.total_score < self._threshold:
            return ScreeningResult(
                allowed=True,
                plugin_name=self.name,
                risk_level="safe",
                confidence=score.total_score,
                details={
                    "heuristic_score": score.total_score,
                    "threshold": self._threshold,
                    "should_escalate": False,
                    "signals": score.signals,
                    "family_scores": score.family_scores,
                },
                findings=[],
            )

        # Score exceeds threshold — recommend LLM escalation
        top_families = sorted(
            score.family_scores.items(),
            key=lambda x: x[1],
            reverse=True,
        )[:3]

        top_family_name = top_families[0][0] if top_families else "unknown"

        return ScreeningResult(
            allowed=True,  # Scorer does not block; it escalates
            plugin_name=self.name,
            reason=f"Heuristic score {score.total_score:.2f} exceeds threshold {self._threshold}",
            risk_level="suspicious",
            confidence=score.total_score,
            should_prompt=False,  # Don't prompt user directly; escalate to LLM
            details={
                "heuristic_score": score.total_score,
                "threshold": self._threshold,
                "should_escalate": True,
                "top_families": top_families,
                "signals": score.signals,
                "family_scores": score.family_scores,
            },
            findings=[
                Finding(
                    pattern_name="heuristic_escalation",
                    matched_text=content[:100],
                    severity=Severity.MEDIUM,
                    description=f"Near-miss heuristic: resembles {top_family_name} attack family",
                    recommended_action=ActionType.WARN,
                    metadata={
                        "score": score.total_score,
                        "families": dict(top_families),
                    },
                )
            ],
        )
|
@@ -9,7 +9,9 @@ Semantic analysis using LLM for risky/dangerous operations:
|
|
|
9
9
|
- Prompt injection indicators
|
|
10
10
|
- Privilege escalation attempts
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
Supports multiple providers: Anthropic, OpenAI, Google, and any
|
|
13
|
+
OpenAI-compatible endpoint. Free and open source. Requires an API key
|
|
14
|
+
for any supported provider (BYOK).
|
|
13
15
|
"""
|
|
14
16
|
|
|
15
17
|
from typing import Optional, Dict, Any, List
|
|
@@ -26,10 +28,12 @@ class LLMReviewerPlugin(ScreeningPlugin):
|
|
|
26
28
|
"""
|
|
27
29
|
LLM-based security reviewer plugin.
|
|
28
30
|
|
|
29
|
-
Uses a fast, cheap LLM
|
|
30
|
-
|
|
31
|
+
Uses a fast, cheap LLM to analyze commands that pass regex screening
|
|
32
|
+
but may still be malicious. Supports multiple providers: Anthropic
|
|
33
|
+
(Claude), OpenAI (GPT), Google (Gemini), and any OpenAI-compatible
|
|
34
|
+
endpoint (Ollama, LM Studio, Together, Groq, etc.).
|
|
31
35
|
|
|
32
|
-
Free and open source. Requires
|
|
36
|
+
Free and open source. Requires an API key for any supported provider (BYOK).
|
|
33
37
|
"""
|
|
34
38
|
|
|
35
39
|
VERSION = "1.0.0"
|
|
@@ -53,10 +57,13 @@ class LLMReviewerPlugin(ScreeningPlugin):
|
|
|
53
57
|
from tweek.security.llm_reviewer import LLMReviewer
|
|
54
58
|
|
|
55
59
|
self._reviewer = LLMReviewer(
|
|
56
|
-
model=self._config.get("model", "
|
|
60
|
+
model=self._config.get("model", "auto"),
|
|
57
61
|
api_key=self._config.get("api_key"),
|
|
58
62
|
timeout=self._config.get("timeout", 5.0),
|
|
59
63
|
enabled=self._config.get("enabled", True),
|
|
64
|
+
provider=self._config.get("provider", "auto"),
|
|
65
|
+
base_url=self._config.get("base_url"),
|
|
66
|
+
api_key_env=self._config.get("api_key_env"),
|
|
60
67
|
)
|
|
61
68
|
except ImportError:
|
|
62
69
|
pass
|
|
@@ -128,7 +135,8 @@ class LLMReviewerPlugin(ScreeningPlugin):
|
|
|
128
135
|
recommended_action=ActionType.ASK if result.should_prompt else ActionType.WARN,
|
|
129
136
|
metadata={
|
|
130
137
|
"confidence": result.confidence,
|
|
131
|
-
"model":
|
|
138
|
+
"model": result.details.get("model", "unknown"),
|
|
139
|
+
"provider": result.details.get("provider", "unknown"),
|
|
132
140
|
}
|
|
133
141
|
))
|
|
134
142
|
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Tweek Local Model Reviewer Screening Plugin
|
|
4
|
+
|
|
5
|
+
On-device prompt injection classifier using ONNX model.
|
|
6
|
+
No API key needed — inference runs entirely locally.
|
|
7
|
+
|
|
8
|
+
Requires optional dependencies: pip install tweek[local-models]
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Optional, Dict, Any
|
|
12
|
+
from tweek.plugins.base import (
|
|
13
|
+
ScreeningPlugin,
|
|
14
|
+
ScreeningResult,
|
|
15
|
+
Finding,
|
|
16
|
+
Severity,
|
|
17
|
+
ActionType,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LocalModelReviewerPlugin(ScreeningPlugin):
    """
    Local ONNX model screening plugin.

    Uses a local prompt injection classifier for on-device security
    analysis. No cloud API calls needed. Runs in ~20ms on CPU.
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Local ONNX model for prompt injection detection"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "free"
    TAGS = ["screening", "local-model", "onnx", "prompt-injection"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)

    @property
    def name(self) -> str:
        return "local_model_reviewer"

    def screen(
        self,
        tool_name: str,
        content: str,
        context: Dict[str, Any],
    ) -> ScreeningResult:
        """Screen content using local ONNX model.

        Args:
            tool_name: Name of the tool being invoked.
            content: Command or content to analyze.
            context: Should include 'tier', optionally 'tool_input'.

        Returns:
            ScreeningResult with local model analysis.
        """
        # Resolve the optional dependency; a missing package and a
        # disabled-availability flag are treated identically (fail open).
        try:
            from tweek.security.local_model import (
                LOCAL_MODEL_AVAILABLE,
                get_local_model,
            )
            deps_present = LOCAL_MODEL_AVAILABLE
        except ImportError:
            deps_present = False

        if not deps_present:
            return ScreeningResult(
                allowed=True,
                plugin_name=self.name,
                reason="Local model dependencies not installed",
            )

        model = get_local_model()
        if model is None:
            return ScreeningResult(
                allowed=True,
                plugin_name=self.name,
                reason="Local model not downloaded",
            )

        # Inference errors must never block the tool call — fail open.
        try:
            prediction = model.predict(content)
        except Exception as e:
            return ScreeningResult(
                allowed=True,
                plugin_name=self.name,
                reason=f"Local model inference error: {e}",
            )

        # Translate the model's risk level into a finding severity;
        # anything unrecognized falls back to MEDIUM.
        if prediction.risk_level == "safe":
            severity = Severity.LOW
        elif prediction.risk_level == "dangerous":
            severity = Severity.HIGH
        else:
            severity = Severity.MEDIUM

        findings = []
        if prediction.is_suspicious:
            # Only hard-block on a high-confidence dangerous verdict;
            # everything else asks the user.
            hard_block = prediction.is_dangerous and prediction.confidence > 0.9
            findings.append(
                Finding(
                    pattern_name="local_model",
                    matched_text=content[:100],
                    severity=severity,
                    description=(
                        f"Local model ({prediction.model_name}): "
                        f"{prediction.label} ({prediction.confidence:.1%})"
                    ),
                    recommended_action=(
                        ActionType.BLOCK if hard_block else ActionType.ASK
                    ),
                    metadata={
                        "confidence": prediction.confidence,
                        "model": prediction.model_name,
                        "label": prediction.label,
                        "inference_ms": prediction.inference_time_ms,
                        "all_scores": prediction.all_scores,
                    },
                )
            )

        if prediction.is_suspicious:
            summary = f"Local model: {prediction.label} ({prediction.confidence:.1%})"
        else:
            summary = "Local model: benign"

        return ScreeningResult(
            allowed=not prediction.is_dangerous,
            plugin_name=self.name,
            reason=summary,
            risk_level=prediction.risk_level,
            confidence=prediction.confidence,
            should_prompt=prediction.is_suspicious,
            findings=findings,
            details={
                "model": prediction.model_name,
                "label": prediction.label,
                "inference_ms": prediction.inference_time_ms,
                "all_scores": prediction.all_scores,
            },
        )

    def is_available(self) -> bool:
        """Check if local model is available."""
        try:
            from tweek.security.local_model import (
                LOCAL_MODEL_AVAILABLE,
                get_local_model,
            )
        except ImportError:
            return False
        if not LOCAL_MODEL_AVAILABLE:
            return False
        return get_local_model() is not None