tweek 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. tweek/__init__.py +2 -2
  2. tweek/_keygen.py +53 -0
  3. tweek/audit.py +288 -0
  4. tweek/cli.py +5398 -2392
  5. tweek/cli_model.py +380 -0
  6. tweek/config/families.yaml +609 -0
  7. tweek/config/manager.py +42 -5
  8. tweek/config/patterns.yaml +1510 -8
  9. tweek/config/tiers.yaml +161 -11
  10. tweek/diagnostics.py +71 -2
  11. tweek/hooks/break_glass.py +163 -0
  12. tweek/hooks/feedback.py +223 -0
  13. tweek/hooks/overrides.py +531 -0
  14. tweek/hooks/post_tool_use.py +472 -0
  15. tweek/hooks/pre_tool_use.py +1024 -62
  16. tweek/integrations/openclaw.py +443 -0
  17. tweek/integrations/openclaw_server.py +385 -0
  18. tweek/licensing.py +14 -54
  19. tweek/logging/bundle.py +2 -2
  20. tweek/logging/security_log.py +56 -13
  21. tweek/mcp/approval.py +57 -16
  22. tweek/mcp/proxy.py +18 -0
  23. tweek/mcp/screening.py +5 -5
  24. tweek/mcp/server.py +4 -1
  25. tweek/memory/__init__.py +24 -0
  26. tweek/memory/queries.py +223 -0
  27. tweek/memory/safety.py +140 -0
  28. tweek/memory/schemas.py +80 -0
  29. tweek/memory/store.py +989 -0
  30. tweek/platform/__init__.py +4 -4
  31. tweek/plugins/__init__.py +40 -24
  32. tweek/plugins/base.py +1 -1
  33. tweek/plugins/detectors/__init__.py +3 -3
  34. tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
  35. tweek/plugins/git_discovery.py +16 -4
  36. tweek/plugins/git_registry.py +8 -2
  37. tweek/plugins/git_security.py +21 -9
  38. tweek/plugins/screening/__init__.py +10 -1
  39. tweek/plugins/screening/heuristic_scorer.py +477 -0
  40. tweek/plugins/screening/llm_reviewer.py +14 -6
  41. tweek/plugins/screening/local_model_reviewer.py +161 -0
  42. tweek/proxy/__init__.py +38 -37
  43. tweek/proxy/addon.py +22 -3
  44. tweek/proxy/interceptor.py +1 -0
  45. tweek/proxy/server.py +4 -2
  46. tweek/sandbox/__init__.py +11 -0
  47. tweek/sandbox/docker_bridge.py +143 -0
  48. tweek/sandbox/executor.py +9 -6
  49. tweek/sandbox/layers.py +97 -0
  50. tweek/sandbox/linux.py +1 -0
  51. tweek/sandbox/project.py +548 -0
  52. tweek/sandbox/registry.py +149 -0
  53. tweek/security/__init__.py +9 -0
  54. tweek/security/language.py +250 -0
  55. tweek/security/llm_reviewer.py +1146 -60
  56. tweek/security/local_model.py +331 -0
  57. tweek/security/local_reviewer.py +146 -0
  58. tweek/security/model_registry.py +371 -0
  59. tweek/security/rate_limiter.py +11 -6
  60. tweek/security/secret_scanner.py +70 -4
  61. tweek/security/session_analyzer.py +26 -2
  62. tweek/skill_template/SKILL.md +200 -0
  63. tweek/skill_template/__init__.py +0 -0
  64. tweek/skill_template/cli-reference.md +331 -0
  65. tweek/skill_template/overrides-reference.md +184 -0
  66. tweek/skill_template/scripts/__init__.py +0 -0
  67. tweek/skill_template/scripts/check_installed.py +170 -0
  68. tweek/skills/__init__.py +38 -0
  69. tweek/skills/config.py +150 -0
  70. tweek/skills/fingerprints.py +198 -0
  71. tweek/skills/guard.py +293 -0
  72. tweek/skills/isolation.py +469 -0
  73. tweek/skills/scanner.py +715 -0
  74. tweek/vault/__init__.py +0 -1
  75. tweek/vault/cross_platform.py +12 -1
  76. tweek/vault/keychain.py +87 -29
  77. tweek-0.2.1.dist-info/METADATA +281 -0
  78. tweek-0.2.1.dist-info/RECORD +122 -0
  79. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/entry_points.txt +8 -1
  80. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/licenses/LICENSE +80 -0
  81. tweek-0.2.1.dist-info/top_level.txt +2 -0
  82. tweek-openclaw-plugin/node_modules/flatted/python/flatted.py +149 -0
  83. tweek/integrations/moltbot.py +0 -243
  84. tweek-0.1.0.dist-info/METADATA +0 -335
  85. tweek-0.1.0.dist-info/RECORD +0 -85
  86. tweek-0.1.0.dist-info/top_level.txt +0 -1
  87. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,477 @@
1
+ """
2
+ Tweek Heuristic Scorer Screening Plugin
3
+
4
+ Lightweight signal-based scoring for confidence-gated LLM escalation.
5
+ Runs between Layer 2 (regex) and Layer 3 (LLM) to detect novel attack
6
+ variants that don't match any of the 259 regex patterns but exhibit
7
+ suspicious characteristics.
8
+
9
+ Scoring signals (all local, no network, no LLM):
10
+ - Sensitive path tokens (from pattern family definitions)
11
+ - Exfiltration verbs
12
+ - Encoding/obfuscation tools
13
+ - Shell expansion/eval constructs
14
+ - Pipe chain complexity
15
+ - Combination bonuses (multiplicative)
16
+ - Known-benign dampening
17
+
18
+ FREE feature - available to all users.
19
+ """
20
+
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ from typing import Optional, Dict, Any, List, Set, Tuple
24
+ import re
25
+ import yaml
26
+
27
+ from tweek.plugins.base import (
28
+ ScreeningPlugin,
29
+ ScreeningResult,
30
+ Finding,
31
+ Severity,
32
+ ActionType,
33
+ )
34
+
35
+
36
+ @dataclass
37
+ class HeuristicScore:
38
+ """Result of heuristic scoring."""
39
+
40
+ total_score: float
41
+ signals: List[Dict[str, Any]]
42
+ family_scores: Dict[str, float]
43
+ threshold: float = 0.4
44
+ dampened: bool = False
45
+ dampening_reason: Optional[str] = None
46
+
47
+ @property
48
+ def should_escalate(self) -> bool:
49
+ return self.total_score >= self.threshold
50
+
51
+
52
+ # Pre-compiled benign command patterns
53
+ _BENIGN_PATTERNS = [
54
+ re.compile(p, re.IGNORECASE)
55
+ for p in [
56
+ r"^git\s+(commit|push|pull|fetch|clone|checkout|branch|merge|log|diff|status|add|stash|rebase|tag|remote|init)\b",
57
+ r"^npm\s+(install|test|run|build|start|ci|audit|outdated|ls|init)\b",
58
+ r"^yarn\s+(install|add|remove|build|test|start|dev)\b",
59
+ r"^pip3?\s+(install|list|show|freeze|check)\b",
60
+ r"^python[23]?\s+(-m\s+)?(pytest|unittest|pip|venv|http\.server|json\.tool)\b",
61
+ r"^(ls|pwd|cd|echo|mkdir|touch|date|which|type|man|help)\b",
62
+ r"^cargo\s+(build|test|run|check|fmt|clippy|doc|bench)\b",
63
+ r"^make(\s+|$)",
64
+ r"^docker\s+(build|run|compose|ps|images|logs|stop|start)\b",
65
+ r"^go\s+(build|test|run|mod|fmt|vet|generate)\b",
66
+ r"^rustc\b",
67
+ r"^gcc\b|^g\+\+\b|^clang\b",
68
+ r"^cat\s+\S+\.(py|js|ts|rs|go|java|c|cpp|h|rb|sh|md|txt|json|yaml|yml|toml|cfg|ini|html|css|xml|sql)\b",
69
+ r"^(ruff|black|prettier|eslint|flake8|mypy|pylint)\b",
70
+ ]
71
+ ]
72
+
73
+ # Shell expansion patterns
74
+ _SHELL_EXPANSION_RE = re.compile(r"\$\(|\$\{|`[^`]+`|\beval\s|\bexec\s|\bsource\s")
75
+
76
+ # Redirect to external patterns
77
+ _REDIRECT_EXTERNAL_RE = re.compile(r"/dev/tcp/|/dev/udp/|>\s*&\d|>\(\s*(curl|wget|nc|ncat)\b")
78
+
79
+ # Env var with secret name
80
+ _SECRET_ENV_RE = re.compile(
81
+ r"\$\{?(API_KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|PRIVATE_KEY|AUTH|"
82
+ r"AWS_SECRET|GITHUB_TOKEN|OPENAI_API_KEY|ANTHROPIC_API_KEY|"
83
+ r"DATABASE_URL|DB_PASSWORD|STRIPE_KEY|SENDGRID|TWILIO)[A-Z_]*\}?",
84
+ re.IGNORECASE,
85
+ )
86
+
87
+
88
+ class HeuristicScorerPlugin(ScreeningPlugin):
89
+ """
90
+ Heuristic scorer screening plugin.
91
+
92
+ Uses cheap local signals to score commands for suspicious
93
+ characteristics. When the score exceeds a threshold, recommends
94
+ LLM escalation regardless of the tool's base tier.
95
+
96
+ FREE feature - available to all users.
97
+ """
98
+
99
+ VERSION = "1.0.0"
100
+ DESCRIPTION = "Lightweight heuristic scoring for confidence-gated LLM escalation"
101
+ AUTHOR = "Tweek"
102
+ REQUIRES_LICENSE = "free"
103
+ TAGS = ["screening", "heuristic", "escalation"]
104
+
105
+ # --- Signal weights ---
106
+ WEIGHT_SENSITIVE_PATH = 0.25
107
+ WEIGHT_EXFIL_VERB = 0.20
108
+ WEIGHT_ENCODING_TOOL = 0.10
109
+ WEIGHT_SHELL_EXPANSION = 0.15
110
+ WEIGHT_PIPE_COMPLEXITY = 0.05 # per pipe beyond first
111
+ WEIGHT_REDIRECT_EXTERNAL = 0.20
112
+ WEIGHT_SECRET_ENV_VAR = 0.15
113
+ WEIGHT_EXFIL_TARGET = 0.30
114
+
115
+ # Combination bonuses (multiplicative)
116
+ COMBO_EXFIL_PLUS_SENSITIVE = 1.5
117
+ COMBO_ENCODING_PLUS_EXFIL = 1.3
118
+ COMBO_EXPANSION_PLUS_EXFIL = 1.4
119
+
120
+ # Known-benign dampening factor
121
+ BENIGN_DAMPENING = 0.8 # score *= (1 - 0.8) = 0.2
122
+
123
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
124
+ super().__init__(config)
125
+ self._families: Optional[Dict] = None
126
+ self._sensitive_paths: Optional[Set[str]] = None
127
+ self._exfil_verbs: Optional[Set[str]] = None
128
+ self._exfil_targets: Optional[Set[str]] = None
129
+ self._encoding_tools: Optional[Set[str]] = None
130
+ self._threshold: float = self._config.get("threshold", 0.4)
131
+ self._enabled: bool = self._config.get("enabled", True)
132
+ self._log_all: bool = self._config.get("log_all_scores", False)
133
+
134
+ @property
135
+ def name(self) -> str:
136
+ return "heuristic_scorer"
137
+
138
+ def _load_families(self) -> Dict:
139
+ """Load family definitions from YAML and build signal indices."""
140
+ if self._families is not None:
141
+ return self._families
142
+
143
+ # Try config path, then user path, then bundled
144
+ bundled = Path(__file__).parent.parent.parent / "config" / "families.yaml"
145
+ user_path = Path.home() / ".tweek" / "patterns" / "families.yaml"
146
+
147
+ path = None
148
+ if self._config.get("families_path"):
149
+ path = Path(self._config["families_path"])
150
+ elif user_path.exists():
151
+ path = user_path
152
+ elif bundled.exists():
153
+ path = bundled
154
+
155
+ if path and path.exists():
156
+ try:
157
+ with open(path) as f:
158
+ data = yaml.safe_load(f) or {}
159
+ self._families = data.get("families", {})
160
+ except (yaml.YAMLError, OSError):
161
+ self._families = {}
162
+ else:
163
+ self._families = {}
164
+
165
+ self._build_signal_indices()
166
+ return self._families
167
+
168
+ def _build_signal_indices(self):
169
+ """Build lookup sets from all family heuristic_signals."""
170
+ self._sensitive_paths = set()
171
+ self._exfil_verbs = set()
172
+ self._exfil_targets = set()
173
+ self._encoding_tools = set()
174
+
175
+ for family_def in (self._families or {}).values():
176
+ signals = family_def.get("heuristic_signals", {})
177
+
178
+ # Sensitive paths from credential_theft, persistence, etc.
179
+ for key in ("sensitive_paths", "persistence_paths", "priv_paths"):
180
+ for token in signals.get(key, []):
181
+ self._sensitive_paths.add(token.lower())
182
+
183
+ # Exfil verbs
184
+ for token in signals.get("exfil_verbs", []):
185
+ self._exfil_verbs.add(token.lower())
186
+
187
+ # Exfil targets
188
+ for token in signals.get("exfil_targets", []):
189
+ self._exfil_targets.add(token.lower())
190
+
191
+ # Encoding tools
192
+ for token in signals.get("encoding_tools", []):
193
+ self._encoding_tools.add(token.lower())
194
+
195
+ # Add some baseline signals if families didn't provide any
196
+ if not self._sensitive_paths:
197
+ self._sensitive_paths = {
198
+ ".ssh", ".aws", ".env", ".gnupg", ".kube", ".netrc",
199
+ "id_rsa", "id_ed25519", "credentials", "keychain",
200
+ }
201
+ if not self._exfil_verbs:
202
+ self._exfil_verbs = {
203
+ "curl", "wget", "nc", "ncat", "netcat", "socat",
204
+ "scp", "rsync", "ftp",
205
+ }
206
+ if not self._encoding_tools:
207
+ self._encoding_tools = {"base64", "xxd", "openssl", "gzip"}
208
+
209
+ def _tokenize(self, content: str) -> List[str]:
210
+ """Split content into tokens for signal matching."""
211
+ # Split on whitespace, pipes, semicolons, ampersands, parentheses
212
+ return re.split(r"[\s|;&()]+", content.lower())
213
+
214
+ def _is_benign(self, content: str) -> Optional[str]:
215
+ """Check if content matches a known-benign pattern."""
216
+ stripped = content.strip()
217
+ for pattern in _BENIGN_PATTERNS:
218
+ if pattern.match(stripped):
219
+ return pattern.pattern
220
+ return None
221
+
222
+ def _score_content(self, content: str) -> HeuristicScore:
223
+ """Score content against heuristic signals."""
224
+ self._load_families()
225
+
226
+ content_lower = content.lower()
227
+ tokens = self._tokenize(content)
228
+ token_set = set(tokens)
229
+
230
+ signals: List[Dict[str, Any]] = []
231
+ family_scores: Dict[str, float] = {}
232
+ score = 0.0
233
+
234
+ # Track which signal categories fired (for combination bonuses)
235
+ has_sensitive_path = False
236
+ has_exfil_verb = False
237
+ has_encoding_tool = False
238
+ has_shell_expansion = False
239
+
240
+ # 1. Sensitive path scan
241
+ matched_paths = set()
242
+ for path_token in self._sensitive_paths:
243
+ if path_token in content_lower and path_token not in matched_paths:
244
+ matched_paths.add(path_token)
245
+ has_sensitive_path = True
246
+ if matched_paths:
247
+ score += self.WEIGHT_SENSITIVE_PATH
248
+ signals.append({
249
+ "name": "sensitive_path",
250
+ "weight": self.WEIGHT_SENSITIVE_PATH,
251
+ "matched": list(matched_paths)[:5],
252
+ })
253
+
254
+ # 2. Exfiltration verb scan
255
+ matched_verbs = token_set & self._exfil_verbs
256
+ if matched_verbs:
257
+ score += self.WEIGHT_EXFIL_VERB
258
+ has_exfil_verb = True
259
+ signals.append({
260
+ "name": "exfil_verb",
261
+ "weight": self.WEIGHT_EXFIL_VERB,
262
+ "matched": list(matched_verbs)[:5],
263
+ })
264
+
265
+ # 3. Exfil target scan
266
+ matched_targets = set()
267
+ for target in self._exfil_targets:
268
+ if target in content_lower:
269
+ matched_targets.add(target)
270
+ if matched_targets:
271
+ score += self.WEIGHT_EXFIL_TARGET
272
+ has_exfil_verb = True # treat target as exfil signal too
273
+ signals.append({
274
+ "name": "exfil_target",
275
+ "weight": self.WEIGHT_EXFIL_TARGET,
276
+ "matched": list(matched_targets)[:5],
277
+ })
278
+
279
+ # 4. Encoding tool scan
280
+ matched_encoding = token_set & self._encoding_tools
281
+ if matched_encoding:
282
+ score += self.WEIGHT_ENCODING_TOOL
283
+ has_encoding_tool = True
284
+ signals.append({
285
+ "name": "encoding_tool",
286
+ "weight": self.WEIGHT_ENCODING_TOOL,
287
+ "matched": list(matched_encoding),
288
+ })
289
+
290
+ # 5. Shell expansion scan
291
+ expansion_match = _SHELL_EXPANSION_RE.search(content)
292
+ if expansion_match:
293
+ score += self.WEIGHT_SHELL_EXPANSION
294
+ has_shell_expansion = True
295
+ signals.append({
296
+ "name": "shell_expansion",
297
+ "weight": self.WEIGHT_SHELL_EXPANSION,
298
+ "matched": [expansion_match.group()[:30]],
299
+ })
300
+
301
+ # 6. Pipe chain complexity
302
+ pipe_count = content.count("|")
303
+ if pipe_count > 1:
304
+ pipe_score = self.WEIGHT_PIPE_COMPLEXITY * (pipe_count - 1)
305
+ score += pipe_score
306
+ signals.append({
307
+ "name": "pipe_complexity",
308
+ "weight": pipe_score,
309
+ "matched": [f"{pipe_count} pipes"],
310
+ })
311
+
312
+ # 7. Redirect to external
313
+ if _REDIRECT_EXTERNAL_RE.search(content):
314
+ score += self.WEIGHT_REDIRECT_EXTERNAL
315
+ signals.append({
316
+ "name": "redirect_external",
317
+ "weight": self.WEIGHT_REDIRECT_EXTERNAL,
318
+ "matched": ["external redirect"],
319
+ })
320
+
321
+ # 8. Secret env var access
322
+ env_match = _SECRET_ENV_RE.search(content)
323
+ if env_match:
324
+ score += self.WEIGHT_SECRET_ENV_VAR
325
+ signals.append({
326
+ "name": "secret_env_var",
327
+ "weight": self.WEIGHT_SECRET_ENV_VAR,
328
+ "matched": [env_match.group()[:30]],
329
+ })
330
+
331
+ # 9. Combination bonuses (multiplicative)
332
+ if has_exfil_verb and has_sensitive_path:
333
+ score *= self.COMBO_EXFIL_PLUS_SENSITIVE
334
+ signals.append({
335
+ "name": "combo_exfil_sensitive",
336
+ "weight": self.COMBO_EXFIL_PLUS_SENSITIVE,
337
+ "matched": ["multiplicative"],
338
+ })
339
+ if has_encoding_tool and has_exfil_verb:
340
+ score *= self.COMBO_ENCODING_PLUS_EXFIL
341
+ signals.append({
342
+ "name": "combo_encoding_exfil",
343
+ "weight": self.COMBO_ENCODING_PLUS_EXFIL,
344
+ "matched": ["multiplicative"],
345
+ })
346
+ if has_shell_expansion and has_exfil_verb:
347
+ score *= self.COMBO_EXPANSION_PLUS_EXFIL
348
+ signals.append({
349
+ "name": "combo_expansion_exfil",
350
+ "weight": self.COMBO_EXPANSION_PLUS_EXFIL,
351
+ "matched": ["multiplicative"],
352
+ })
353
+
354
+ # 10. Per-family sub-scores
355
+ for family_name, family_def in (self._families or {}).items():
356
+ fam_signals = family_def.get("heuristic_signals", {})
357
+ fam_score = 0.0
358
+ all_family_tokens = set()
359
+ for token_list in fam_signals.values():
360
+ if isinstance(token_list, list):
361
+ for t in token_list:
362
+ all_family_tokens.add(t.lower())
363
+
364
+ hits = 0
365
+ for ft in all_family_tokens:
366
+ if ft in content_lower:
367
+ hits += 1
368
+ if all_family_tokens:
369
+ fam_score = hits / len(all_family_tokens)
370
+ family_scores[family_name] = round(fam_score, 3)
371
+
372
+ # 11. Known-benign dampening
373
+ dampened = False
374
+ dampening_reason = None
375
+ benign_match = self._is_benign(content)
376
+ if benign_match and score > 0:
377
+ score *= (1.0 - self.BENIGN_DAMPENING)
378
+ dampened = True
379
+ dampening_reason = f"Benign pattern: {benign_match[:50]}"
380
+ signals.append({
381
+ "name": "benign_dampening",
382
+ "weight": -(self.BENIGN_DAMPENING),
383
+ "matched": [dampening_reason],
384
+ })
385
+
386
+ # 12. Clamp
387
+ score = max(0.0, min(1.0, score))
388
+
389
+ return HeuristicScore(
390
+ total_score=round(score, 4),
391
+ signals=signals,
392
+ family_scores=family_scores,
393
+ threshold=self._threshold,
394
+ dampened=dampened,
395
+ dampening_reason=dampening_reason,
396
+ )
397
+
398
+ def screen(
399
+ self,
400
+ tool_name: str,
401
+ content: str,
402
+ context: Dict[str, Any],
403
+ ) -> ScreeningResult:
404
+ """
405
+ Score content and return escalation recommendation.
406
+
407
+ Unlike other screening plugins, this does not make a final
408
+ allow/block decision. It returns a score and an escalation
409
+ recommendation in the details dict.
410
+ """
411
+ if not self._enabled:
412
+ return ScreeningResult(
413
+ allowed=True,
414
+ plugin_name=self.name,
415
+ reason="Heuristic scorer disabled",
416
+ risk_level="safe",
417
+ confidence=0.0,
418
+ details={"heuristic_score": 0.0, "should_escalate": False},
419
+ findings=[],
420
+ )
421
+
422
+ score = self._score_content(content)
423
+
424
+ if score.total_score < self._threshold:
425
+ return ScreeningResult(
426
+ allowed=True,
427
+ plugin_name=self.name,
428
+ risk_level="safe",
429
+ confidence=score.total_score,
430
+ details={
431
+ "heuristic_score": score.total_score,
432
+ "threshold": self._threshold,
433
+ "should_escalate": False,
434
+ "signals": score.signals,
435
+ "family_scores": score.family_scores,
436
+ },
437
+ findings=[],
438
+ )
439
+
440
+ # Score exceeds threshold — recommend LLM escalation
441
+ top_families = sorted(
442
+ score.family_scores.items(),
443
+ key=lambda x: x[1],
444
+ reverse=True,
445
+ )[:3]
446
+
447
+ top_family_name = top_families[0][0] if top_families else "unknown"
448
+
449
+ return ScreeningResult(
450
+ allowed=True, # Scorer does not block; it escalates
451
+ plugin_name=self.name,
452
+ reason=f"Heuristic score {score.total_score:.2f} exceeds threshold {self._threshold}",
453
+ risk_level="suspicious",
454
+ confidence=score.total_score,
455
+ should_prompt=False, # Don't prompt user directly; escalate to LLM
456
+ details={
457
+ "heuristic_score": score.total_score,
458
+ "threshold": self._threshold,
459
+ "should_escalate": True,
460
+ "top_families": top_families,
461
+ "signals": score.signals,
462
+ "family_scores": score.family_scores,
463
+ },
464
+ findings=[
465
+ Finding(
466
+ pattern_name="heuristic_escalation",
467
+ matched_text=content[:100],
468
+ severity=Severity.MEDIUM,
469
+ description=f"Near-miss heuristic: resembles {top_family_name} attack family",
470
+ recommended_action=ActionType.WARN,
471
+ metadata={
472
+ "score": score.total_score,
473
+ "families": dict(top_families),
474
+ },
475
+ )
476
+ ],
477
+ )
@@ -9,7 +9,9 @@ Semantic analysis using LLM for risky/dangerous operations:
9
9
  - Prompt injection indicators
10
10
  - Privilege escalation attempts
11
11
 
12
- Free and open source. Requires ANTHROPIC_API_KEY (BYOK).
12
+ Supports multiple providers: Anthropic, OpenAI, Google, and any
13
+ OpenAI-compatible endpoint. Free and open source. Requires an API key
14
+ for any supported provider (BYOK).
13
15
  """
14
16
 
15
17
  from typing import Optional, Dict, Any, List
@@ -26,10 +28,12 @@ class LLMReviewerPlugin(ScreeningPlugin):
26
28
  """
27
29
  LLM-based security reviewer plugin.
28
30
 
29
- Uses a fast, cheap LLM (Claude Haiku) to analyze commands
30
- that pass regex screening but may still be malicious.
31
+ Uses a fast, cheap LLM to analyze commands that pass regex screening
32
+ but may still be malicious. Supports multiple providers: Anthropic
33
+ (Claude), OpenAI (GPT), Google (Gemini), and any OpenAI-compatible
34
+ endpoint (Ollama, LM Studio, Together, Groq, etc.).
31
35
 
32
- Free and open source. Requires ANTHROPIC_API_KEY (BYOK).
36
+ Free and open source. Requires an API key for any supported provider (BYOK).
33
37
  """
34
38
 
35
39
  VERSION = "1.0.0"
@@ -53,10 +57,13 @@ class LLMReviewerPlugin(ScreeningPlugin):
53
57
  from tweek.security.llm_reviewer import LLMReviewer
54
58
 
55
59
  self._reviewer = LLMReviewer(
56
- model=self._config.get("model", "claude-3-5-haiku-latest"),
60
+ model=self._config.get("model", "auto"),
57
61
  api_key=self._config.get("api_key"),
58
62
  timeout=self._config.get("timeout", 5.0),
59
63
  enabled=self._config.get("enabled", True),
64
+ provider=self._config.get("provider", "auto"),
65
+ base_url=self._config.get("base_url"),
66
+ api_key_env=self._config.get("api_key_env"),
60
67
  )
61
68
  except ImportError:
62
69
  pass
@@ -128,7 +135,8 @@ class LLMReviewerPlugin(ScreeningPlugin):
128
135
  recommended_action=ActionType.ASK if result.should_prompt else ActionType.WARN,
129
136
  metadata={
130
137
  "confidence": result.confidence,
131
- "model": self._config.get("model", "claude-3-5-haiku-latest"),
138
+ "model": result.details.get("model", "unknown"),
139
+ "provider": result.details.get("provider", "unknown"),
132
140
  }
133
141
  ))
134
142
 
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tweek Local Model Reviewer Screening Plugin
4
+
5
+ On-device prompt injection classifier using ONNX model.
6
+ No API key needed — inference runs entirely locally.
7
+
8
+ Requires optional dependencies: pip install tweek[local-models]
9
+ """
10
+
11
+ from typing import Optional, Dict, Any
12
+ from tweek.plugins.base import (
13
+ ScreeningPlugin,
14
+ ScreeningResult,
15
+ Finding,
16
+ Severity,
17
+ ActionType,
18
+ )
19
+
20
+
21
+ class LocalModelReviewerPlugin(ScreeningPlugin):
22
+ """
23
+ Local ONNX model screening plugin.
24
+
25
+ Uses a local prompt injection classifier for on-device security
26
+ analysis. No cloud API calls needed. Runs in ~20ms on CPU.
27
+ """
28
+
29
+ VERSION = "1.0.0"
30
+ DESCRIPTION = "Local ONNX model for prompt injection detection"
31
+ AUTHOR = "Tweek"
32
+ REQUIRES_LICENSE = "free"
33
+ TAGS = ["screening", "local-model", "onnx", "prompt-injection"]
34
+
35
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
36
+ super().__init__(config)
37
+
38
+ @property
39
+ def name(self) -> str:
40
+ return "local_model_reviewer"
41
+
42
+ def screen(
43
+ self,
44
+ tool_name: str,
45
+ content: str,
46
+ context: Dict[str, Any],
47
+ ) -> ScreeningResult:
48
+ """Screen content using local ONNX model.
49
+
50
+ Args:
51
+ tool_name: Name of the tool being invoked.
52
+ content: Command or content to analyze.
53
+ context: Should include 'tier', optionally 'tool_input'.
54
+
55
+ Returns:
56
+ ScreeningResult with local model analysis.
57
+ """
58
+ try:
59
+ from tweek.security.local_model import (
60
+ LOCAL_MODEL_AVAILABLE,
61
+ get_local_model,
62
+ )
63
+ except ImportError:
64
+ return ScreeningResult(
65
+ allowed=True,
66
+ plugin_name=self.name,
67
+ reason="Local model dependencies not installed",
68
+ )
69
+
70
+ if not LOCAL_MODEL_AVAILABLE:
71
+ return ScreeningResult(
72
+ allowed=True,
73
+ plugin_name=self.name,
74
+ reason="Local model dependencies not installed",
75
+ )
76
+
77
+ model = get_local_model()
78
+ if model is None:
79
+ return ScreeningResult(
80
+ allowed=True,
81
+ plugin_name=self.name,
82
+ reason="Local model not downloaded",
83
+ )
84
+
85
+ try:
86
+ result = model.predict(content)
87
+ except Exception as e:
88
+ return ScreeningResult(
89
+ allowed=True,
90
+ plugin_name=self.name,
91
+ reason=f"Local model inference error: {e}",
92
+ )
93
+
94
+ # Map risk levels to screening result
95
+ risk_severity_map = {
96
+ "safe": Severity.LOW,
97
+ "suspicious": Severity.MEDIUM,
98
+ "dangerous": Severity.HIGH,
99
+ }
100
+
101
+ severity = risk_severity_map.get(result.risk_level, Severity.MEDIUM)
102
+
103
+ findings = []
104
+ if result.is_suspicious:
105
+ findings.append(
106
+ Finding(
107
+ pattern_name="local_model",
108
+ matched_text=content[:100],
109
+ severity=severity,
110
+ description=(
111
+ f"Local model ({result.model_name}): "
112
+ f"{result.label} ({result.confidence:.1%})"
113
+ ),
114
+ recommended_action=(
115
+ ActionType.BLOCK
116
+ if result.is_dangerous and result.confidence > 0.9
117
+ else ActionType.ASK
118
+ ),
119
+ metadata={
120
+ "confidence": result.confidence,
121
+ "model": result.model_name,
122
+ "label": result.label,
123
+ "inference_ms": result.inference_time_ms,
124
+ "all_scores": result.all_scores,
125
+ },
126
+ )
127
+ )
128
+
129
+ return ScreeningResult(
130
+ allowed=not result.is_dangerous,
131
+ plugin_name=self.name,
132
+ reason=(
133
+ f"Local model: {result.label} ({result.confidence:.1%})"
134
+ if result.is_suspicious
135
+ else "Local model: benign"
136
+ ),
137
+ risk_level=result.risk_level,
138
+ confidence=result.confidence,
139
+ should_prompt=result.is_suspicious,
140
+ findings=findings,
141
+ details={
142
+ "model": result.model_name,
143
+ "label": result.label,
144
+ "inference_ms": result.inference_time_ms,
145
+ "all_scores": result.all_scores,
146
+ },
147
+ )
148
+
149
+ def is_available(self) -> bool:
150
+ """Check if local model is available."""
151
+ try:
152
+ from tweek.security.local_model import (
153
+ LOCAL_MODEL_AVAILABLE,
154
+ get_local_model,
155
+ )
156
+
157
+ if not LOCAL_MODEL_AVAILABLE:
158
+ return False
159
+ return get_local_model() is not None
160
+ except ImportError:
161
+ return False