tweek 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. tweek/__init__.py +16 -0
  2. tweek/cli.py +3390 -0
  3. tweek/cli_helpers.py +193 -0
  4. tweek/config/__init__.py +13 -0
  5. tweek/config/allowed_dirs.yaml +23 -0
  6. tweek/config/manager.py +1064 -0
  7. tweek/config/patterns.yaml +751 -0
  8. tweek/config/tiers.yaml +129 -0
  9. tweek/diagnostics.py +589 -0
  10. tweek/hooks/__init__.py +1 -0
  11. tweek/hooks/pre_tool_use.py +861 -0
  12. tweek/integrations/__init__.py +3 -0
  13. tweek/integrations/moltbot.py +243 -0
  14. tweek/licensing.py +398 -0
  15. tweek/logging/__init__.py +9 -0
  16. tweek/logging/bundle.py +350 -0
  17. tweek/logging/json_logger.py +150 -0
  18. tweek/logging/security_log.py +745 -0
  19. tweek/mcp/__init__.py +24 -0
  20. tweek/mcp/approval.py +456 -0
  21. tweek/mcp/approval_cli.py +356 -0
  22. tweek/mcp/clients/__init__.py +37 -0
  23. tweek/mcp/clients/chatgpt.py +112 -0
  24. tweek/mcp/clients/claude_desktop.py +203 -0
  25. tweek/mcp/clients/gemini.py +178 -0
  26. tweek/mcp/proxy.py +667 -0
  27. tweek/mcp/screening.py +175 -0
  28. tweek/mcp/server.py +317 -0
  29. tweek/platform/__init__.py +131 -0
  30. tweek/plugins/__init__.py +835 -0
  31. tweek/plugins/base.py +1080 -0
  32. tweek/plugins/compliance/__init__.py +30 -0
  33. tweek/plugins/compliance/gdpr.py +333 -0
  34. tweek/plugins/compliance/gov.py +324 -0
  35. tweek/plugins/compliance/hipaa.py +285 -0
  36. tweek/plugins/compliance/legal.py +322 -0
  37. tweek/plugins/compliance/pci.py +361 -0
  38. tweek/plugins/compliance/soc2.py +275 -0
  39. tweek/plugins/detectors/__init__.py +30 -0
  40. tweek/plugins/detectors/continue_dev.py +206 -0
  41. tweek/plugins/detectors/copilot.py +254 -0
  42. tweek/plugins/detectors/cursor.py +192 -0
  43. tweek/plugins/detectors/moltbot.py +205 -0
  44. tweek/plugins/detectors/windsurf.py +214 -0
  45. tweek/plugins/git_discovery.py +395 -0
  46. tweek/plugins/git_installer.py +491 -0
  47. tweek/plugins/git_lockfile.py +338 -0
  48. tweek/plugins/git_registry.py +503 -0
  49. tweek/plugins/git_security.py +482 -0
  50. tweek/plugins/providers/__init__.py +30 -0
  51. tweek/plugins/providers/anthropic.py +181 -0
  52. tweek/plugins/providers/azure_openai.py +289 -0
  53. tweek/plugins/providers/bedrock.py +248 -0
  54. tweek/plugins/providers/google.py +197 -0
  55. tweek/plugins/providers/openai.py +230 -0
  56. tweek/plugins/scope.py +130 -0
  57. tweek/plugins/screening/__init__.py +26 -0
  58. tweek/plugins/screening/llm_reviewer.py +149 -0
  59. tweek/plugins/screening/pattern_matcher.py +273 -0
  60. tweek/plugins/screening/rate_limiter.py +174 -0
  61. tweek/plugins/screening/session_analyzer.py +159 -0
  62. tweek/proxy/__init__.py +302 -0
  63. tweek/proxy/addon.py +223 -0
  64. tweek/proxy/interceptor.py +313 -0
  65. tweek/proxy/server.py +315 -0
  66. tweek/sandbox/__init__.py +71 -0
  67. tweek/sandbox/executor.py +382 -0
  68. tweek/sandbox/linux.py +278 -0
  69. tweek/sandbox/profile_generator.py +323 -0
  70. tweek/screening/__init__.py +13 -0
  71. tweek/screening/context.py +81 -0
  72. tweek/security/__init__.py +22 -0
  73. tweek/security/llm_reviewer.py +348 -0
  74. tweek/security/rate_limiter.py +682 -0
  75. tweek/security/secret_scanner.py +506 -0
  76. tweek/security/session_analyzer.py +600 -0
  77. tweek/vault/__init__.py +40 -0
  78. tweek/vault/cross_platform.py +251 -0
  79. tweek/vault/keychain.py +288 -0
  80. tweek-0.1.0.dist-info/METADATA +335 -0
  81. tweek-0.1.0.dist-info/RECORD +85 -0
  82. tweek-0.1.0.dist-info/WHEEL +5 -0
  83. tweek-0.1.0.dist-info/entry_points.txt +25 -0
  84. tweek-0.1.0.dist-info/licenses/LICENSE +190 -0
  85. tweek-0.1.0.dist-info/top_level.txt +1 -0
tweek/plugins/base.py ADDED
@@ -0,0 +1,1080 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tweek Plugin Base Classes
4
+
5
+ Abstract base classes defining the interface for each plugin type:
6
+ - CompliancePlugin: Domain compliance (Gov, HIPAA, PCI, Legal)
7
+ - LLMProviderPlugin: LLM API provider detection and parsing
8
+ - ToolDetectorPlugin: Tool/IDE detection
9
+ - ScreeningPlugin: Security screening methods
10
+
11
+ All plugins should inherit from one of these base classes.
12
+ """
13
+
14
+ from abc import ABC, abstractmethod
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+ from typing import Optional, List, Dict, Any, Tuple
18
+ import re
19
+ import signal
20
+ import threading
21
+
22
+
23
+ # =============================================================================
24
+ # REDOS PROTECTION
25
+ # =============================================================================
26
+
27
class RegexError(Exception):
    """Base error for every regex-related failure raised by this module."""
30
+
31
+
32
class RegexTimeoutError(RegexError):
    """Raised when a regex operation exceeds its time limit (possible ReDoS)."""
35
+
36
+
37
class ReDoSProtection:
    """
    Protection against Regular Expression Denial of Service (ReDoS) attacks.

    ReDoS occurs when a crafted regex pattern causes exponential backtracking,
    consuming excessive CPU and potentially hanging the application.

    This class provides:
    1. Pattern validation to detect known dangerous patterns
    2. Timeout-based protection for regex execution
    3. Length limits on input strings
    """

    # Maximum allowed pattern length (characters)
    MAX_PATTERN_LENGTH = 1000

    # Maximum input length scanned per call; longer input is truncated
    MAX_INPUT_LENGTH = 1_000_000  # 1,000,000 characters

    # Default time limit for a single regex operation (seconds)
    DEFAULT_TIMEOUT = 5.0

    # Dangerous pattern indicators (simple heuristics).
    # Each entry is itself a regex that is searched for inside the candidate
    # pattern's source text.
    # NOTE: only the literal forms listed here are recognised; other nested
    # quantifiers such as (a+)+ are not detected by these heuristics.
    DANGEROUS_PATTERNS = [
        # Nested quantifiers
        r'\(\.\*\)\+',  # (.*)+
        r'\(\.\+\)\+',  # (.+)+
        r'\(\.\*\)\*',  # (.*)*
        r'\(\.\+\)\*',  # (.+)*
        # Overlapping alternation with quantifiers
        r'\([^)]*\|[^)]*\)\+',  # (a|a)+
        r'\([^)]*\|[^)]*\)\*',  # (a|a)*
    ]

    @classmethod
    def validate_pattern(cls, pattern: str) -> Tuple[bool, Optional[str]]:
        """
        Validate a regex pattern for potential ReDoS vulnerabilities.

        Args:
            pattern: The regex pattern to validate

        Returns:
            Tuple of (is_safe, error_message).
            If is_safe is True, error_message is None.
        """
        # Check length
        if len(pattern) > cls.MAX_PATTERN_LENGTH:
            return False, f"Pattern too long ({len(pattern)} > {cls.MAX_PATTERN_LENGTH})"

        # Check for dangerous constructs
        for dangerous in cls.DANGEROUS_PATTERNS:
            try:
                if re.search(dangerous, pattern):
                    return False, "Pattern contains potentially dangerous construct"
            except re.error:
                # A malformed heuristic must not block validation; skip it.
                continue

        # Finally make sure the pattern compiles at all
        try:
            re.compile(pattern)
        except re.error as e:
            return False, f"Invalid regex: {e}"

        return True, None

    @classmethod
    def safe_compile(
        cls,
        pattern: str,
        flags: int = 0,
        validate: bool = True
    ) -> re.Pattern:
        """
        Safely compile a regex pattern with ReDoS validation.

        Args:
            pattern: The regex pattern to compile
            flags: Regex flags (re.IGNORECASE, etc.)
            validate: Whether to validate for ReDoS (default True)

        Returns:
            Compiled regex pattern

        Raises:
            RegexError: If pattern is invalid or potentially dangerous
        """
        if validate:
            is_safe, error = cls.validate_pattern(pattern)
            if not is_safe:
                raise RegexError(f"Unsafe regex pattern: {error}")

        try:
            return re.compile(pattern, flags)
        except re.error as e:
            # Chain the original error so the traceback keeps the root cause.
            raise RegexError(f"Failed to compile regex: {e}") from e

    @staticmethod
    def _run_with_timeout(operation, timeout: float):
        """
        Run ``operation()`` under a SIGALRM-based time limit.

        The limit is only enforced when SIGALRM exists and the caller is the
        main thread; otherwise the operation simply runs without a timeout.
        A timeout of 0 arms no timer (setitimer treats 0 as "clear").

        Args:
            operation: Zero-argument callable performing the regex work
            timeout: Time limit in seconds

        Returns:
            Whatever ``operation()`` returns

        Raises:
            RegexTimeoutError: If the timer fires before the operation ends
        """
        # On Windows or in threaded context, just run without timeout
        if not hasattr(signal, 'SIGALRM') or threading.current_thread() is not threading.main_thread():
            return operation()

        def timeout_handler(signum, frame):
            raise RegexTimeoutError(f"Regex execution timed out after {timeout}s")

        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
        try:
            # Armed inside the try so a failing setitimer (e.g. a negative
            # timeout) still restores the previous handler below.
            signal.setitimer(signal.ITIMER_REAL, timeout)
            return operation()
        finally:
            signal.setitimer(signal.ITIMER_REAL, 0)
            # signal.signal() returns None when the previous handler was not
            # installed from Python; None cannot be passed back, so fall back
            # to the default disposition in that case.
            signal.signal(
                signal.SIGALRM,
                signal.SIG_DFL if old_handler is None else old_handler
            )

    @classmethod
    def safe_search(
        cls,
        pattern: re.Pattern,
        text: str,
        timeout: Optional[float] = None
    ) -> Optional[re.Match]:
        """
        Safely execute regex search with timeout protection.

        Note: Timeout protection only works on Unix-like systems (uses SIGALRM)
        and only from the main thread. Elsewhere this falls back to no timeout.

        Args:
            pattern: Compiled regex pattern
            text: Text to search (truncated to MAX_INPUT_LENGTH)
            timeout: Timeout in seconds (default: DEFAULT_TIMEOUT)

        Returns:
            Match object or None

        Raises:
            RegexTimeoutError: If regex execution times out
        """
        if timeout is None:
            timeout = cls.DEFAULT_TIMEOUT

        # Truncate input if too long
        if len(text) > cls.MAX_INPUT_LENGTH:
            text = text[:cls.MAX_INPUT_LENGTH]

        return cls._run_with_timeout(lambda: pattern.search(text), timeout)

    @classmethod
    def safe_finditer(
        cls,
        pattern: re.Pattern,
        text: str,
        timeout: Optional[float] = None,
        max_matches: int = 1000
    ) -> List[re.Match]:
        """
        Safely execute regex finditer with timeout and match limit.

        Args:
            pattern: Compiled regex pattern
            text: Text to search (truncated to MAX_INPUT_LENGTH)
            timeout: Timeout in seconds (default: DEFAULT_TIMEOUT)
            max_matches: Maximum number of matches to return

        Returns:
            List of match objects (at most max_matches)

        Raises:
            RegexTimeoutError: If regex execution times out
        """
        if timeout is None:
            timeout = cls.DEFAULT_TIMEOUT

        # Truncate input if too long
        if len(text) > cls.MAX_INPUT_LENGTH:
            text = text[:cls.MAX_INPUT_LENGTH]

        # Nothing can be returned, so do not run the regex at all.
        if max_matches <= 0:
            return []

        def collect() -> List[re.Match]:
            matches: List[re.Match] = []
            for match in pattern.finditer(text):
                matches.append(match)
                # Stop as soon as the cap is reached instead of computing one
                # extra match that would be thrown away.
                if len(matches) >= max_matches:
                    break
            return matches

        return cls._run_with_timeout(collect, timeout)
237
+
238
+
239
class ScanDirection(Enum):
    """Which side of the exchange a scan applies to."""

    # Incoming data (user input, tool results)
    INPUT = "input"
    # LLM outputs, checked before they are displayed to the user
    OUTPUT = "output"
    # Bidirectional scanning
    BOTH = "both"
244
+
245
+
246
class ActionType(Enum):
    """What to do with content once a finding has been raised."""

    # Let the content through unchanged
    ALLOW = "allow"
    # Let the content through, but warn the user
    WARN = "warn"
    # Block the content entirely
    BLOCK = "block"
    # Redact the matched content, then let it through
    REDACT = "redact"
    # Prompt the user for a decision
    ASK = "ask"
253
+
254
+
255
class Severity(Enum):
    """How serious a finding is, from LOW up to CRITICAL."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
261
+
262
+
263
@dataclass
class Finding:
    """
    One match or issue reported by a compliance or security scan.

    Security: the raw matched_text is kept on the object so redaction can be
    performed, but to_dict() (by default), redacted_text and redacted_context
    only expose a masked form, so that sensitive data does not leak into
    logs/exports by accident.
    """
    pattern_name: str
    matched_text: str
    severity: Severity
    description: Optional[str] = None
    line_number: Optional[int] = None
    column: Optional[int] = None
    context: Optional[str] = None
    recommended_action: ActionType = ActionType.WARN
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def redacted_text(self) -> str:
        """Masked form of matched_text (see _redact_text for the rules)."""
        return self._redact_text(self.matched_text)

    @property
    def redacted_context(self) -> Optional[str]:
        """Context with every occurrence of matched_text masked, or None."""
        surrounding = self.context
        if surrounding is None:
            return None
        return surrounding.replace(self.matched_text, self.redacted_text)

    @staticmethod
    def _redact_text(text: str) -> str:
        """
        Mask sensitive text while keeping its length.

        - Up to 4 chars: every character becomes an asterisk
        - 5 to 8 chars: first and last character kept, rest masked
        - Longer: first 2 and last 2 characters kept, rest masked
        """
        size = len(text)
        if size <= 4:
            return "*" * size
        # Number of characters left visible at each end
        visible = 1 if size <= 8 else 2
        return text[:visible] + "*" * (size - 2 * visible) + text[-visible:]

    def to_dict(self, include_raw: bool = False) -> Dict[str, Any]:
        """
        Convert to dictionary for serialization.

        Args:
            include_raw: If True, include raw matched_text and context
                (SECURITY: only use for internal processing, never for
                logs/exports)

        Returns:
            Dictionary with finding details. Unless include_raw is set,
            matched_text and context are redacted and a "text_length" entry
            holding the length of the raw match is added.
        """
        if include_raw:
            shown_text = self.matched_text
            shown_context = self.context
        else:
            shown_text = self.redacted_text
            shown_context = self.redacted_context

        result = {
            "pattern_name": self.pattern_name,
            "matched_text": shown_text,
            "severity": self.severity.value,
            "description": self.description,
            "line_number": self.line_number,
            "column": self.column,
            "context": shown_context,
            "recommended_action": self.recommended_action.value,
            "metadata": self.metadata,
        }
        if not include_raw:
            result["text_length"] = len(self.matched_text)
        return result
340
+
341
+
342
@dataclass
class ScanResult:
    """
    Outcome of one compliance/security scan.

    Carries every finding together with the overall action chosen for them.
    """
    passed: bool
    findings: List[Finding] = field(default_factory=list)
    action: ActionType = ActionType.ALLOW
    message: Optional[str] = None
    scan_direction: Optional[ScanDirection] = None
    plugin_name: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def finding_count(self) -> int:
        """Number of findings in this result."""
        return len(self.findings)

    @property
    def has_critical(self) -> bool:
        """True if at least one finding is CRITICAL."""
        for item in self.findings:
            if item.severity == Severity.CRITICAL:
                return True
        return False

    @property
    def has_high(self) -> bool:
        """True if at least one finding is HIGH or CRITICAL."""
        elevated = (Severity.HIGH, Severity.CRITICAL)
        for item in self.findings:
            if item.severity in elevated:
                return True
        return False

    @property
    def max_severity(self) -> Optional[Severity]:
        """Highest severity among the findings, or None without findings."""
        if not self.findings:
            return None
        # Position in this list defines the ordering, lowest first.
        ascending = [Severity.LOW, Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL]
        return max((item.severity for item in self.findings), key=ascending.index)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization (findings are redacted)."""
        direction = self.scan_direction
        worst = self.max_severity
        serialized_findings = [item.to_dict() for item in self.findings]
        return {
            "passed": self.passed,
            "findings": serialized_findings,
            "action": self.action.value,
            "message": self.message,
            "scan_direction": direction.value if direction else None,
            "plugin_name": self.plugin_name,
            "finding_count": self.finding_count,
            "max_severity": worst.value if worst else None,
            "metadata": self.metadata,
        }
389
+
390
+
391
@dataclass
class PatternDefinition:
    """Definition of a pattern to match against."""
    name: str
    regex: str
    severity: Severity
    description: str
    default_action: ActionType = ActionType.WARN
    enabled: bool = True
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Whether to validate pattern for ReDoS (disable for trusted built-in patterns)
    validate_redos: bool = False

    # Internal cache state. Excluded from comparison: a lock only equals
    # itself and the cache differs before/after compile(), so including them
    # would make two otherwise identical definitions compare unequal.
    _compiled: Optional[re.Pattern] = field(default=None, repr=False, compare=False)
    _compile_lock: threading.Lock = field(default_factory=threading.Lock, repr=False, compare=False)
    # True once self.regex has passed ReDoS validation.
    _validated: bool = field(default=False, init=False, repr=False, compare=False)

    def compile(self, validate: Optional[bool] = None) -> re.Pattern:
        """
        Compile and cache the regex pattern.

        The pattern is compiled with re.IGNORECASE | re.MULTILINE. The cache
        is filled while holding an internal lock. A request for validation is
        honoured even when the pattern was cached earlier without it.

        Args:
            validate: Override ReDoS validation (default: use self.validate_redos)

        Returns:
            Compiled regex pattern

        Raises:
            RegexError: If pattern is invalid or fails ReDoS validation
        """
        should_validate = self.validate_redos if validate is None else validate

        # Fast path: cached, and no validation is still owed
        cached = self._compiled
        if cached is not None and (self._validated or not should_validate):
            return cached

        # Slow path: acquire lock, then re-check the state under the lock
        with self._compile_lock:
            if self._compiled is None:
                self._compiled = ReDoSProtection.safe_compile(
                    self.regex,
                    flags=re.IGNORECASE | re.MULTILINE,
                    validate=should_validate
                )
            elif should_validate and not self._validated:
                # Cached without validation earlier: validate now rather than
                # silently handing back an unchecked pattern.
                is_safe, error = ReDoSProtection.validate_pattern(self.regex)
                if not is_safe:
                    raise RegexError(f"Unsafe regex pattern: {error}")
            if should_validate:
                self._validated = True
            return self._compiled
438
+
439
+
440
+ # =============================================================================
441
+ # COMPLIANCE PLUGIN BASE
442
+ # =============================================================================
443
+
444
class CompliancePlugin(ABC):
    """
    Base class for domain compliance plugins.

    Compliance plugins scan content for domain-specific sensitive information:
    - Gov: Classification markings, CUI, FOUO
    - HIPAA: PHI, medical records, patient data
    - PCI: Credit cards, CVVs, bank accounts
    - Legal: Attorney-client privilege markers

    Supports bidirectional scanning:
    - OUTPUT: Detect hallucinated sensitive content in LLM responses
    - INPUT: Detect real sensitive content in incoming data
    """

    # Class-level metadata (override in subclasses)
    VERSION = "1.0.0"
    DESCRIPTION = "Base compliance plugin"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "enterprise"
    TAGS = ["compliance"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the compliance plugin.

        Args:
            config: Optional configuration dictionary with:
                - enabled: bool
                - scan_direction: "input", "output", or "both"
                - actions: Dict mapping pattern names to actions
                - allowlist: List of exact strings to ignore (false positive suppression)
                - allowlist_patterns: List of regex patterns to ignore
                - suppressed_patterns: List of pattern names to disable
        """
        self._config = config or {}
        self._action_overrides: Dict[str, ActionType] = {}
        self._allowlist: List[str] = []
        self._allowlist_patterns: List[re.Pattern] = []
        self._suppressed_patterns: set = set()

        self._reload_from_config()

    def _reload_from_config(self) -> None:
        """Rebuild every derived setting from the current self._config."""
        # Action overrides are rebuilt from scratch so that an override that
        # is no longer present in the config does not linger.
        overrides: Dict[str, ActionType] = {}
        for pattern_name, action_str in self._config.get("actions", {}).items():
            try:
                overrides[pattern_name] = ActionType(action_str)
            except ValueError:
                # Unknown action name: keep the pattern's default action
                continue
        self._action_overrides = overrides

        # Exact string matches to ignore
        self._allowlist = self._config.get("allowlist", [])

        # Regex patterns whose matches are ignored
        allow_patterns: List[re.Pattern] = []
        for pattern_str in self._config.get("allowlist_patterns", []):
            try:
                allow_patterns.append(re.compile(pattern_str, re.IGNORECASE))
            except re.error:
                # Skip invalid patterns
                continue
        self._allowlist_patterns = allow_patterns

        # Names of patterns that are switched off
        self._suppressed_patterns = set(self._config.get("suppressed_patterns", []))

    @property
    @abstractmethod
    def name(self) -> str:
        """Plugin name (e.g., 'gov', 'hipaa')."""
        pass

    @property
    @abstractmethod
    def scan_direction(self) -> ScanDirection:
        """Which direction this plugin scans by default."""
        pass

    @abstractmethod
    def get_patterns(self) -> List[PatternDefinition]:
        """
        Return patterns this plugin checks for.

        Returns:
            List of PatternDefinition objects
        """
        pass

    def scan(self, content: str, direction: ScanDirection) -> ScanResult:
        """
        Scan content for compliance issues.

        Patterns that are disabled, suppressed by configuration, or that fail
        to compile are skipped. Matches covered by the allowlist are ignored.

        Args:
            content: Text content to scan
            direction: Whether this is input or output scanning

        Returns:
            ScanResult with findings and recommended action
        """
        # Check if we should scan this direction
        own_direction = self.scan_direction
        if own_direction != ScanDirection.BOTH and own_direction != direction:
            return ScanResult(
                passed=True,
                plugin_name=self.name,
                scan_direction=direction
            )

        findings: List[Finding] = []

        for pattern in self.get_patterns():
            if not pattern.enabled:
                continue

            # Check if pattern is suppressed
            if pattern.name in self._suppressed_patterns:
                continue

            try:
                compiled = pattern.compile()
            except (RegexError, re.error):
                # PatternDefinition.compile() reports invalid or unsafe
                # patterns as RegexError; skip the pattern, keep scanning.
                continue

            # Determine action (check for override); same for every match
            action = self._action_overrides.get(
                pattern.name,
                pattern.default_action
            )

            for match in compiled.finditer(content):
                matched_text = match.group()

                # Check if match is in allowlist (exact match)
                if matched_text in self._allowlist:
                    continue

                # Check if match is suppressed by allowlist pattern
                if self._is_allowlisted(matched_text):
                    continue

                # 1-based line number of the start of the match
                line_num = content[:match.start()].count('\n') + 1

                # Get context (surrounding text)
                context = self._get_context(content, match.start(), match.end())

                findings.append(Finding(
                    pattern_name=pattern.name,
                    matched_text=matched_text,
                    severity=pattern.severity,
                    description=pattern.description,
                    line_number=line_num,
                    context=context,
                    recommended_action=action,
                    metadata={"pattern_tags": pattern.tags}
                ))

        # Determine overall action (highest-priority action wins)
        action = self._determine_action(findings)

        # Generate message
        message = self._format_message(findings, direction)

        return ScanResult(
            passed=len(findings) == 0,
            findings=findings,
            action=action,
            message=message,
            scan_direction=direction,
            plugin_name=self.name
        )

    def _get_context(
        self,
        content: str,
        start: int,
        end: int,
        context_chars: int = 50
    ) -> str:
        """
        Get surrounding context for a match.

        Returns content[start - context_chars:end + context_chars], clamped
        to the content bounds, with "..." marking each side that was cut.
        """
        ctx_start = max(0, start - context_chars)
        ctx_end = min(len(content), end + context_chars)

        prefix = "..." if ctx_start > 0 else ""
        suffix = "..." if ctx_end < len(content) else ""

        return f"{prefix}{content[ctx_start:ctx_end]}{suffix}"

    def _is_allowlisted(self, text: str) -> bool:
        """
        Check if text matches any allowlist pattern.

        Args:
            text: The matched text to check

        Returns:
            True if text should be suppressed (matches allowlist)
        """
        return any(pattern.search(text) for pattern in self._allowlist_patterns)

    def _determine_action(self, findings: List[Finding]) -> ActionType:
        """
        Determine overall action based on findings.

        Returns ActionType.ALLOW without findings, otherwise the
        highest-priority recommended action among them.
        """
        if not findings:
            return ActionType.ALLOW

        # Priority: BLOCK > REDACT > ASK > WARN > ALLOW
        action_priority = [
            ActionType.ALLOW,
            ActionType.WARN,
            ActionType.ASK,
            ActionType.REDACT,
            ActionType.BLOCK,
        ]

        return max(
            (finding.recommended_action for finding in findings),
            key=action_priority.index
        )

    def _format_message(
        self,
        findings: List[Finding],
        direction: ScanDirection
    ) -> Optional[str]:
        """
        Format a user-facing message about findings.

        Returns None without findings. Only CRITICAL and HIGH counts are
        broken out; the headline counts all findings.
        """
        if not findings:
            return None

        if direction == ScanDirection.OUTPUT:
            prefix = "LLM output contains"
            suffix = "These may be hallucinated and should be verified."
        else:
            prefix = "Input data contains"
            suffix = "Verify proper handling procedures."

        # Group by severity
        critical = [f for f in findings if f.severity == Severity.CRITICAL]
        high = [f for f in findings if f.severity == Severity.HIGH]

        lines = [f"{prefix} {len(findings)} potential compliance issue(s):"]

        if critical:
            lines.append(f" CRITICAL: {len(critical)} finding(s)")
        if high:
            lines.append(f" HIGH: {len(high)} finding(s)")

        lines.append(suffix)

        return "\n".join(lines)

    def configure(self, config: Dict[str, Any]) -> None:
        """
        Update plugin configuration.

        The given keys are merged into the existing configuration, then all
        derived settings (action overrides, allowlists, suppressed patterns)
        are rebuilt from the merged result.
        """
        self._config.update(config)
        self._reload_from_config()
718
+
719
+
720
+ # =============================================================================
721
+ # LLM PROVIDER PLUGIN BASE
722
+ # =============================================================================
723
+
724
@dataclass
class ToolCall:
    """Represents a tool call extracted from an LLM response."""
    id: str
    name: str
    input: Dict[str, Any]
    provider: str
    raw: Optional[Dict[str, Any]] = None


class LLMProviderPlugin(ABC):
    """
    Base class for LLM provider plugins.

    Provider plugins handle provider-specific API formats:
    - Endpoint detection
    - Tool call extraction from responses
    - Request/response parsing
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Base LLM provider plugin"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "free"
    TAGS = ["provider"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Store the optional configuration dictionary (empty if omitted)."""
        self._config = config or {}

    @property
    @abstractmethod
    def name(self) -> str:
        """Provider name (e.g., 'anthropic', 'openai')."""
        pass

    @property
    @abstractmethod
    def api_hosts(self) -> List[str]:
        """List of API hostnames for this provider."""
        pass

    def matches_endpoint(self, url: str) -> bool:
        """
        Check if URL matches this provider's API.

        The hostname is taken from the URL with the standard URL parser, so
        userinfo ("user:pw@host"), ports, query strings and fragments are
        never mistaken for part of the hostname. The comparison ignores case.

        Args:
            url: URL or bare hostname (optionally with port/path) to check

        Returns:
            True if this provider handles the URL
        """
        # Local import: keeps this block self-contained.
        from urllib.parse import urlsplit

        try:
            parts = urlsplit(url)
            if not parts.netloc:
                # No "scheme://" present (e.g. "host:443/path" or "host"):
                # prefix "//" so the parser treats the text as an authority.
                parts = urlsplit("//" + url)
            host = parts.hostname
        except ValueError:
            # Malformed authority (e.g. unbalanced IPv6 brackets)
            return False

        if not host:
            return False

        # urlsplit lower-cases the hostname; compare on the same footing.
        return host in {known.lower() for known in self.api_hosts}

    @abstractmethod
    def extract_tool_calls(self, response: Dict[str, Any]) -> List[ToolCall]:
        """
        Extract tool calls from provider-specific response format.

        Args:
            response: Parsed JSON response from the API

        Returns:
            List of ToolCall objects
        """
        pass

    def extract_content(self, response: Dict[str, Any]) -> str:
        """
        Extract text content from provider-specific response.

        Args:
            response: Parsed JSON response from the API

        Returns:
            Text content from the response
        """
        # Default implementation - override for provider-specific format
        return ""

    def configure(self, config: Dict[str, Any]) -> None:
        """Update plugin configuration by merging the given keys."""
        self._config.update(config)
815
+
816
+
817
+ # =============================================================================
818
+ # TOOL DETECTOR PLUGIN BASE
819
+ # =============================================================================
820
+
821
@dataclass
class DetectionResult:
    """Outcome of probing for one tool/IDE."""
    # Whether the tool was found, and which tool this result is about
    detected: bool
    tool_name: str
    # Optional details; left at their defaults when not provided
    version: Optional[str] = None
    install_path: Optional[str] = None
    config_path: Optional[str] = None
    running: bool = False
    port: Optional[int] = None
    # Free-form extra data; each instance gets its own dict
    metadata: Dict[str, Any] = field(default_factory=dict)
832
+
833
+
834
class ToolDetectorPlugin(ABC):
    """
    Base class for tool detector plugins.

    Detector plugins identify installed LLM tools/IDEs:
    - Installation detection
    - Running process detection
    - Configuration location
    - Conflict detection
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Base tool detector plugin"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "free"
    TAGS = ["detector"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Keep the supplied configuration, or start with an empty one."""
        self._config = config if config else {}

    @property
    @abstractmethod
    def name(self) -> str:
        """Tool name (e.g., 'moltbot', 'cursor')."""

    @abstractmethod
    def detect(self) -> DetectionResult:
        """
        Detect if tool is installed/running.

        Returns:
            DetectionResult with detection information
        """

    def get_conflicts(self) -> List[str]:
        """
        Get list of potential conflicts with this tool.

        The base implementation reports none.

        Returns:
            List of conflict descriptions
        """
        no_conflicts: List[str] = []
        return no_conflicts

    def configure(self, config: Dict[str, Any]) -> None:
        """Merge the given keys into the plugin configuration."""
        self._config.update(config)
882
+
883
+
884
+ # =============================================================================
885
+ # SCREENING PLUGIN BASE
886
+ # =============================================================================
887
+
888
@dataclass
class ScreeningResult:
    """Decision produced by one security screening check."""
    allowed: bool
    plugin_name: str
    reason: Optional[str] = None
    risk_level: Optional[str] = None  # "safe", "suspicious", "dangerous"
    confidence: float = 1.0
    should_prompt: bool = False
    details: Dict[str, Any] = field(default_factory=dict)
    findings: List[Finding] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary for serialization; findings via to_dict()."""
        serialized_findings = [item.to_dict() for item in self.findings]
        payload: Dict[str, Any] = {
            "allowed": self.allowed,
            "plugin_name": self.plugin_name,
            "reason": self.reason,
            "risk_level": self.risk_level,
            "confidence": self.confidence,
            "should_prompt": self.should_prompt,
            "details": self.details,
            "findings": serialized_findings,
        }
        return payload
911
+
912
+
913
class ScreeningPlugin(ABC):
    """
    Base class for screening method plugins.

    Screening plugins analyze tool invocations for security risks:
    - Rate limiting
    - Pattern matching
    - LLM-based review
    - Session analysis

    Concrete plugins must implement the ``name`` property and ``screen()``.
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Base screening plugin"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "free"
    TAGS = ["screening"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the plugin with an optional configuration mapping.

        Args:
            config: Initial plugin settings. The mapping is shallow-copied so
                that later configure() calls never modify the caller's dict.
        """
        # `config or {}` kept a reference to any non-empty dict passed in, so
        # configure() silently mutated the caller's object (and any other
        # plugin built from the same dict). Copy to keep the state private.
        self._config = dict(config) if config else {}

    @property
    @abstractmethod
    def name(self) -> str:
        """Screening method name (e.g., 'rate_limiter', 'pattern_matcher')."""
        pass

    @abstractmethod
    def screen(
        self,
        tool_name: str,
        content: str,
        context: Dict[str, Any]
    ) -> ScreeningResult:
        """
        Screen a tool invocation for security risks.

        Args:
            tool_name: Name of the tool being invoked
            content: Command or content to screen
            context: Additional context (session_id, tool_input, tier, etc.)

        Returns:
            ScreeningResult with screening decision
        """
        pass

    def configure(self, config: Dict[str, Any]) -> None:
        """
        Update plugin configuration.

        Args:
            config: Settings to merge in; existing keys are overwritten.
        """
        self._config.update(config)
962
+
963
+
964
+ # =============================================================================
965
+ # UTILITY FUNCTIONS
966
+ # =============================================================================
967
+
968
def combine_scan_results(results: List[ScanResult]) -> ScanResult:
    """
    Merge several scan results into one.

    Findings are concatenated in input order, non-empty messages are joined
    with a blank line between them, and the combined action is the strictest
    action seen (never weaker than ALLOW).

    Args:
        results: List of ScanResult objects

    Returns:
        Combined ScanResult with all findings; a passing, empty ScanResult
        when ``results`` is empty
    """
    if not results:
        return ScanResult(passed=True)

    merged_findings = [finding for scan in results for finding in scan.findings]
    notes = [scan.message for scan in results if scan.message]

    # Ordered from least to most restrictive; position in the list is the rank.
    strictness = [
        ActionType.ALLOW,
        ActionType.WARN,
        ActionType.ASK,
        ActionType.REDACT,
        ActionType.BLOCK,
    ]
    # ALLOW is seeded first so it is the outcome when nothing is stricter.
    candidates = [ActionType.ALLOW]
    candidates.extend(scan.action for scan in results)
    strictest_action = max(candidates, key=strictness.index)

    if notes:
        combined_message = "\n\n".join(notes)
    else:
        combined_message = None

    contributing_plugins = [scan.plugin_name for scan in results if scan.plugin_name]

    return ScanResult(
        passed=not merged_findings,
        findings=merged_findings,
        action=strictest_action,
        message=combined_message,
        metadata={"combined_from": contributing_plugins}
    )
1010
+
1011
+
1012
def combine_screening_results(results: List[ScreeningResult]) -> ScreeningResult:
    """
    Merge several screening results into one.

    The combined result is allowed only if every input is allowed, prompts if
    any input asks to prompt, carries every finding, and reports the highest
    known risk level ("safe" when none is higher). Only reasons from blocking
    results are kept, joined with "; ".

    Args:
        results: List of ScreeningResult objects

    Returns:
        Combined ScreeningResult named "combined"; an allowed result when
        ``results`` is empty
    """
    if not results:
        return ScreeningResult(allowed=True, plugin_name="combined")

    # Unknown risk labels rank as 0, so they can never displace "safe".
    risk_rank = {"safe": 0, "suspicious": 1, "dangerous": 2}

    everything_allowed = True
    prompt_needed = False
    worst_risk = "safe"
    merged_findings = []
    block_reasons = []
    source_plugins = []

    for outcome in results:
        source_plugins.append(outcome.plugin_name)
        merged_findings.extend(outcome.findings)

        if outcome.should_prompt:
            prompt_needed = True

        if not outcome.allowed:
            # A single blocking result blocks the combined result.
            everything_allowed = False
            if outcome.reason:
                block_reasons.append(outcome.reason)

        level = outcome.risk_level
        if level and risk_rank.get(level, 0) > risk_rank.get(worst_risk, 0):
            worst_risk = level

    if block_reasons:
        combined_reason = "; ".join(block_reasons)
    else:
        combined_reason = None

    return ScreeningResult(
        allowed=everything_allowed,
        plugin_name="combined",
        reason=combined_reason,
        risk_level=worst_risk,
        should_prompt=prompt_needed,
        details={"from_plugins": source_plugins},
        findings=merged_findings
    )
1053
+
1054
+
1055
+ # Public API
1056
__all__ = [
    # Enumerations
    "ScanDirection",
    "ActionType",
    "Severity",
    # Exception types
    "RegexError",
    "RegexTimeoutError",
    # Result / data containers
    "Finding",
    "ScanResult",
    "PatternDefinition",
    "ToolCall",
    "DetectionResult",
    "ScreeningResult",
    # Abstract plugin base classes
    "CompliancePlugin",
    "LLMProviderPlugin",
    "ToolDetectorPlugin",
    "ScreeningPlugin",
    # Helpers
    "ReDoSProtection",
    "combine_scan_results",
    "combine_screening_results",
]