tweek 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. tweek/__init__.py +2 -2
  2. tweek/_keygen.py +53 -0
  3. tweek/audit.py +288 -0
  4. tweek/cli.py +5303 -2396
  5. tweek/cli_model.py +380 -0
  6. tweek/config/families.yaml +609 -0
  7. tweek/config/manager.py +42 -5
  8. tweek/config/patterns.yaml +1510 -8
  9. tweek/config/tiers.yaml +161 -11
  10. tweek/diagnostics.py +71 -2
  11. tweek/hooks/break_glass.py +163 -0
  12. tweek/hooks/feedback.py +223 -0
  13. tweek/hooks/overrides.py +531 -0
  14. tweek/hooks/post_tool_use.py +472 -0
  15. tweek/hooks/pre_tool_use.py +1024 -62
  16. tweek/integrations/openclaw.py +443 -0
  17. tweek/integrations/openclaw_server.py +385 -0
  18. tweek/licensing.py +14 -54
  19. tweek/logging/bundle.py +2 -2
  20. tweek/logging/security_log.py +56 -13
  21. tweek/mcp/approval.py +57 -16
  22. tweek/mcp/proxy.py +18 -0
  23. tweek/mcp/screening.py +5 -5
  24. tweek/mcp/server.py +4 -1
  25. tweek/memory/__init__.py +24 -0
  26. tweek/memory/queries.py +223 -0
  27. tweek/memory/safety.py +140 -0
  28. tweek/memory/schemas.py +80 -0
  29. tweek/memory/store.py +989 -0
  30. tweek/platform/__init__.py +4 -4
  31. tweek/plugins/__init__.py +40 -24
  32. tweek/plugins/base.py +1 -1
  33. tweek/plugins/detectors/__init__.py +3 -3
  34. tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
  35. tweek/plugins/git_discovery.py +16 -4
  36. tweek/plugins/git_registry.py +8 -2
  37. tweek/plugins/git_security.py +21 -9
  38. tweek/plugins/screening/__init__.py +10 -1
  39. tweek/plugins/screening/heuristic_scorer.py +477 -0
  40. tweek/plugins/screening/llm_reviewer.py +14 -6
  41. tweek/plugins/screening/local_model_reviewer.py +161 -0
  42. tweek/proxy/__init__.py +38 -37
  43. tweek/proxy/addon.py +22 -3
  44. tweek/proxy/interceptor.py +1 -0
  45. tweek/proxy/server.py +4 -2
  46. tweek/sandbox/__init__.py +11 -0
  47. tweek/sandbox/docker_bridge.py +143 -0
  48. tweek/sandbox/executor.py +9 -6
  49. tweek/sandbox/layers.py +97 -0
  50. tweek/sandbox/linux.py +1 -0
  51. tweek/sandbox/project.py +548 -0
  52. tweek/sandbox/registry.py +149 -0
  53. tweek/security/__init__.py +9 -0
  54. tweek/security/language.py +250 -0
  55. tweek/security/llm_reviewer.py +1146 -60
  56. tweek/security/local_model.py +331 -0
  57. tweek/security/local_reviewer.py +146 -0
  58. tweek/security/model_registry.py +371 -0
  59. tweek/security/rate_limiter.py +11 -6
  60. tweek/security/secret_scanner.py +70 -4
  61. tweek/security/session_analyzer.py +26 -2
  62. tweek/skill_template/SKILL.md +200 -0
  63. tweek/skill_template/__init__.py +0 -0
  64. tweek/skill_template/cli-reference.md +331 -0
  65. tweek/skill_template/overrides-reference.md +184 -0
  66. tweek/skill_template/scripts/__init__.py +0 -0
  67. tweek/skill_template/scripts/check_installed.py +170 -0
  68. tweek/skills/__init__.py +38 -0
  69. tweek/skills/config.py +150 -0
  70. tweek/skills/fingerprints.py +198 -0
  71. tweek/skills/guard.py +293 -0
  72. tweek/skills/isolation.py +469 -0
  73. tweek/skills/scanner.py +715 -0
  74. tweek/vault/__init__.py +0 -1
  75. tweek/vault/cross_platform.py +12 -1
  76. tweek/vault/keychain.py +87 -29
  77. tweek-0.2.0.dist-info/METADATA +281 -0
  78. tweek-0.2.0.dist-info/RECORD +121 -0
  79. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/entry_points.txt +8 -1
  80. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/licenses/LICENSE +80 -0
  81. tweek/integrations/moltbot.py +0 -243
  82. tweek-0.1.0.dist-info/METADATA +0 -335
  83. tweek-0.1.0.dist-info/RECORD +0 -85
  84. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/WHEEL +0 -0
  85. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,17 @@ Advanced security components for detecting and preventing attacks:
5
5
  - Rate limiting for resource theft protection
6
6
  - Session analysis for cross-turn anomaly detection
7
7
  - LLM-based secondary review for risky operations
8
+ - Language detection for non-English prompt injection
8
9
  """
9
10
 
10
11
  from tweek.security.rate_limiter import RateLimiter, RateLimitResult, RateLimitConfig
11
12
  from tweek.security.session_analyzer import SessionAnalyzer, SessionAnalysis
12
13
  from tweek.security.llm_reviewer import LLMReviewer, LLMReviewResult
14
+ from tweek.security.language import (
15
+ detect_non_english,
16
+ LanguageDetectionResult,
17
+ NonEnglishHandling,
18
+ )
13
19
 
14
20
  __all__ = [
15
21
  "RateLimiter",
@@ -19,4 +25,7 @@ __all__ = [
19
25
  "SessionAnalysis",
20
26
  "LLMReviewer",
21
27
  "LLMReviewResult",
28
+ "detect_non_english",
29
+ "LanguageDetectionResult",
30
+ "NonEnglishHandling",
22
31
  ]
@@ -0,0 +1,250 @@
1
+ """
2
+ Tweek Language Detection
3
+
4
+ Lightweight, zero-dependency detection of non-English natural language content.
5
+ Uses Unicode script analysis to identify when tool call content contains
6
+ non-English text that would bypass English-only regex patterns (prompt injection,
7
+ social engineering, etc.).
8
+
9
+ Technical shell commands (file paths, tool names, flags) are language-independent
10
+ and do not trigger detection.
11
+ """
12
+
13
+ import re
14
+ import unicodedata
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ from typing import Optional
18
+
19
+
20
+ class NonEnglishHandling(Enum):
21
+ """How to handle non-English content in the screening pipeline."""
22
+ ESCALATE = "escalate" # Auto-escalate tier to force LLM review (default)
23
+ TRANSLATE = "translate" # Translate to English before pattern matching
24
+ BOTH = "both" # Escalate AND translate
25
+ NONE = "none" # No special handling
26
+
27
+
28
+ @dataclass
29
+ class LanguageDetectionResult:
30
+ """Result of language detection on tool call content."""
31
+ has_non_english: bool
32
+ confidence: float # 0.0 - 1.0
33
+ detected_scripts: list # e.g. ["CJK", "CYRILLIC", "ARABIC"]
34
+ non_english_ratio: float # ratio of non-English characters to total
35
+ sample: Optional[str] = None # short sample of detected non-English text
36
+
37
+
38
+ # Unicode block ranges for non-Latin scripts commonly used in prompt injection
39
+ # We detect these because English-only regex patterns cannot match them
40
+ _SCRIPT_RANGES = {
41
+ "CJK": [
42
+ (0x4E00, 0x9FFF), # CJK Unified Ideographs
43
+ (0x3400, 0x4DBF), # CJK Extension A
44
+ (0x3000, 0x303F), # CJK Symbols and Punctuation
45
+ (0x3040, 0x309F), # Hiragana
46
+ (0x30A0, 0x30FF), # Katakana
47
+ (0xAC00, 0xD7AF), # Hangul Syllables
48
+ ],
49
+ "CYRILLIC": [
50
+ (0x0400, 0x04FF), # Cyrillic
51
+ (0x0500, 0x052F), # Cyrillic Supplement
52
+ ],
53
+ "ARABIC": [
54
+ (0x0600, 0x06FF), # Arabic
55
+ (0x0750, 0x077F), # Arabic Supplement
56
+ (0xFB50, 0xFDFF), # Arabic Presentation Forms-A
57
+ ],
58
+ "DEVANAGARI": [
59
+ (0x0900, 0x097F), # Devanagari
60
+ ],
61
+ "THAI": [
62
+ (0x0E00, 0x0E7F), # Thai
63
+ ],
64
+ "HEBREW": [
65
+ (0x0590, 0x05FF), # Hebrew
66
+ ],
67
+ "GREEK": [
68
+ (0x0370, 0x03FF), # Greek and Coptic
69
+ ],
70
+ "GEORGIAN": [
71
+ (0x10A0, 0x10FF), # Georgian
72
+ ],
73
+ "ARMENIAN": [
74
+ (0x0530, 0x058F), # Armenian
75
+ ],
76
+ "BENGALI": [
77
+ (0x0980, 0x09FF), # Bengali
78
+ ],
79
+ "TAMIL": [
80
+ (0x0B80, 0x0BFF), # Tamil
81
+ ],
82
+ "KOREAN": [
83
+ (0x1100, 0x11FF), # Hangul Jamo
84
+ ],
85
+ "ETHIOPIC": [
86
+ (0x1200, 0x137F), # Ethiopic
87
+ ],
88
+ }
89
+
90
+ # Extended Latin characters used in European languages (French, German, Spanish, etc.)
91
+ # These are harder to detect as "non-English" since they use Latin script,
92
+ # so we use word-level heuristics instead.
93
+ _EXTENDED_LATIN_RANGE = (0x00C0, 0x024F) # Latin Extended-A and B
94
+
95
+ # Common non-English European words that signal prompt injection in other languages.
96
+ # These are the translations of key injection phrases.
97
+ _NON_ENGLISH_INJECTION_KEYWORDS = [
98
+ # French
99
+ r"\b(ignorez|oubliez|annulez|remplacez)\s+(les\s+)?(instructions|directives|règles)",
100
+ r"\b(tu\s+es\s+maintenant|agis\s+comme|fais\s+semblant)",
101
+ r"\b(en\s+tant\s+qu[e'])\s*(admin|root|propriétaire)",
102
+ # German
103
+ r"\b(ignoriere|vergiss|überschreibe)\s+(die\s+)?(vorherigen|bisherigen)\s+(Anweisungen|Regeln|Instruktionen)",
104
+ r"\b(du\s+bist\s+jetzt|handle\s+als|tu\s+so\s+als)",
105
+ # Spanish
106
+ r"\b(ignora|olvida|anula)\s+(las\s+)?(instrucciones|directivas|reglas)\s+(anteriores|previas)",
107
+ r"\b(ahora\s+eres|actúa\s+como|finge\s+ser)",
108
+ # Portuguese
109
+ r"\b(ignore|esqueça|anule)\s+(as\s+)?(instruções|diretivas|regras)\s+(anteriores|prévias)",
110
+ r"\b(agora\s+você\s+é|atue\s+como|finja\s+ser)",
111
+ # Italian
112
+ r"\b(ignora|dimentica|sovrascrivi)\s+(le\s+)?(istruzioni|direttive|regole)\s+(precedenti|anteriori)",
113
+ r"\b(ora\s+sei|agisci\s+come|fingi\s+di\s+essere)",
114
+ # Russian (transliterated)
115
+ r"\b(ignoriruj|zabudj|otmeni)\s+(predydushchie|sistemnye)\s+(instrukcii|pravila)",
116
+ # Dutch
117
+ r"\b(negeer|vergeet)\s+(de\s+)?(vorige|eerdere)\s+(instructies|regels)",
118
+ # Chinese (pinyin patterns that might appear in mixed content)
119
+ r"\b(hūlüè|wúshì)\s+(zhǐlìng|guīzé)",
120
+ # Japanese (romaji)
121
+ r"\b(mushi\s+shite|aratana\s+yakuwari)",
122
+ ]
123
+
124
+ # Compile the keyword patterns
125
+ _COMPILED_KEYWORDS = [re.compile(p, re.IGNORECASE | re.UNICODE) for p in _NON_ENGLISH_INJECTION_KEYWORDS]
126
+
127
+ # Characters to exclude from analysis (technical syntax, punctuation, digits)
128
+ _TECHNICAL_CHARS = re.compile(r'[a-zA-Z0-9\s\-_./\\:;,!?@#$%^&*()+=\[\]{}<>|~`"\']+')
129
+
130
+
131
+ def _get_script(char: str) -> Optional[str]:
132
+ """Determine which non-Latin script a character belongs to."""
133
+ cp = ord(char)
134
+ for script_name, ranges in _SCRIPT_RANGES.items():
135
+ for start, end in ranges:
136
+ if start <= cp <= end:
137
+ return script_name
138
+ return None
139
+
140
+
141
+ def _has_extended_latin(text: str) -> bool:
142
+ """Check if text contains extended Latin characters (accented European)."""
143
+ for char in text:
144
+ cp = ord(char)
145
+ if _EXTENDED_LATIN_RANGE[0] <= cp <= _EXTENDED_LATIN_RANGE[1]:
146
+ return True
147
+ return False
148
+
149
+
150
+ def detect_non_english(content: str, min_confidence: float = 0.3) -> LanguageDetectionResult:
151
+ """
152
+ Detect non-English natural language in tool call content.
153
+
154
+ Uses Unicode script analysis for non-Latin scripts (CJK, Cyrillic, Arabic, etc.)
155
+ and keyword matching for Latin-script European languages.
156
+
157
+ Technical content (shell commands, file paths, flags) is ignored since it's
158
+ language-independent.
159
+
160
+ Args:
161
+ content: The tool call content to analyze
162
+ min_confidence: Minimum confidence threshold to report detection
163
+
164
+ Returns:
165
+ LanguageDetectionResult with detection details
166
+ """
167
+ if not content or len(content.strip()) < 3:
168
+ return LanguageDetectionResult(
169
+ has_non_english=False,
170
+ confidence=0.0,
171
+ detected_scripts=[],
172
+ non_english_ratio=0.0,
173
+ )
174
+
175
+ detected_scripts = set()
176
+ non_latin_count = 0
177
+ total_alpha = 0
178
+ sample_chars = []
179
+
180
+ # Pass 1: Unicode script detection (non-Latin scripts)
181
+ for char in content:
182
+ if char.isalpha():
183
+ total_alpha += 1
184
+ script = _get_script(char)
185
+ if script:
186
+ non_latin_count += 1
187
+ detected_scripts.add(script)
188
+ if len(sample_chars) < 30:
189
+ sample_chars.append(char)
190
+
191
+ # Calculate non-English ratio from non-Latin characters
192
+ non_english_ratio = non_latin_count / max(total_alpha, 1)
193
+
194
+ # If we found non-Latin scripts, that's a strong signal
195
+ if non_latin_count >= 5 and non_english_ratio >= 0.05:
196
+ confidence = min(1.0, non_english_ratio * 2 + 0.3)
197
+ sample = "".join(sample_chars) if sample_chars else None
198
+
199
+ if confidence >= min_confidence:
200
+ return LanguageDetectionResult(
201
+ has_non_english=True,
202
+ confidence=confidence,
203
+ detected_scripts=sorted(detected_scripts),
204
+ non_english_ratio=non_english_ratio,
205
+ sample=sample,
206
+ )
207
+
208
+ # Pass 2: Latin-script European language detection via keyword matching
209
+ # This catches French, German, Spanish, etc. prompt injection phrases
210
+ for pattern in _COMPILED_KEYWORDS:
211
+ match = pattern.search(content)
212
+ if match:
213
+ detected_scripts.add("LATIN_EUROPEAN")
214
+ sample = match.group(0)[:50]
215
+ return LanguageDetectionResult(
216
+ has_non_english=True,
217
+ confidence=0.8,
218
+ detected_scripts=sorted(detected_scripts),
219
+ non_english_ratio=0.0, # Can't easily compute for Latin scripts
220
+ sample=sample,
221
+ )
222
+
223
+ # Pass 3: Extended Latin character density check
224
+ # High density of accented characters suggests European language
225
+ if _has_extended_latin(content) and total_alpha > 10:
226
+ extended_count = sum(
227
+ 1 for c in content
228
+ if _EXTENDED_LATIN_RANGE[0] <= ord(c) <= _EXTENDED_LATIN_RANGE[1]
229
+ )
230
+ extended_ratio = extended_count / max(total_alpha, 1)
231
+
232
+ if extended_ratio >= 0.08: # 8%+ accented characters suggests non-English
233
+ detected_scripts.add("LATIN_EXTENDED")
234
+ confidence = min(1.0, extended_ratio * 5)
235
+
236
+ if confidence >= min_confidence:
237
+ return LanguageDetectionResult(
238
+ has_non_english=True,
239
+ confidence=confidence,
240
+ detected_scripts=sorted(detected_scripts),
241
+ non_english_ratio=extended_ratio,
242
+ )
243
+
244
+ # No significant non-English content detected
245
+ return LanguageDetectionResult(
246
+ has_non_english=False,
247
+ confidence=0.0,
248
+ detected_scripts=[],
249
+ non_english_ratio=0.0,
250
+ )