tweek 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tweek/__init__.py +16 -0
- tweek/cli.py +3390 -0
- tweek/cli_helpers.py +193 -0
- tweek/config/__init__.py +13 -0
- tweek/config/allowed_dirs.yaml +23 -0
- tweek/config/manager.py +1064 -0
- tweek/config/patterns.yaml +751 -0
- tweek/config/tiers.yaml +129 -0
- tweek/diagnostics.py +589 -0
- tweek/hooks/__init__.py +1 -0
- tweek/hooks/pre_tool_use.py +861 -0
- tweek/integrations/__init__.py +3 -0
- tweek/integrations/moltbot.py +243 -0
- tweek/licensing.py +398 -0
- tweek/logging/__init__.py +9 -0
- tweek/logging/bundle.py +350 -0
- tweek/logging/json_logger.py +150 -0
- tweek/logging/security_log.py +745 -0
- tweek/mcp/__init__.py +24 -0
- tweek/mcp/approval.py +456 -0
- tweek/mcp/approval_cli.py +356 -0
- tweek/mcp/clients/__init__.py +37 -0
- tweek/mcp/clients/chatgpt.py +112 -0
- tweek/mcp/clients/claude_desktop.py +203 -0
- tweek/mcp/clients/gemini.py +178 -0
- tweek/mcp/proxy.py +667 -0
- tweek/mcp/screening.py +175 -0
- tweek/mcp/server.py +317 -0
- tweek/platform/__init__.py +131 -0
- tweek/plugins/__init__.py +835 -0
- tweek/plugins/base.py +1080 -0
- tweek/plugins/compliance/__init__.py +30 -0
- tweek/plugins/compliance/gdpr.py +333 -0
- tweek/plugins/compliance/gov.py +324 -0
- tweek/plugins/compliance/hipaa.py +285 -0
- tweek/plugins/compliance/legal.py +322 -0
- tweek/plugins/compliance/pci.py +361 -0
- tweek/plugins/compliance/soc2.py +275 -0
- tweek/plugins/detectors/__init__.py +30 -0
- tweek/plugins/detectors/continue_dev.py +206 -0
- tweek/plugins/detectors/copilot.py +254 -0
- tweek/plugins/detectors/cursor.py +192 -0
- tweek/plugins/detectors/moltbot.py +205 -0
- tweek/plugins/detectors/windsurf.py +214 -0
- tweek/plugins/git_discovery.py +395 -0
- tweek/plugins/git_installer.py +491 -0
- tweek/plugins/git_lockfile.py +338 -0
- tweek/plugins/git_registry.py +503 -0
- tweek/plugins/git_security.py +482 -0
- tweek/plugins/providers/__init__.py +30 -0
- tweek/plugins/providers/anthropic.py +181 -0
- tweek/plugins/providers/azure_openai.py +289 -0
- tweek/plugins/providers/bedrock.py +248 -0
- tweek/plugins/providers/google.py +197 -0
- tweek/plugins/providers/openai.py +230 -0
- tweek/plugins/scope.py +130 -0
- tweek/plugins/screening/__init__.py +26 -0
- tweek/plugins/screening/llm_reviewer.py +149 -0
- tweek/plugins/screening/pattern_matcher.py +273 -0
- tweek/plugins/screening/rate_limiter.py +174 -0
- tweek/plugins/screening/session_analyzer.py +159 -0
- tweek/proxy/__init__.py +302 -0
- tweek/proxy/addon.py +223 -0
- tweek/proxy/interceptor.py +313 -0
- tweek/proxy/server.py +315 -0
- tweek/sandbox/__init__.py +71 -0
- tweek/sandbox/executor.py +382 -0
- tweek/sandbox/linux.py +278 -0
- tweek/sandbox/profile_generator.py +323 -0
- tweek/screening/__init__.py +13 -0
- tweek/screening/context.py +81 -0
- tweek/security/__init__.py +22 -0
- tweek/security/llm_reviewer.py +348 -0
- tweek/security/rate_limiter.py +682 -0
- tweek/security/secret_scanner.py +506 -0
- tweek/security/session_analyzer.py +600 -0
- tweek/vault/__init__.py +40 -0
- tweek/vault/cross_platform.py +251 -0
- tweek/vault/keychain.py +288 -0
- tweek-0.1.0.dist-info/METADATA +335 -0
- tweek-0.1.0.dist-info/RECORD +85 -0
- tweek-0.1.0.dist-info/WHEEL +5 -0
- tweek-0.1.0.dist-info/entry_points.txt +25 -0
- tweek-0.1.0.dist-info/licenses/LICENSE +190 -0
- tweek-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
Tweek Compliance Plugins

Package entry point that re-exports the domain-specific compliance
plugins used to detect sensitive information:

- Gov:   government classification markings (TS, SECRET, CUI, etc.)
- HIPAA: healthcare PHI and patient data
- PCI:   payment card industry data (credit cards, CVVs)
- Legal: attorney-client privilege and confidentiality markers
- SOC2:  security and compliance patterns
- GDPR:  EU personal data protection

All of these are ENTERPRISE tier plugins and require appropriate licensing.
"""

from tweek.plugins.compliance.gov import GovCompliancePlugin
from tweek.plugins.compliance.hipaa import HIPAACompliancePlugin
from tweek.plugins.compliance.pci import PCICompliancePlugin
from tweek.plugins.compliance.legal import LegalCompliancePlugin
from tweek.plugins.compliance.soc2 import SOC2CompliancePlugin
from tweek.plugins.compliance.gdpr import GDPRCompliancePlugin

# Public API of the package: one plugin class per compliance domain.
__all__ = [
    "GovCompliancePlugin",
    "HIPAACompliancePlugin",
    "PCICompliancePlugin",
    "LegalCompliancePlugin",
    "SOC2CompliancePlugin",
    "GDPRCompliancePlugin",
]
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Tweek GDPR Compliance Plugin
|
|
4
|
+
|
|
5
|
+
Detects patterns indicating GDPR-relevant personal data:
|
|
6
|
+
- Personal identifiers (national ID/passport numbers, phone numbers, IBANs, VAT numbers)
|
|
7
|
+
- Special category data (health, biometric, genetic)
|
|
8
|
+
- Location data
|
|
9
|
+
- Online identifiers (IP addresses, cookies, device IDs)
|
|
10
|
+
- Data subject rights markers
|
|
11
|
+
- Cross-border transfer indicators
|
|
12
|
+
- Consent and legal basis references
|
|
13
|
+
|
|
14
|
+
GDPR Article 4 Categories:
|
|
15
|
+
- Personal Data (any data relating to an identified/identifiable person)
|
|
16
|
+
- Special Categories (Article 9 - sensitive data requiring explicit consent)
|
|
17
|
+
|
|
18
|
+
Supports bidirectional scanning:
|
|
19
|
+
- OUTPUT: Detect LLM generating personal data inappropriately
|
|
20
|
+
- INPUT: Detect personal data in incoming content for proper handling
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from typing import Optional, List, Dict, Any
|
|
24
|
+
from tweek.plugins.base import (
|
|
25
|
+
CompliancePlugin,
|
|
26
|
+
ScanDirection,
|
|
27
|
+
ActionType,
|
|
28
|
+
Severity,
|
|
29
|
+
PatternDefinition,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class GDPRCompliancePlugin(CompliancePlugin):
    """
    GDPR compliance plugin.

    Detects personal data patterns under GDPR, helping ensure
    proper handling of EU residents' data.

    The plugin contributes a fixed list of regex-based
    ``PatternDefinition`` objects (see ``get_patterns``) and a
    GDPR-specific summary message (see ``_format_message``).
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Detect GDPR-relevant personal data patterns"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "enterprise"
    TAGS = ["compliance", "gdpr", "privacy", "eu"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the plugin.

        Args:
            config: Optional plugin configuration, forwarded unchanged to
                the ``CompliancePlugin`` constructor.
        """
        super().__init__(config)
        # Lazily built by get_patterns() and reused on later calls.
        self._patterns: Optional[List[PatternDefinition]] = None

    @property
    def name(self) -> str:
        """Short identifier of this plugin (``"gdpr"``)."""
        return "gdpr"

    @property
    def scan_direction(self) -> ScanDirection:
        """Direction(s) in which this plugin scans.

        Read from the ``scan_direction`` config key, defaulting to
        ``"both"``, and converted with ``ScanDirection(...)``.
        """
        # NOTE(review): assumes self._config is a dict populated by the base
        # class constructor, and that ScanDirection rejects unknown values
        # (e.g. ValueError if it is an Enum) -- confirm against plugins.base.
        direction = self._config.get("scan_direction", "both")
        return ScanDirection(direction)

    def get_patterns(self) -> List[PatternDefinition]:
        """Return GDPR compliance patterns.

        The list is built on first call and cached on the instance, so
        every call returns the same list object.
        """
        if self._patterns is not None:
            return self._patterns

        self._patterns = [
            # =================================================================
            # Direct Personal Identifiers (Article 4(1))
            # =================================================================
            PatternDefinition(
                name="eu_national_id",
                regex=r"(?i)(?:national\s+id|passport\s+(?:no|number|#))[:\s]+[A-Z0-9]{6,12}",
                severity=Severity.HIGH,
                description="EU national ID or passport number",
                default_action=ActionType.REDACT,
                tags=["gdpr", "personal-data", "identifier"],
            ),
            PatternDefinition(
                name="eu_phone_number",
                regex=r"\+(?:31|32|33|34|39|43|44|45|46|47|48|49)\s*\d[\d\s-]{8,}",
                severity=Severity.MEDIUM,
                description="EU phone number format",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "contact"],
            ),
            PatternDefinition(
                name="eu_iban",
                regex=r"\b[A-Z]{2}\d{2}\s*(?:[A-Z0-9]{4}\s*){4,7}[A-Z0-9]{0,3}\b",
                severity=Severity.HIGH,
                description="EU IBAN bank account number",
                default_action=ActionType.REDACT,
                tags=["gdpr", "personal-data", "financial"],
            ),
            PatternDefinition(
                name="eu_vat_number",
                # The lookahead requires at least one digit in the body.
                # Without it, any all-caps word made of a country prefix plus
                # 8-12 letters (DESCRIPTION, DEPRECATED, DEVELOPMENT, ...)
                # was reported as a VAT number.
                regex=r"\b(?:AT|BE|BG|CY|CZ|DE|DK|EE|EL|ES|FI|FR|HR|HU|IE|IT|LT|LU|LV|MT|NL|PL|PT|RO|SE|SI|SK)(?=[A-Z0-9]*\d)[A-Z0-9]{8,12}\b",
                severity=Severity.MEDIUM,
                description="EU VAT identification number",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "business"],
            ),

            # =================================================================
            # Online Identifiers (Recital 30)
            # =================================================================
            PatternDefinition(
                name="ipv4_address",
                regex=r"\b(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b",
                severity=Severity.MEDIUM,
                description="IPv4 address (online identifier)",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="ipv6_address",
                # Three alternatives, all anchored on a leading word boundary:
                #   1. full form with eight groups
                #   2. compressed form with groups after "::" (fe80::1)
                #   3. compressed form ending in "::" (2001:db8::)
                # The shared \b stops the compressed forms from matching the
                # tail of an identifier followed by a scope operator ("d::"
                # in "std::vector"). Alternative 2 is tried before 3 so the
                # groups after "::" are part of the reported match.
                regex=r"(?i)\b(?:(?:[0-9a-f]{1,4}:){7}[0-9a-f]{1,4}\b|(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,6}|(?:[0-9a-f]{1,4}:){1,7}:)",
                severity=Severity.MEDIUM,
                description="IPv6 address (online identifier)",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="device_id",
                regex=r"(?i)(?:device[_-]?id|imei|udid|idfa|gaid)[:\s=]+['\"]?[A-Za-z0-9-]{16,}['\"]?",
                severity=Severity.MEDIUM,
                description="Device identifier",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="cookie_identifier",
                regex=r"(?i)(?:tracking[_-]?id|session[_-]?id|visitor[_-]?id)[:\s=]+['\"]?[A-Za-z0-9-]{16,}['\"]?",
                severity=Severity.LOW,
                description="Tracking cookie identifier",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),

            # =================================================================
            # Special Category Data (Article 9)
            # =================================================================
            PatternDefinition(
                name="health_data",
                regex=r"(?i)(?:diagnosis|medical\s+condition|treatment\s+for|prescribed)\s*:\s*\w+",
                severity=Severity.CRITICAL,
                description="Health data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "health"],
            ),
            PatternDefinition(
                name="biometric_data",
                regex=r"(?i)(?:fingerprint|facial\s+recognition|iris\s+scan|biometric)[:\s]+[A-Za-z0-9+/=]{20,}",
                severity=Severity.CRITICAL,
                description="Biometric data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "biometric"],
            ),
            PatternDefinition(
                name="genetic_data",
                regex=r"(?i)(?:dna|genetic|genome)\s+(?:sequence|data|profile|test)",
                severity=Severity.CRITICAL,
                description="Genetic data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "genetic"],
            ),
            PatternDefinition(
                name="political_opinion",
                regex=r"(?i)(?:political\s+(?:party|affiliation|opinion)|voting\s+(?:record|preference))",
                severity=Severity.HIGH,
                description="Political opinion data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "political"],
            ),
            PatternDefinition(
                name="religious_belief",
                regex=r"(?i)(?:religious?\s+(?:belief|affiliation)|church\s+member)",
                severity=Severity.HIGH,
                description="Religious belief data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "religious"],
            ),
            PatternDefinition(
                name="trade_union",
                regex=r"(?i)(?:trade|labor)\s+union\s+(?:member|affiliation)",
                severity=Severity.HIGH,
                description="Trade union membership (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "union"],
            ),
            PatternDefinition(
                name="sexual_orientation",
                regex=r"(?i)sexual\s+(?:orientation|preference)|gender\s+identity",
                severity=Severity.HIGH,
                description="Sexual orientation/identity data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "sensitive"],
            ),

            # =================================================================
            # Location Data (Recital 30)
            # =================================================================
            PatternDefinition(
                name="precise_location",
                regex=r"(?i)(?:location|coordinates?|gps)[:\s]+[-+]?\d+\.?\d*[,\s]+[-+]?\d+\.?\d*",
                severity=Severity.MEDIUM,
                description="Precise location coordinates",
                default_action=ActionType.WARN,
                tags=["gdpr", "location"],
            ),
            PatternDefinition(
                name="home_address",
                regex=r"(?i)(?:home|residential)\s+address[:\s]+.{10,}",
                severity=Severity.HIGH,
                description="Home/residential address",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "address"],
            ),

            # =================================================================
            # Data Subject Rights (Chapter III)
            # =================================================================
            PatternDefinition(
                name="data_subject_request",
                regex=r"(?i)(?:subject\s+access|deletion|erasure|portability|rectification)\s+request",
                severity=Severity.MEDIUM,
                description="Data subject rights request",
                default_action=ActionType.WARN,
                tags=["gdpr", "data-subject-rights"],
            ),
            PatternDefinition(
                name="right_to_be_forgotten",
                # Trailing \b keeps "Article 170" etc. from being reported as
                # a reference to Article 17.
                regex=r"(?i)right\s+to\s+(?:be\s+forgotten|erasure)|article\s+17\b",
                severity=Severity.MEDIUM,
                description="Right to be forgotten reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "data-subject-rights", "erasure"],
            ),

            # =================================================================
            # Cross-Border Transfers (Chapter V)
            # =================================================================
            PatternDefinition(
                name="cross_border_transfer",
                regex=r"(?i)(?:transfer|export)\s+(?:to|outside)\s+(?:eu|eea|european)",
                severity=Severity.MEDIUM,
                description="Cross-border data transfer indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "transfer"],
            ),
            PatternDefinition(
                name="adequacy_decision",
                # The acronyms are anchored on word boundaries (optional
                # plural "s") so that they no longer match inside unrelated
                # words such as "bcrypt".
                regex=r"(?i)(?:adequacy\s+decision|standard\s+contractual\s+clauses|\bsccs?\b|\bbcrs?\b)",
                severity=Severity.LOW,
                description="Transfer mechanism reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "transfer", "legal-basis"],
            ),

            # =================================================================
            # Consent and Legal Basis (Article 6/7)
            # =================================================================
            PatternDefinition(
                name="consent_indicator",
                regex=r"(?i)(?:consent\s+(?:withdrawn|revoked|given)|opt[_-]?out\s+requested)",
                severity=Severity.MEDIUM,
                description="Consent status indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "consent", "legal-basis"],
            ),
            PatternDefinition(
                name="legitimate_interest",
                regex=r"(?i)legitimate\s+interest\s+(?:assessment|basis|applied)",
                severity=Severity.LOW,
                description="Legitimate interest reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "legal-basis"],
            ),

            # =================================================================
            # Data Breach (Article 33/34)
            # =================================================================
            PatternDefinition(
                name="personal_data_breach",
                regex=r"(?i)personal\s+data\s+(?:breach|incident|exposure)",
                severity=Severity.HIGH,
                description="Personal data breach indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "breach", "incident"],
            ),
            PatternDefinition(
                name="dpa_notification",
                regex=r"(?i)(?:dpa|supervisory\s+authority|data\s+protection\s+authority)\s+(?:notification|notified)",
                severity=Severity.MEDIUM,
                description="DPA notification reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "breach", "regulatory"],
            ),
        ]

        return self._patterns

    def _format_message(
        self,
        findings: List,
        direction: ScanDirection
    ) -> Optional[str]:
        """Format a GDPR-specific message.

        Args:
            findings: Findings produced by a scan. Each finding is read via
                ``finding.metadata.get("pattern_tags", [])``.
            direction: Scan direction; ``ScanDirection.OUTPUT`` selects the
                "LLM output" wording, anything else the "Input" wording.

        Returns:
            ``None`` when ``findings`` is empty, otherwise a multi-line
            summary with one line per non-empty category (Article 9 special
            category, personal identifiers, online identifiers).
        """
        if not findings:
            return None

        def tagged(tag: str) -> List:
            # A finding may carry several tags and so appear in more than
            # one category.
            return [f for f in findings if tag in f.metadata.get("pattern_tags", [])]

        # Group findings by GDPR category
        special_category = tagged("special-category")
        personal_data = tagged("personal-data")
        online_id = tagged("online-identifier")

        if direction == ScanDirection.OUTPUT:
            lines = [
                f"WARNING: LLM output contains {len(findings)} GDPR-relevant finding(s).",
                "Personal data should not be generated/exposed without proper basis:"
            ]
        else:
            lines = [
                f"ALERT: Input contains {len(findings)} GDPR-relevant finding(s).",
                "Ensure proper legal basis and handling for personal data:"
            ]

        if special_category:
            lines.append(f" Article 9 Special Category: {len(special_category)} finding(s) - REQUIRES EXPLICIT CONSENT")
        if personal_data:
            lines.append(f" Personal Identifiers: {len(personal_data)} finding(s)")
        if online_id:
            lines.append(f" Online Identifiers: {len(online_id)} finding(s)")

        return "\n".join(lines)
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Tweek Government Classification Compliance Plugin
|
|
4
|
+
|
|
5
|
+
Detects and handles government classification markings:
|
|
6
|
+
- Classification levels (TOP SECRET, SECRET, CONFIDENTIAL)
|
|
7
|
+
- Portion markings ((TS), (S), (C))
|
|
8
|
+
- Handling caveats (NOFORN, ORCON, REL TO)
|
|
9
|
+
- Controlled Unclassified Information (CUI, FOUO)
|
|
10
|
+
|
|
11
|
+
Supports bidirectional scanning:
|
|
12
|
+
- OUTPUT: Detect hallucinated classification markings in LLM responses
|
|
13
|
+
- INPUT: Detect real classification markings in incoming data
|
|
14
|
+
|
|
15
|
+
IMPORTANT: This plugin detects MARKERS, not actual classified content.
|
|
16
|
+
It helps prevent:
|
|
17
|
+
1. LLMs hallucinating classification markings on unclassified content
|
|
18
|
+
2. Accidental exposure of marked content to LLMs
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Optional, List, Dict, Any
|
|
22
|
+
from tweek.plugins.base import (
|
|
23
|
+
CompliancePlugin,
|
|
24
|
+
ScanDirection,
|
|
25
|
+
ActionType,
|
|
26
|
+
Severity,
|
|
27
|
+
PatternDefinition,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GovCompliancePlugin(CompliancePlugin):
    """
    Government classification compliance plugin.

    Detects classification markings, portion markings, and handling caveats
    used in US government documents and communications.

    The plugin contributes a fixed list of regex-based
    ``PatternDefinition`` objects (see ``get_patterns``) and a
    direction-specific summary message (see ``_format_message``).
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Detect government classification markings and handling caveats"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "enterprise"
    TAGS = ["compliance", "government", "classification", "cui"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the plugin.

        Args:
            config: Optional plugin configuration, forwarded unchanged to
                the ``CompliancePlugin`` constructor.
        """
        super().__init__(config)
        # Lazily built by get_patterns() and reused on later calls.
        self._patterns: Optional[List[PatternDefinition]] = None

    @property
    def name(self) -> str:
        """Short identifier of this plugin (``"gov"``)."""
        return "gov"

    @property
    def scan_direction(self) -> ScanDirection:
        """Direction(s) in which this plugin scans.

        Read from the ``scan_direction`` config key, defaulting to
        ``"both"``, and converted with ``ScanDirection(...)``.
        """
        # Configurable, defaults to BOTH
        # NOTE(review): assumes self._config is a dict populated by the base
        # class constructor -- confirm against plugins.base.
        direction = self._config.get("scan_direction", "both")
        return ScanDirection(direction)

    def get_patterns(self) -> List[PatternDefinition]:
        """Return government classification patterns.

        The list is built on first call and cached on the instance, so
        every call returns the same list object.

        Marking separators are written as ``//?`` where a marking may be
        followed by further control markings, so both the standard
        double-slash form (``TS//SCI``) and a single slash (``TS/SCI``)
        are recognised.
        """
        if self._patterns is not None:
            return self._patterns

        self._patterns = [
            # =================================================================
            # TOP SECRET Level
            # =================================================================
            PatternDefinition(
                name="top_secret_banner",
                # "//?" so trailing caveats in the standard form
                # (TOP SECRET//SCI//NOFORN) are included in the match.
                regex=r"(?i)\bTOP\s*SECRET\b(?:\s*//?\s*(?:SCI|TK|NOFORN|ORCON|REL\s+TO\s+[\w,\s]+))*",
                severity=Severity.CRITICAL,
                description="Top Secret classification marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "top-secret"],
            ),
            PatternDefinition(
                name="ts_sci",
                # A single-slash-only separator never matched "TS//SCI".
                regex=r"(?i)\bTS\s*//?\s*SCI\b",
                severity=Severity.CRITICAL,
                description="Top Secret/Sensitive Compartmented Information",
                default_action=ActionType.BLOCK,
                tags=["classification", "top-secret", "sci"],
            ),
            PatternDefinition(
                name="portion_marking_ts",
                # A single-slash-only separator never matched double-slash
                # portion markings such as "(TS//SI//NF)".
                regex=r"\(TS(?:\s*//?\s*[A-Z]+)*\)",
                severity=Severity.CRITICAL,
                description="Top Secret portion marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "portion-marking", "top-secret"],
            ),

            # =================================================================
            # SECRET Level
            # =================================================================
            PatternDefinition(
                name="secret_banner",
                # NOTE(review): the negative lookahead only excludes SECRET
                # followed by a slash ("SECRET/KEY"); "secret key" with a
                # space is still matched and blocked -- confirm intent.
                regex=r"(?i)(?<!/)\bSECRET\b(?!\s*/\s*(?:SERVICE|KEY|TOKEN|PASSWORD|API))",
                severity=Severity.CRITICAL,
                description="Secret classification marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "secret"],
            ),
            PatternDefinition(
                name="portion_marking_s",
                # "//?" so "(S//NF)" is matched as well as "(S/NF)". The
                # lookahead skips "(S)" directly followed by lowercase text.
                regex=r"\(S(?:\s*//?\s*[A-Z]+)*\)(?!\s*[a-z])",
                severity=Severity.CRITICAL,
                description="Secret portion marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "portion-marking", "secret"],
            ),

            # =================================================================
            # CONFIDENTIAL Level
            # =================================================================
            PatternDefinition(
                name="confidential_banner",
                regex=r"(?i)(?<!/)\bCONFIDENTIAL\b(?!\s+(?:INFORMATION|DATA|FILE|DOCUMENT))",
                severity=Severity.HIGH,
                description="Confidential classification marking",
                default_action=ActionType.WARN,
                tags=["classification", "confidential"],
            ),
            PatternDefinition(
                name="portion_marking_c",
                # "//?" so "(C//NF)" is matched as well as "(C/NF)".
                regex=r"\(C(?:\s*//?\s*[A-Z]+)*\)(?!\s*[a-z])",
                severity=Severity.HIGH,
                description="Confidential portion marking",
                default_action=ActionType.WARN,
                tags=["classification", "portion-marking", "confidential"],
            ),

            # =================================================================
            # Handling Caveats
            # =================================================================
            PatternDefinition(
                name="noforn",
                regex=r"(?i)\bNOFORN\b|NO\s*FOREIGN\s*(?:NATIONALS?|DISSEMINATION)",
                severity=Severity.CRITICAL,
                description="No Foreign Nationals handling caveat",
                default_action=ActionType.BLOCK,
                tags=["caveat", "noforn"],
            ),
            PatternDefinition(
                name="orcon",
                regex=r"(?i)\bORCON\b|ORIGINATOR\s*CONTROLLED",
                severity=Severity.HIGH,
                description="Originator Controlled handling caveat",
                default_action=ActionType.WARN,
                tags=["caveat", "orcon"],
            ),
            PatternDefinition(
                name="rel_to",
                # Leading \b so words that merely end in "rel" ("barrel to
                # the ...", "quarrel to ...") are not reported.
                regex=r"(?i)\bREL(?:EASABLE)?\s*TO\s+(?:USA|FVEY|[\w,\s]+)",
                severity=Severity.HIGH,
                description="Releasable To specific countries",
                default_action=ActionType.WARN,
                tags=["caveat", "rel-to"],
            ),
            PatternDefinition(
                name="fvey",
                regex=r"(?i)\bFVEY\b|FIVE\s*EYES",
                severity=Severity.HIGH,
                description="Five Eyes intelligence sharing",
                default_action=ActionType.WARN,
                tags=["caveat", "fvey"],
            ),
            PatternDefinition(
                name="wnintel",
                regex=r"(?i)\bWNINTEL\b|WARNING\s*NOTICE",
                severity=Severity.HIGH,
                description="Warning Notice Intelligence Sources",
                default_action=ActionType.WARN,
                tags=["caveat", "wnintel"],
            ),
            PatternDefinition(
                name="propin",
                regex=r"(?i)\bPROPIN\b|PROPRIETARY\s*INFORMATION",
                severity=Severity.MEDIUM,
                description="Proprietary Information caveat",
                default_action=ActionType.WARN,
                tags=["caveat", "propin"],
            ),

            # =================================================================
            # Controlled Unclassified Information (CUI)
            # =================================================================
            PatternDefinition(
                name="cui",
                regex=r"(?i)\bCUI\b(?!\s*(?:BASIC|SPECIFIED))|\bCONTROLLED\s+UNCLASSIFIED\s+INFORMATION\b",
                severity=Severity.MEDIUM,
                description="Controlled Unclassified Information",
                default_action=ActionType.WARN,
                tags=["cui"],
            ),
            PatternDefinition(
                name="cui_specified",
                regex=r"(?i)CUI\s*//?\s*(?:SP|SPECIFIED)",
                severity=Severity.HIGH,
                description="CUI Specified (higher protection)",
                default_action=ActionType.WARN,
                tags=["cui", "specified"],
            ),
            PatternDefinition(
                name="fouo",
                regex=r"(?i)\bFOUO\b|FOR\s+OFFICIAL\s+USE\s+ONLY",
                severity=Severity.MEDIUM,
                description="For Official Use Only (legacy CUI)",
                default_action=ActionType.WARN,
                tags=["cui", "fouo", "legacy"],
            ),
            PatternDefinition(
                name="law_enforcement_sensitive",
                regex=r"(?i)\bLES\b(?:\s*/\s*[A-Z]+)*|LAW\s+ENFORCEMENT\s+SENSITIVE",
                severity=Severity.MEDIUM,
                description="Law Enforcement Sensitive",
                default_action=ActionType.WARN,
                tags=["cui", "les"],
            ),

            # =================================================================
            # Classification Headers/Footers
            # =================================================================
            PatternDefinition(
                name="classification_header",
                # (?m) makes ^ and $ anchor at every line, so a header or
                # footer line inside a multi-line document is detected.
                # Without it the pattern only matched when the whole scanned
                # text was a single header line.
                regex=r"(?m)^(?:UNCLASSIFIED|CONFIDENTIAL|SECRET|TOP\s*SECRET)(?:\s*//\s*[A-Z/\s]+)?$",
                severity=Severity.HIGH,
                description="Classification header/footer line",
                default_action=ActionType.WARN,
                tags=["classification", "header"],
            ),
            PatternDefinition(
                name="classification_banner_line",
                regex=r"[-=]{3,}\s*(?:UNCLASSIFIED|CONFIDENTIAL|SECRET|TOP\s*SECRET)\s*[-=]{3,}",
                severity=Severity.HIGH,
                description="Classification banner line",
                default_action=ActionType.WARN,
                tags=["classification", "banner"],
            ),

            # =================================================================
            # Special Programs
            # =================================================================
            PatternDefinition(
                name="sap_marker",
                regex=r"(?i)\bSAP\b(?:\s*/\s*[A-Z]+)*|SPECIAL\s+ACCESS\s+PROGRAM",
                severity=Severity.CRITICAL,
                description="Special Access Program marker",
                default_action=ActionType.BLOCK,
                tags=["classification", "sap"],
            ),
            PatternDefinition(
                name="waived_sap",
                regex=r"(?i)WAIVED\s+SAP|UNACKNOWLEDGED\s+SAP",
                severity=Severity.CRITICAL,
                description="Waived/Unacknowledged SAP reference",
                default_action=ActionType.BLOCK,
                tags=["classification", "sap"],
            ),

            # =================================================================
            # NATO Classifications
            # =================================================================
            PatternDefinition(
                name="nato_classification",
                regex=r"(?i)NATO\s+(?:UNCLASSIFIED|RESTRICTED|CONFIDENTIAL|SECRET|COSMIC\s+TOP\s+SECRET)",
                severity=Severity.HIGH,
                description="NATO classification marking",
                default_action=ActionType.WARN,
                tags=["classification", "nato"],
            ),

            # =================================================================
            # Declassification Markings
            # =================================================================
            PatternDefinition(
                name="declassification_date",
                regex=r"(?i)DECLAS(?:SIFY)?(?:\s+ON)?:\s*\d{4}[-/]\d{2}[-/]\d{2}",
                severity=Severity.MEDIUM,
                description="Declassification date marking",
                default_action=ActionType.WARN,
                tags=["classification", "declassification"],
            ),
            PatternDefinition(
                name="classified_by",
                regex=r"(?i)CLASSIFIED\s+BY:\s*[\w\s]+",
                severity=Severity.MEDIUM,
                description="Classified By line",
                default_action=ActionType.WARN,
                tags=["classification", "attribution"],
            ),
            PatternDefinition(
                name="derived_from",
                regex=r"(?i)DERIVED\s+FROM:\s*[\w\s]+",
                severity=Severity.MEDIUM,
                description="Derived From line",
                default_action=ActionType.WARN,
                tags=["classification", "attribution"],
            ),
        ]

        return self._patterns

    def _format_message(
        self,
        findings: List,
        direction: ScanDirection
    ) -> Optional[str]:
        """Format a government-specific message.

        Args:
            findings: Findings produced by a scan; only their count is used.
            direction: Scan direction; ``ScanDirection.OUTPUT`` selects the
                "likely hallucinated" wording, anything else the "input
                contains markings" wording.

        Returns:
            ``None`` when ``findings`` is empty, otherwise a three-line
            warning/alert string.
        """
        if not findings:
            return None

        if direction == ScanDirection.OUTPUT:
            return (
                f"WARNING: LLM output contains {len(findings)} classification marking(s).\n"
                "These are likely HALLUCINATED and do not indicate actual classified content.\n"
                "Do NOT treat this content as classified - verify with proper authorities."
            )
        else:
            return (
                f"ALERT: Input contains {len(findings)} classification marking(s).\n"
                "If this is actual classified material, it should NOT be processed by this system.\n"
                "Verify proper handling procedures and need-to-know."
            )
|