tweek 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. tweek/__init__.py +16 -0
  2. tweek/cli.py +3390 -0
  3. tweek/cli_helpers.py +193 -0
  4. tweek/config/__init__.py +13 -0
  5. tweek/config/allowed_dirs.yaml +23 -0
  6. tweek/config/manager.py +1064 -0
  7. tweek/config/patterns.yaml +751 -0
  8. tweek/config/tiers.yaml +129 -0
  9. tweek/diagnostics.py +589 -0
  10. tweek/hooks/__init__.py +1 -0
  11. tweek/hooks/pre_tool_use.py +861 -0
  12. tweek/integrations/__init__.py +3 -0
  13. tweek/integrations/moltbot.py +243 -0
  14. tweek/licensing.py +398 -0
  15. tweek/logging/__init__.py +9 -0
  16. tweek/logging/bundle.py +350 -0
  17. tweek/logging/json_logger.py +150 -0
  18. tweek/logging/security_log.py +745 -0
  19. tweek/mcp/__init__.py +24 -0
  20. tweek/mcp/approval.py +456 -0
  21. tweek/mcp/approval_cli.py +356 -0
  22. tweek/mcp/clients/__init__.py +37 -0
  23. tweek/mcp/clients/chatgpt.py +112 -0
  24. tweek/mcp/clients/claude_desktop.py +203 -0
  25. tweek/mcp/clients/gemini.py +178 -0
  26. tweek/mcp/proxy.py +667 -0
  27. tweek/mcp/screening.py +175 -0
  28. tweek/mcp/server.py +317 -0
  29. tweek/platform/__init__.py +131 -0
  30. tweek/plugins/__init__.py +835 -0
  31. tweek/plugins/base.py +1080 -0
  32. tweek/plugins/compliance/__init__.py +30 -0
  33. tweek/plugins/compliance/gdpr.py +333 -0
  34. tweek/plugins/compliance/gov.py +324 -0
  35. tweek/plugins/compliance/hipaa.py +285 -0
  36. tweek/plugins/compliance/legal.py +322 -0
  37. tweek/plugins/compliance/pci.py +361 -0
  38. tweek/plugins/compliance/soc2.py +275 -0
  39. tweek/plugins/detectors/__init__.py +30 -0
  40. tweek/plugins/detectors/continue_dev.py +206 -0
  41. tweek/plugins/detectors/copilot.py +254 -0
  42. tweek/plugins/detectors/cursor.py +192 -0
  43. tweek/plugins/detectors/moltbot.py +205 -0
  44. tweek/plugins/detectors/windsurf.py +214 -0
  45. tweek/plugins/git_discovery.py +395 -0
  46. tweek/plugins/git_installer.py +491 -0
  47. tweek/plugins/git_lockfile.py +338 -0
  48. tweek/plugins/git_registry.py +503 -0
  49. tweek/plugins/git_security.py +482 -0
  50. tweek/plugins/providers/__init__.py +30 -0
  51. tweek/plugins/providers/anthropic.py +181 -0
  52. tweek/plugins/providers/azure_openai.py +289 -0
  53. tweek/plugins/providers/bedrock.py +248 -0
  54. tweek/plugins/providers/google.py +197 -0
  55. tweek/plugins/providers/openai.py +230 -0
  56. tweek/plugins/scope.py +130 -0
  57. tweek/plugins/screening/__init__.py +26 -0
  58. tweek/plugins/screening/llm_reviewer.py +149 -0
  59. tweek/plugins/screening/pattern_matcher.py +273 -0
  60. tweek/plugins/screening/rate_limiter.py +174 -0
  61. tweek/plugins/screening/session_analyzer.py +159 -0
  62. tweek/proxy/__init__.py +302 -0
  63. tweek/proxy/addon.py +223 -0
  64. tweek/proxy/interceptor.py +313 -0
  65. tweek/proxy/server.py +315 -0
  66. tweek/sandbox/__init__.py +71 -0
  67. tweek/sandbox/executor.py +382 -0
  68. tweek/sandbox/linux.py +278 -0
  69. tweek/sandbox/profile_generator.py +323 -0
  70. tweek/screening/__init__.py +13 -0
  71. tweek/screening/context.py +81 -0
  72. tweek/security/__init__.py +22 -0
  73. tweek/security/llm_reviewer.py +348 -0
  74. tweek/security/rate_limiter.py +682 -0
  75. tweek/security/secret_scanner.py +506 -0
  76. tweek/security/session_analyzer.py +600 -0
  77. tweek/vault/__init__.py +40 -0
  78. tweek/vault/cross_platform.py +251 -0
  79. tweek/vault/keychain.py +288 -0
  80. tweek-0.1.0.dist-info/METADATA +335 -0
  81. tweek-0.1.0.dist-info/RECORD +85 -0
  82. tweek-0.1.0.dist-info/WHEEL +5 -0
  83. tweek-0.1.0.dist-info/entry_points.txt +25 -0
  84. tweek-0.1.0.dist-info/licenses/LICENSE +190 -0
  85. tweek-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tweek Compliance Plugins
4
+
5
+ Domain-specific compliance modules for detecting sensitive information:
6
+ - Gov: Government classification markings (TS, SECRET, CUI, etc.)
7
+ - HIPAA: Healthcare PHI and patient data
8
+ - PCI: Payment card industry data (credit cards, CVVs)
9
+ - Legal: Attorney-client privilege and confidentiality markers
10
+ - SOC2: Security and compliance patterns
11
+ - GDPR: EU personal data protection
12
+
13
+ These are ENTERPRISE tier plugins requiring appropriate licensing.
14
+ """
15
+
16
+ from tweek.plugins.compliance.gov import GovCompliancePlugin
17
+ from tweek.plugins.compliance.hipaa import HIPAACompliancePlugin
18
+ from tweek.plugins.compliance.pci import PCICompliancePlugin
19
+ from tweek.plugins.compliance.legal import LegalCompliancePlugin
20
+ from tweek.plugins.compliance.soc2 import SOC2CompliancePlugin
21
+ from tweek.plugins.compliance.gdpr import GDPRCompliancePlugin
22
+
23
+ __all__ = [
24
+ "GovCompliancePlugin",
25
+ "HIPAACompliancePlugin",
26
+ "PCICompliancePlugin",
27
+ "LegalCompliancePlugin",
28
+ "SOC2CompliancePlugin",
29
+ "GDPRCompliancePlugin",
30
+ ]
@@ -0,0 +1,333 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tweek GDPR Compliance Plugin
4
+
5
+ Detects patterns indicating GDPR-relevant personal data:
6
+ - Personal identifiers (names, emails, phone numbers)
7
+ - Special category data (health, biometric, genetic)
8
+ - Location data
9
+ - Online identifiers (IP addresses, cookies, device IDs)
10
+ - Data subject rights markers
11
+ - Cross-border transfer indicators
12
+ - Consent and legal basis references
13
+
14
+ GDPR Article 4 Categories:
15
+ - Personal Data (any data relating to an identified/identifiable person)
16
+ - Special Categories (Article 9 - sensitive data requiring explicit consent)
17
+
18
+ Supports bidirectional scanning:
19
+ - OUTPUT: Detect LLM generating personal data inappropriately
20
+ - INPUT: Detect personal data in incoming content for proper handling
21
+ """
22
+
23
+ from typing import Optional, List, Dict, Any
24
+ from tweek.plugins.base import (
25
+ CompliancePlugin,
26
+ ScanDirection,
27
+ ActionType,
28
+ Severity,
29
+ PatternDefinition,
30
+ )
31
+
32
+
33
class GDPRCompliancePlugin(CompliancePlugin):
    """
    GDPR compliance plugin.

    Detects personal data patterns under GDPR, helping ensure
    proper handling of EU residents' data.

    The pattern catalogue is built lazily on the first call to
    ``get_patterns()`` and cached on the instance afterwards.
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Detect GDPR-relevant personal data patterns"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "enterprise"
    TAGS = ["compliance", "gdpr", "privacy", "eu"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the plugin.

        Args:
            config: Optional plugin configuration, forwarded unchanged to the
                base class. The ``scan_direction`` key is read by the
                ``scan_direction`` property.
        """
        super().__init__(config)
        # Filled in by get_patterns() on first use.
        self._patterns: Optional[List[PatternDefinition]] = None

    @property
    def name(self) -> str:
        """Short identifier of this plugin."""
        return "gdpr"

    @property
    def scan_direction(self) -> ScanDirection:
        """Scan direction taken from the configuration (default ``"both"``).

        The configured value is passed straight to ``ScanDirection(...)``.
        NOTE(review): presumably an unknown value raises from the enum
        constructor - confirm against ``tweek.plugins.base``.
        """
        direction = self._config.get("scan_direction", "both")
        return ScanDirection(direction)

    def get_patterns(self) -> List[PatternDefinition]:
        """Return GDPR compliance patterns.

        Returns:
            The list of ``PatternDefinition`` objects for this plugin. The
            same list object is returned on every call after the first.
        """
        if self._patterns is not None:
            return self._patterns

        self._patterns = [
            # =================================================================
            # Direct Personal Identifiers (Article 4(1))
            # =================================================================
            PatternDefinition(
                name="eu_national_id",
                regex=r"(?i)(?:national\s+id|passport\s+(?:no|number|#))[:\s]+[A-Z0-9]{6,12}",
                severity=Severity.HIGH,
                description="EU national ID or passport number",
                default_action=ActionType.REDACT,
                tags=["gdpr", "personal-data", "identifier"],
            ),
            PatternDefinition(
                name="eu_phone_number",
                regex=r"\+(?:31|32|33|34|39|43|44|45|46|47|48|49)\s*\d[\d\s-]{8,}",
                severity=Severity.MEDIUM,
                description="EU phone number format",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "contact"],
            ),
            PatternDefinition(
                name="eu_iban",
                regex=r"\b[A-Z]{2}\d{2}\s*(?:[A-Z0-9]{4}\s*){4,7}[A-Z0-9]{0,3}\b",
                severity=Severity.HIGH,
                description="EU IBAN bank account number",
                default_action=ActionType.REDACT,
                tags=["gdpr", "personal-data", "financial"],
            ),
            PatternDefinition(
                name="eu_vat_number",
                regex=r"\b(?:AT|BE|BG|CY|CZ|DE|DK|EE|EL|ES|FI|FR|HR|HU|IE|IT|LT|LU|LV|MT|NL|PL|PT|RO|SE|SI|SK)[A-Z0-9]{8,12}\b",
                severity=Severity.MEDIUM,
                description="EU VAT identification number",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "business"],
            ),

            # =================================================================
            # Online Identifiers (Recital 30)
            # =================================================================
            PatternDefinition(
                name="ipv4_address",
                regex=r"\b(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b",
                severity=Severity.MEDIUM,
                description="IPv4 address (online identifier)",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="ipv6_address",
                # Alternatives, in order: full 8-group form; "::"-compressed
                # form with one or more trailing groups; form ending in "::".
                # The trailing-"::" alternative must come last: regex
                # alternation takes the first branch that matches, and that
                # branch matches a prefix of every compressed address, so
                # listing it earlier would truncate "fe80::1" to "fe80::".
                regex=r"(?i)\b(?:[0-9a-f]{1,4}:){7}[0-9a-f]{1,4}\b|(?:[0-9a-f]{1,4}:){1,6}(?::[0-9a-f]{1,4}){1,6}|(?:[0-9a-f]{1,4}:){1,7}:",
                severity=Severity.MEDIUM,
                description="IPv6 address (online identifier)",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="device_id",
                regex=r"(?i)(?:device[_-]?id|imei|udid|idfa|gaid)[:\s=]+['\"]?[A-Za-z0-9-]{16,}['\"]?",
                severity=Severity.MEDIUM,
                description="Device identifier",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),
            PatternDefinition(
                name="cookie_identifier",
                regex=r"(?i)(?:tracking[_-]?id|session[_-]?id|visitor[_-]?id)[:\s=]+['\"]?[A-Za-z0-9-]{16,}['\"]?",
                severity=Severity.LOW,
                description="Tracking cookie identifier",
                default_action=ActionType.WARN,
                tags=["gdpr", "online-identifier"],
            ),

            # =================================================================
            # Special Category Data (Article 9)
            # =================================================================
            PatternDefinition(
                name="health_data",
                regex=r"(?i)(?:diagnosis|medical\s+condition|treatment\s+for|prescribed)\s*:\s*\w+",
                severity=Severity.CRITICAL,
                description="Health data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "health"],
            ),
            PatternDefinition(
                name="biometric_data",
                regex=r"(?i)(?:fingerprint|facial\s+recognition|iris\s+scan|biometric)[:\s]+[A-Za-z0-9+/=]{20,}",
                severity=Severity.CRITICAL,
                description="Biometric data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "biometric"],
            ),
            PatternDefinition(
                name="genetic_data",
                regex=r"(?i)(?:dna|genetic|genome)\s+(?:sequence|data|profile|test)",
                severity=Severity.CRITICAL,
                description="Genetic data (Article 9 special category)",
                default_action=ActionType.BLOCK,
                tags=["gdpr", "special-category", "genetic"],
            ),
            PatternDefinition(
                name="political_opinion",
                regex=r"(?i)(?:political\s+(?:party|affiliation|opinion)|voting\s+(?:record|preference))",
                severity=Severity.HIGH,
                description="Political opinion data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "political"],
            ),
            PatternDefinition(
                name="religious_belief",
                regex=r"(?i)(?:religious?\s+(?:belief|affiliation)|church\s+member)",
                severity=Severity.HIGH,
                description="Religious belief data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "religious"],
            ),
            PatternDefinition(
                name="trade_union",
                regex=r"(?i)(?:trade|labor)\s+union\s+(?:member|affiliation)",
                severity=Severity.HIGH,
                description="Trade union membership (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "union"],
            ),
            PatternDefinition(
                name="sexual_orientation",
                regex=r"(?i)sexual\s+(?:orientation|preference)|gender\s+identity",
                severity=Severity.HIGH,
                description="Sexual orientation/identity data (Article 9)",
                default_action=ActionType.WARN,
                tags=["gdpr", "special-category", "sensitive"],
            ),

            # =================================================================
            # Location Data (Recital 30)
            # =================================================================
            PatternDefinition(
                name="precise_location",
                regex=r"(?i)(?:location|coordinates?|gps)[:\s]+[-+]?\d+\.?\d*[,\s]+[-+]?\d+\.?\d*",
                severity=Severity.MEDIUM,
                description="Precise location coordinates",
                default_action=ActionType.WARN,
                tags=["gdpr", "location"],
            ),
            PatternDefinition(
                name="home_address",
                regex=r"(?i)(?:home|residential)\s+address[:\s]+.{10,}",
                severity=Severity.HIGH,
                description="Home/residential address",
                default_action=ActionType.WARN,
                tags=["gdpr", "personal-data", "address"],
            ),

            # =================================================================
            # Data Subject Rights (Chapter III)
            # =================================================================
            PatternDefinition(
                name="data_subject_request",
                regex=r"(?i)(?:subject\s+access|deletion|erasure|portability|rectification)\s+request",
                severity=Severity.MEDIUM,
                description="Data subject rights request",
                default_action=ActionType.WARN,
                tags=["gdpr", "data-subject-rights"],
            ),
            PatternDefinition(
                name="right_to_be_forgotten",
                regex=r"(?i)right\s+to\s+(?:be\s+forgotten|erasure)|article\s+17",
                severity=Severity.MEDIUM,
                description="Right to be forgotten reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "data-subject-rights", "erasure"],
            ),

            # =================================================================
            # Cross-Border Transfers (Chapter V)
            # =================================================================
            PatternDefinition(
                name="cross_border_transfer",
                regex=r"(?i)(?:transfer|export)\s+(?:to|outside)\s+(?:eu|eea|european)",
                severity=Severity.MEDIUM,
                description="Cross-border data transfer indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "transfer"],
            ),
            PatternDefinition(
                name="adequacy_decision",
                # The abbreviations are anchored on word boundaries so that
                # they are not found inside unrelated words (for example the
                # "bcr" in "subcritical"); a plural "s" is tolerated.
                regex=r"(?i)(?:adequacy\s+decision|standard\s+contractual\s+clauses|\bsccs?\b|\bbcrs?\b)",
                severity=Severity.LOW,
                description="Transfer mechanism reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "transfer", "legal-basis"],
            ),

            # =================================================================
            # Consent and Legal Basis (Article 6/7)
            # =================================================================
            PatternDefinition(
                name="consent_indicator",
                regex=r"(?i)(?:consent\s+(?:withdrawn|revoked|given)|opt[_-]?out\s+requested)",
                severity=Severity.MEDIUM,
                description="Consent status indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "consent", "legal-basis"],
            ),
            PatternDefinition(
                name="legitimate_interest",
                regex=r"(?i)legitimate\s+interest\s+(?:assessment|basis|applied)",
                severity=Severity.LOW,
                description="Legitimate interest reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "legal-basis"],
            ),

            # =================================================================
            # Data Breach (Article 33/34)
            # =================================================================
            PatternDefinition(
                name="personal_data_breach",
                regex=r"(?i)personal\s+data\s+(?:breach|incident|exposure)",
                severity=Severity.HIGH,
                description="Personal data breach indicator",
                default_action=ActionType.WARN,
                tags=["gdpr", "breach", "incident"],
            ),
            PatternDefinition(
                name="dpa_notification",
                regex=r"(?i)(?:dpa|supervisory\s+authority|data\s+protection\s+authority)\s+(?:notification|notified)",
                severity=Severity.MEDIUM,
                description="DPA notification reference",
                default_action=ActionType.WARN,
                tags=["gdpr", "breach", "regulatory"],
            ),
        ]

        return self._patterns

    def _format_message(
        self,
        findings: List,
        direction: ScanDirection
    ) -> Optional[str]:
        """Format a GDPR-specific message.

        Args:
            findings: Findings to summarise. Each one is read through its
                ``metadata`` mapping, whose ``"pattern_tags"`` entry (if any)
                decides which category lines are added.
            direction: ``ScanDirection.OUTPUT`` selects the LLM-output
                wording; any other value selects the input wording.

        Returns:
            A newline-joined summary, or ``None`` when ``findings`` is empty.
        """
        if not findings:
            return None

        def count_tagged(tag: str) -> int:
            # Count findings whose pattern tags include ``tag``; a finding may
            # contribute to more than one category.
            return sum(
                1 for finding in findings
                if tag in finding.metadata.get("pattern_tags", [])
            )

        if direction == ScanDirection.OUTPUT:
            lines = [
                f"WARNING: LLM output contains {len(findings)} GDPR-relevant finding(s).",
                "Personal data should not be generated/exposed without proper basis:"
            ]
        else:
            lines = [
                f"ALERT: Input contains {len(findings)} GDPR-relevant finding(s).",
                "Ensure proper legal basis and handling for personal data:"
            ]

        special_category = count_tagged("special-category")
        personal_data = count_tagged("personal-data")
        online_id = count_tagged("online-identifier")

        # Only categories with at least one finding get a line.
        if special_category:
            lines.append(f"  Article 9 Special Category: {special_category} finding(s) - REQUIRES EXPLICIT CONSENT")
        if personal_data:
            lines.append(f"  Personal Identifiers: {personal_data} finding(s)")
        if online_id:
            lines.append(f"  Online Identifiers: {online_id} finding(s)")

        return "\n".join(lines)
@@ -0,0 +1,324 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Tweek Government Classification Compliance Plugin
4
+
5
+ Detects and handles government classification markings:
6
+ - Classification levels (TOP SECRET, SECRET, CONFIDENTIAL)
7
+ - Portion markings ((TS), (S), (C))
8
+ - Handling caveats (NOFORN, ORCON, REL TO)
9
+ - Controlled Unclassified Information (CUI, FOUO)
10
+
11
+ Supports bidirectional scanning:
12
+ - OUTPUT: Detect hallucinated classification markings in LLM responses
13
+ - INPUT: Detect real classification markings in incoming data
14
+
15
+ IMPORTANT: This plugin detects MARKERS, not actual classified content.
16
+ It helps prevent:
17
+ 1. LLMs hallucinating classification markings on unclassified content
18
+ 2. Accidental exposure of marked content to LLMs
19
+ """
20
+
21
+ from typing import Optional, List, Dict, Any
22
+ from tweek.plugins.base import (
23
+ CompliancePlugin,
24
+ ScanDirection,
25
+ ActionType,
26
+ Severity,
27
+ PatternDefinition,
28
+ )
29
+
30
+
31
class GovCompliancePlugin(CompliancePlugin):
    """
    Government classification compliance plugin.

    Detects classification markings, portion markings, and handling caveats
    used in US government documents and communications.

    The pattern catalogue is built lazily on the first call to
    ``get_patterns()`` and cached on the instance afterwards.
    """

    VERSION = "1.0.0"
    DESCRIPTION = "Detect government classification markings and handling caveats"
    AUTHOR = "Tweek"
    REQUIRES_LICENSE = "enterprise"
    TAGS = ["compliance", "government", "classification", "cui"]

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialise the plugin.

        Args:
            config: Optional plugin configuration, forwarded unchanged to the
                base class. The ``scan_direction`` key is read by the
                ``scan_direction`` property.
        """
        super().__init__(config)
        # Filled in by get_patterns() on first use.
        self._patterns: Optional[List[PatternDefinition]] = None

    @property
    def name(self) -> str:
        """Short identifier of this plugin."""
        return "gov"

    @property
    def scan_direction(self) -> ScanDirection:
        """Scan direction taken from the configuration (default ``"both"``).

        The configured value is passed straight to ``ScanDirection(...)``.
        NOTE(review): presumably an unknown value raises from the enum
        constructor - confirm against ``tweek.plugins.base``.
        """
        # Configurable, defaults to BOTH
        direction = self._config.get("scan_direction", "both")
        return ScanDirection(direction)

    def get_patterns(self) -> List[PatternDefinition]:
        """Return government classification patterns.

        Returns:
            The list of ``PatternDefinition`` objects for this plugin. The
            same list object is returned on every call after the first.
        """
        if self._patterns is not None:
            return self._patterns

        self._patterns = [
            # =================================================================
            # TOP SECRET Level
            # =================================================================
            PatternDefinition(
                name="top_secret_banner",
                regex=r"(?i)\bTOP\s*SECRET\b(?:\s*/\s*(?:SCI|TK|NOFORN|ORCON|REL\s+TO\s+[\w,\s]+))*",
                severity=Severity.CRITICAL,
                description="Top Secret classification marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "top-secret"],
            ),
            PatternDefinition(
                name="ts_sci",
                regex=r"(?i)\bTS\s*/\s*SCI\b",
                severity=Severity.CRITICAL,
                description="Top Secret/Sensitive Compartmented Information",
                default_action=ActionType.BLOCK,
                tags=["classification", "top-secret", "sci"],
            ),
            PatternDefinition(
                name="portion_marking_ts",
                regex=r"\(TS(?:\s*/\s*[A-Z]+)*\)",
                severity=Severity.CRITICAL,
                description="Top Secret portion marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "portion-marking", "top-secret"],
            ),

            # =================================================================
            # SECRET Level
            # =================================================================
            PatternDefinition(
                name="secret_banner",
                # The lookahead excludes credential phrases. The slash is
                # optional so that the everyday spellings "secret key" and
                # "secret token" are excluded as well as "SECRET/KEY";
                # requiring the slash would leave them reported as
                # classification markings.
                regex=r"(?i)(?<!/)\bSECRET\b(?!\s*/?\s*(?:SERVICE|KEY|TOKEN|PASSWORD|API))",
                severity=Severity.CRITICAL,
                description="Secret classification marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "secret"],
            ),
            PatternDefinition(
                name="portion_marking_s",
                regex=r"\(S(?:\s*/\s*[A-Z]+)*\)(?!\s*[a-z])",
                severity=Severity.CRITICAL,
                description="Secret portion marking",
                default_action=ActionType.BLOCK,
                tags=["classification", "portion-marking", "secret"],
            ),

            # =================================================================
            # CONFIDENTIAL Level
            # =================================================================
            PatternDefinition(
                name="confidential_banner",
                regex=r"(?i)(?<!/)\bCONFIDENTIAL\b(?!\s+(?:INFORMATION|DATA|FILE|DOCUMENT))",
                severity=Severity.HIGH,
                description="Confidential classification marking",
                default_action=ActionType.WARN,
                tags=["classification", "confidential"],
            ),
            PatternDefinition(
                name="portion_marking_c",
                regex=r"\(C(?:\s*/\s*[A-Z]+)*\)(?!\s*[a-z])",
                severity=Severity.HIGH,
                description="Confidential portion marking",
                default_action=ActionType.WARN,
                tags=["classification", "portion-marking", "confidential"],
            ),

            # =================================================================
            # Handling Caveats
            # =================================================================
            PatternDefinition(
                name="noforn",
                regex=r"(?i)\bNOFORN\b|NO\s*FOREIGN\s*(?:NATIONALS?|DISSEMINATION)",
                severity=Severity.CRITICAL,
                description="No Foreign Nationals handling caveat",
                default_action=ActionType.BLOCK,
                tags=["caveat", "noforn"],
            ),
            PatternDefinition(
                name="orcon",
                regex=r"(?i)\bORCON\b|ORIGINATOR\s*CONTROLLED",
                severity=Severity.HIGH,
                description="Originator Controlled handling caveat",
                default_action=ActionType.WARN,
                tags=["caveat", "orcon"],
            ),
            PatternDefinition(
                name="rel_to",
                # Leading \b keeps the marker from being found at the tail of
                # ordinary words ("barrel to ...", "quarrel to ...").
                regex=r"(?i)\bREL(?:EASABLE)?\s*TO\s+(?:USA|FVEY|[\w,\s]+)",
                severity=Severity.HIGH,
                description="Releasable To specific countries",
                default_action=ActionType.WARN,
                tags=["caveat", "rel-to"],
            ),
            PatternDefinition(
                name="fvey",
                regex=r"(?i)\bFVEY\b|FIVE\s*EYES",
                severity=Severity.HIGH,
                description="Five Eyes intelligence sharing",
                default_action=ActionType.WARN,
                tags=["caveat", "fvey"],
            ),
            PatternDefinition(
                name="wnintel",
                regex=r"(?i)\bWNINTEL\b|WARNING\s*NOTICE",
                severity=Severity.HIGH,
                description="Warning Notice Intelligence Sources",
                default_action=ActionType.WARN,
                tags=["caveat", "wnintel"],
            ),
            PatternDefinition(
                name="propin",
                regex=r"(?i)\bPROPIN\b|PROPRIETARY\s*INFORMATION",
                severity=Severity.MEDIUM,
                description="Proprietary Information caveat",
                default_action=ActionType.WARN,
                tags=["caveat", "propin"],
            ),

            # =================================================================
            # Controlled Unclassified Information (CUI)
            # =================================================================
            PatternDefinition(
                name="cui",
                regex=r"(?i)\bCUI\b(?!\s*(?:BASIC|SPECIFIED))|\bCONTROLLED\s+UNCLASSIFIED\s+INFORMATION\b",
                severity=Severity.MEDIUM,
                description="Controlled Unclassified Information",
                default_action=ActionType.WARN,
                tags=["cui"],
            ),
            PatternDefinition(
                name="cui_specified",
                regex=r"(?i)CUI\s*//?\s*(?:SP|SPECIFIED)",
                severity=Severity.HIGH,
                description="CUI Specified (higher protection)",
                default_action=ActionType.WARN,
                tags=["cui", "specified"],
            ),
            PatternDefinition(
                name="fouo",
                regex=r"(?i)\bFOUO\b|FOR\s+OFFICIAL\s+USE\s+ONLY",
                severity=Severity.MEDIUM,
                description="For Official Use Only (legacy CUI)",
                default_action=ActionType.WARN,
                tags=["cui", "fouo", "legacy"],
            ),
            PatternDefinition(
                name="law_enforcement_sensitive",
                regex=r"(?i)\bLES\b(?:\s*/\s*[A-Z]+)*|LAW\s+ENFORCEMENT\s+SENSITIVE",
                severity=Severity.MEDIUM,
                description="Law Enforcement Sensitive",
                default_action=ActionType.WARN,
                tags=["cui", "les"],
            ),

            # =================================================================
            # Classification Headers/Footers
            # =================================================================
            PatternDefinition(
                name="classification_header",
                # (?m) makes ^ and $ anchor at every line, so a header or
                # footer line is found inside a multi-line document. Without
                # it the anchors only match at the start and end of the whole
                # scanned text. Deliberately case-sensitive (no (?i)).
                regex=r"(?m)^(?:UNCLASSIFIED|CONFIDENTIAL|SECRET|TOP\s*SECRET)(?:\s*//\s*[A-Z/\s]+)?$",
                severity=Severity.HIGH,
                description="Classification header/footer line",
                default_action=ActionType.WARN,
                tags=["classification", "header"],
            ),
            PatternDefinition(
                name="classification_banner_line",
                regex=r"[-=]{3,}\s*(?:UNCLASSIFIED|CONFIDENTIAL|SECRET|TOP\s*SECRET)\s*[-=]{3,}",
                severity=Severity.HIGH,
                description="Classification banner line",
                default_action=ActionType.WARN,
                tags=["classification", "banner"],
            ),

            # =================================================================
            # Special Programs
            # =================================================================
            PatternDefinition(
                name="sap_marker",
                regex=r"(?i)\bSAP\b(?:\s*/\s*[A-Z]+)*|SPECIAL\s+ACCESS\s+PROGRAM",
                severity=Severity.CRITICAL,
                description="Special Access Program marker",
                default_action=ActionType.BLOCK,
                tags=["classification", "sap"],
            ),
            PatternDefinition(
                name="waived_sap",
                regex=r"(?i)WAIVED\s+SAP|UNACKNOWLEDGED\s+SAP",
                severity=Severity.CRITICAL,
                description="Waived/Unacknowledged SAP reference",
                default_action=ActionType.BLOCK,
                tags=["classification", "sap"],
            ),

            # =================================================================
            # NATO Classifications
            # =================================================================
            PatternDefinition(
                name="nato_classification",
                regex=r"(?i)NATO\s+(?:UNCLASSIFIED|RESTRICTED|CONFIDENTIAL|SECRET|COSMIC\s+TOP\s+SECRET)",
                severity=Severity.HIGH,
                description="NATO classification marking",
                default_action=ActionType.WARN,
                tags=["classification", "nato"],
            ),

            # =================================================================
            # Declassification Markings
            # =================================================================
            PatternDefinition(
                name="declassification_date",
                regex=r"(?i)DECLAS(?:SIFY)?(?:\s+ON)?:\s*\d{4}[-/]\d{2}[-/]\d{2}",
                severity=Severity.MEDIUM,
                description="Declassification date marking",
                default_action=ActionType.WARN,
                tags=["classification", "declassification"],
            ),
            PatternDefinition(
                name="classified_by",
                regex=r"(?i)CLASSIFIED\s+BY:\s*[\w\s]+",
                severity=Severity.MEDIUM,
                description="Classified By line",
                default_action=ActionType.WARN,
                tags=["classification", "attribution"],
            ),
            PatternDefinition(
                name="derived_from",
                regex=r"(?i)DERIVED\s+FROM:\s*[\w\s]+",
                severity=Severity.MEDIUM,
                description="Derived From line",
                default_action=ActionType.WARN,
                tags=["classification", "attribution"],
            ),
        ]

        return self._patterns

    def _format_message(
        self,
        findings: List,
        direction: ScanDirection
    ) -> Optional[str]:
        """Format a government-specific message.

        Args:
            findings: Findings to summarise; only their count is used.
            direction: ``ScanDirection.OUTPUT`` selects the wording about
                likely-hallucinated markings; any other value selects the
                wording about markings in incoming content.

        Returns:
            A three-line message, or ``None`` when ``findings`` is empty.
        """
        if not findings:
            return None

        total = len(findings)

        if direction == ScanDirection.OUTPUT:
            return (
                f"WARNING: LLM output contains {total} classification marking(s).\n"
                "These are likely HALLUCINATED and do not indicate actual classified content.\n"
                "Do NOT treat this content as classified - verify with proper authorities."
            )

        return (
            f"ALERT: Input contains {total} classification marking(s).\n"
            "If this is actual classified material, it should NOT be processed by this system.\n"
            "Verify proper handling procedures and need-to-know."
        )