tweek 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. tweek/__init__.py +2 -2
  2. tweek/_keygen.py +53 -0
  3. tweek/audit.py +288 -0
  4. tweek/cli.py +5398 -2392
  5. tweek/cli_model.py +380 -0
  6. tweek/config/families.yaml +609 -0
  7. tweek/config/manager.py +42 -5
  8. tweek/config/patterns.yaml +1510 -8
  9. tweek/config/tiers.yaml +161 -11
  10. tweek/diagnostics.py +71 -2
  11. tweek/hooks/break_glass.py +163 -0
  12. tweek/hooks/feedback.py +223 -0
  13. tweek/hooks/overrides.py +531 -0
  14. tweek/hooks/post_tool_use.py +472 -0
  15. tweek/hooks/pre_tool_use.py +1024 -62
  16. tweek/integrations/openclaw.py +443 -0
  17. tweek/integrations/openclaw_server.py +385 -0
  18. tweek/licensing.py +14 -54
  19. tweek/logging/bundle.py +2 -2
  20. tweek/logging/security_log.py +56 -13
  21. tweek/mcp/approval.py +57 -16
  22. tweek/mcp/proxy.py +18 -0
  23. tweek/mcp/screening.py +5 -5
  24. tweek/mcp/server.py +4 -1
  25. tweek/memory/__init__.py +24 -0
  26. tweek/memory/queries.py +223 -0
  27. tweek/memory/safety.py +140 -0
  28. tweek/memory/schemas.py +80 -0
  29. tweek/memory/store.py +989 -0
  30. tweek/platform/__init__.py +4 -4
  31. tweek/plugins/__init__.py +40 -24
  32. tweek/plugins/base.py +1 -1
  33. tweek/plugins/detectors/__init__.py +3 -3
  34. tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
  35. tweek/plugins/git_discovery.py +16 -4
  36. tweek/plugins/git_registry.py +8 -2
  37. tweek/plugins/git_security.py +21 -9
  38. tweek/plugins/screening/__init__.py +10 -1
  39. tweek/plugins/screening/heuristic_scorer.py +477 -0
  40. tweek/plugins/screening/llm_reviewer.py +14 -6
  41. tweek/plugins/screening/local_model_reviewer.py +161 -0
  42. tweek/proxy/__init__.py +38 -37
  43. tweek/proxy/addon.py +22 -3
  44. tweek/proxy/interceptor.py +1 -0
  45. tweek/proxy/server.py +4 -2
  46. tweek/sandbox/__init__.py +11 -0
  47. tweek/sandbox/docker_bridge.py +143 -0
  48. tweek/sandbox/executor.py +9 -6
  49. tweek/sandbox/layers.py +97 -0
  50. tweek/sandbox/linux.py +1 -0
  51. tweek/sandbox/project.py +548 -0
  52. tweek/sandbox/registry.py +149 -0
  53. tweek/security/__init__.py +9 -0
  54. tweek/security/language.py +250 -0
  55. tweek/security/llm_reviewer.py +1146 -60
  56. tweek/security/local_model.py +331 -0
  57. tweek/security/local_reviewer.py +146 -0
  58. tweek/security/model_registry.py +371 -0
  59. tweek/security/rate_limiter.py +11 -6
  60. tweek/security/secret_scanner.py +70 -4
  61. tweek/security/session_analyzer.py +26 -2
  62. tweek/skill_template/SKILL.md +200 -0
  63. tweek/skill_template/__init__.py +0 -0
  64. tweek/skill_template/cli-reference.md +331 -0
  65. tweek/skill_template/overrides-reference.md +184 -0
  66. tweek/skill_template/scripts/__init__.py +0 -0
  67. tweek/skill_template/scripts/check_installed.py +170 -0
  68. tweek/skills/__init__.py +38 -0
  69. tweek/skills/config.py +150 -0
  70. tweek/skills/fingerprints.py +198 -0
  71. tweek/skills/guard.py +293 -0
  72. tweek/skills/isolation.py +469 -0
  73. tweek/skills/scanner.py +715 -0
  74. tweek/vault/__init__.py +0 -1
  75. tweek/vault/cross_platform.py +12 -1
  76. tweek/vault/keychain.py +87 -29
  77. tweek-0.2.1.dist-info/METADATA +281 -0
  78. tweek-0.2.1.dist-info/RECORD +122 -0
  79. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/entry_points.txt +8 -1
  80. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/licenses/LICENSE +80 -0
  81. tweek-0.2.1.dist-info/top_level.txt +2 -0
  82. tweek-openclaw-plugin/node_modules/flatted/python/flatted.py +149 -0
  83. tweek/integrations/moltbot.py +0 -243
  84. tweek-0.1.0.dist-info/METADATA +0 -335
  85. tweek-0.1.0.dist-info/RECORD +0 -85
  86. tweek-0.1.0.dist-info/top_level.txt +0 -1
  87. {tweek-0.1.0.dist-info → tweek-0.2.1.dist-info}/WHEEL +0 -0
@@ -1,7 +1,7 @@
1
1
  # Tweek Attack Pattern Definitions v3
2
- # All 116 patterns included FREE
2
+ # All 259 patterns included FREE
3
3
  #
4
- # Update via: tweek update (pulls from github.com/gettweek/tweek-patterns)
4
+ # Update via: tweek update (pulls from github.com/gettweek/tweek)
5
5
  #
6
6
  # Fields:
7
7
  # id: Sequential pattern number
@@ -9,6 +9,7 @@
9
9
  # description: Human-readable explanation
10
10
  # regex: Python regex pattern
11
11
  # severity: critical | high | medium | low
12
+ # confidence: deterministic | heuristic | contextual
12
13
  #
13
14
  # Severity guide:
14
15
  # critical - Almost certainly malicious
@@ -16,10 +17,15 @@
16
17
  # medium - Suspicious, warrants review
17
18
  # low - Unusual but possibly legitimate
18
19
  #
20
+ #
21
+ # Confidence guide:
22
+ # deterministic - Near-zero false positive rate; precise file/command targeting
23
+ # heuristic - Good detection signal; may FP in legitimate security contexts
24
+ # contextual - Depends on surrounding context; broad behavioral pattern
19
25
  # PRO tier adds: LLM review, session analysis, rate limiting
20
26
 
21
- version: 3
22
- pattern_count: 116
27
+ version: 5
28
+ pattern_count: 259
23
29
 
24
30
  patterns:
25
31
  # ============================================================================
@@ -33,60 +39,80 @@ patterns:
33
39
  description: "Reading SSH private keys"
34
40
  regex: '(cat|head|tail|less|more)\s+.*\.ssh/(id_rsa|id_ed25519|id_ecdsa|id_dsa)(?!\.pub)'
35
41
  severity: critical
42
+ confidence: deterministic
43
+ family: credential_theft
36
44
 
37
45
  - id: 2
38
46
  name: aws_credentials
39
47
  description: "Accessing AWS credential files"
40
48
  regex: '(cat|head|tail|less|more)\s+.*\.aws/(credentials|config)'
41
49
  severity: critical
50
+ confidence: deterministic
51
+ family: credential_theft
42
52
 
43
53
  - id: 3
44
54
  name: env_file_access
45
55
  description: "Attempts to read .env files containing secrets"
46
56
  regex: '(cat|head|tail|less|more|bat|grep|rg|ag)\s+.*\.env'
47
57
  severity: high
58
+ confidence: heuristic
59
+ family: credential_theft
48
60
 
49
61
  - id: 4
50
62
  name: keychain_dump
51
63
  description: "Extracting credentials from macOS Keychain"
52
64
  regex: '(security\s+dump-keychain|security\s+find-(generic|internet)-password\s+.*-w|chainbreaker|security\s+export)'
53
65
  severity: critical
66
+ confidence: deterministic
67
+ family: credential_theft
54
68
 
55
69
  - id: 5
56
70
  name: gcloud_credentials
57
71
  description: "Accessing Google Cloud credentials"
58
72
  regex: '(cat|head|tail|less|more)\s+.*\.config/gcloud'
59
73
  severity: critical
74
+ confidence: deterministic
75
+ family: credential_theft
60
76
 
61
77
  - id: 6
62
78
  name: netrc_access
63
79
  description: "Accessing .netrc (contains plaintext passwords)"
64
80
  regex: '(cat|head|tail|less|more)\s+.*\.netrc'
65
81
  severity: critical
82
+ confidence: deterministic
83
+ family: credential_theft
66
84
 
67
85
  - id: 7
68
86
  name: kube_config
69
87
  description: "Accessing Kubernetes config"
70
88
  regex: '(cat|head|tail|less|more)\s+.*\.kube/config'
71
89
  severity: high
90
+ confidence: heuristic
91
+ family: credential_theft
72
92
 
73
93
  - id: 8
74
94
  name: ssh_directory_access
75
95
  description: "Accessing SSH directory contents"
76
96
  regex: '(ls|find|cat)\s+.*\.ssh'
77
97
  severity: high
98
+ confidence: heuristic
99
+ family: credential_theft
78
100
 
79
101
  - id: 9
80
102
  name: env_variable_expansion
81
103
  description: "Accessing environment variables containing secrets"
82
104
  regex: '\$\{?(API_KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|PRIVATE_KEY|AUTH)[A-Z_]*\}?'
83
105
  severity: high
106
+ confidence: heuristic
107
+ family: credential_theft
84
108
 
85
109
  - id: 10
86
110
  name: history_access
87
111
  description: "Reading shell history (may contain secrets)"
88
112
  regex: '(cat|head|tail|less|more|grep)\s+.*(\.bash_history|\.zsh_history|\.history)'
89
113
  severity: high
114
+ confidence: heuristic
115
+ family: credential_theft
90
116
 
91
117
  # --- Network Exfiltration ---
92
118
  - id: 11
@@ -94,36 +120,48 @@ patterns:
94
120
  description: "Curl sending data that may include secrets"
95
121
  regex: 'curl\s+.*(-d|--data|--data-raw|--data-binary)\s+.*\$\('
96
122
  severity: critical
123
+ confidence: deterministic
124
+ family: data_exfiltration
97
125
 
98
126
  - id: 12
99
127
  name: exfil_paste_sites
100
128
  description: "Data sent to known paste/exfil sites"
101
129
  regex: '(curl|wget|http).*?(pastebin\.com|hastebin\.com|ghostbin|0x0\.st|transfer\.sh|file\.io|webhook\.site|requestbin|ngrok\.io|pipedream)'
102
130
  severity: critical
131
+ confidence: deterministic
132
+ family: data_exfiltration
103
133
 
104
134
  - id: 13
105
135
  name: netcat_outbound
106
136
  description: "Netcat connection (potential reverse shell or exfil)"
107
137
  regex: '\b(nc|ncat|netcat)\b.*(-e|exec)'
108
138
  severity: critical
139
+ confidence: deterministic
140
+ family: data_exfiltration
109
141
 
110
142
  - id: 14
111
143
  name: reverse_shell
112
144
  description: "Creating reverse shell connections"
113
145
  regex: '(python|ruby|perl|php).*socket.*connect.*(exec|system|spawn)|bash\s+-i.*>&.*/dev/tcp/'
114
146
  severity: critical
147
+ confidence: deterministic
148
+ family: data_exfiltration
115
149
 
116
150
  - id: 15
117
151
  name: curl_post_file
118
152
  description: "Curl uploading local files"
119
153
  regex: 'curl\s+.*(-F|--form|-T|--upload-file)\s+'
120
154
  severity: high
155
+ confidence: heuristic
156
+ family: data_exfiltration
121
157
 
122
158
  - id: 16
123
159
  name: pipe_to_shell
124
160
  description: "Piping remote content directly to shell"
125
161
  regex: '(curl|wget).*\|\s*(bash|sh|zsh|python|perl|ruby)'
126
162
  severity: critical
163
+ confidence: deterministic
164
+ family: data_exfiltration
127
165
 
128
166
  # --- Prompt Injection (basic) ---
129
167
  - id: 17
@@ -131,31 +169,41 @@ patterns:
131
169
  description: "Direct attempt to override system instructions"
132
170
  regex: '(ignore|disregard|forget|override)\s+(previous|prior|system|all|above)\s+(instructions|prompt|rules|constraints|directives)'
133
171
  severity: high
172
+ confidence: heuristic
173
+ family: prompt_injection
134
174
 
135
175
  - id: 18
136
176
  name: role_hijack
137
177
  description: "Attempting to assume new identity/role"
138
178
  regex: '(you\s+are\s+now|act\s+as|pretend\s+to\s+be|your\s+new\s+role\s+is|from\s+now\s+on\s+you\s+are)'
139
179
  severity: high
180
+ confidence: heuristic
181
+ family: prompt_injection
140
182
 
141
183
  - id: 19
142
184
  name: privilege_claim
143
185
  description: "False claims of elevated privileges"
144
186
  regex: '(as\s+(the\s+)?(admin|root|owner|developer|superuser)|i\s+have\s+(admin|root|elevated)\s+access|authorized\s+to|i\s+am\s+the\s+owner)'
145
187
  severity: high
188
+ confidence: heuristic
189
+ family: prompt_injection
146
190
 
147
191
  # --- Destructive Commands ---
148
192
  - id: 20
149
193
  name: recursive_delete_root
150
194
  description: "Recursive deletion from root or home"
151
- regex: 'rm\s+.*-[rf]*\s+(/|~|\$HOME)\s*$'
195
+ regex: '(?:^|[;&|]\s*)rm\s+(-[a-zA-Z]*[rf][a-zA-Z]*\s+)+\s*(\/(?:\s|;|&|\||$)|~(?:\s|\/|;|&|\||$)|\$HOME(?:\s|\/|;|&|\||$)|\/(?:etc|var|usr|bin|sbin|root|home|boot|lib|opt|srv)(?:\s|\/|;|&|\||$))'
152
196
  severity: critical
197
+ confidence: deterministic
198
+ family: destructive_ops
153
199
 
154
200
  - id: 21
155
201
  name: disk_wipe
156
202
  description: "Disk wiping commands"
157
203
  regex: 'dd\s+if=/dev/(zero|urandom)\s+of=/dev/(sd|hd|nvme)|mkfs\s+/dev/'
158
204
  severity: critical
205
+ confidence: deterministic
206
+ family: destructive_ops
159
207
 
160
208
  # --- Config Manipulation ---
161
209
  - id: 22
@@ -163,12 +211,16 @@ patterns:
163
211
  description: "Writing to auto-approve configurations"
164
212
  regex: '(autoApprove|auto_approve|autorun|auto_execute|allowAll)\s*[=:]\s*"?(true|1|yes)"?'
165
213
  severity: critical
214
+ confidence: deterministic
215
+ family: persistence
166
216
 
167
217
  - id: 23
168
218
  name: hook_bypass
169
219
  description: "Attempting to bypass or disable hooks"
170
220
  regex: '(disable|skip|bypass|remove).*hook|--no-verify|pre-commit.*disable|\.git/hooks'
171
221
  severity: high
222
+ confidence: heuristic
223
+ family: persistence
172
224
 
173
225
  # ============================================================================
174
226
  # ADVANCED DETECTION (24-116)
@@ -181,48 +233,64 @@ patterns:
181
233
  description: "Accessing NPM authentication tokens"
182
234
  regex: '(cat|head|tail|less|more)\s+.*\.npmrc'
183
235
  severity: high
236
+ confidence: heuristic
237
+ family: credential_theft
184
238
 
185
239
  - id: 25
186
240
  name: docker_config_access
187
241
  description: "Accessing Docker credentials"
188
242
  regex: '(cat|head|tail|less|more)\s+.*\.docker/config\.json'
189
243
  severity: high
244
+ confidence: heuristic
245
+ family: credential_theft
190
246
 
191
247
  - id: 26
192
248
  name: pypirc_access
193
249
  description: "Accessing PyPI credentials"
194
250
  regex: '(cat|head|tail|less|more)\s+.*\.pypirc'
195
251
  severity: high
252
+ confidence: heuristic
253
+ family: credential_theft
196
254
 
197
255
  - id: 27
198
256
  name: git_credentials_access
199
257
  description: "Accessing Git credential store"
200
258
  regex: '(cat|head|tail|less|more)\s+.*\.git-credentials'
201
259
  severity: high
260
+ confidence: heuristic
261
+ family: credential_theft
202
262
 
203
263
  - id: 28
204
264
  name: azure_credentials
205
265
  description: "Accessing Azure credentials"
206
266
  regex: '(cat|head|tail|less|more)\s+.*\.azure/(credentials|config)'
207
267
  severity: critical
268
+ confidence: deterministic
269
+ family: credential_theft
208
270
 
209
271
  - id: 29
210
272
  name: env_command
211
273
  description: "Dumping all environment variables"
212
- regex: '\benv\b|\bprintenv\b|\bexport\s*$'
274
+ regex: '^\s*env\s*$|^\s*env\s+-|\bprintenv\b|\bexport\s*$'
213
275
  severity: medium
276
+ confidence: heuristic
277
+ family: credential_theft
214
278
 
215
279
  - id: 30
216
280
  name: browser_credential_theft
217
281
  description: "Accessing browser saved passwords and cookies"
218
282
  regex: '(Login\s*Data|key4\.db|logins\.json|Cookies\.binarycookies|~/Library/(Safari|Application\s*Support/(Google/)?Chrome)/.*\.(db|sqlite))'
219
283
  severity: critical
284
+ confidence: deterministic
285
+ family: credential_theft
220
286
 
221
287
  - id: 31
222
288
  name: crypto_wallet_theft
223
289
  description: "Accessing cryptocurrency wallet files"
224
290
  regex: '(wallet\.dat|\.wallet|seed\.txt|mnemonic|Electrum|Exodus|MetaMask|\.ethereum/keystore|Atomic.*Wallet)'
225
291
  severity: critical
292
+ confidence: deterministic
293
+ family: credential_theft
226
294
 
227
295
  # --- Advanced Network Exfiltration ---
228
296
  - id: 32
@@ -230,48 +298,64 @@ patterns:
230
298
  description: "Wget sending POST data"
231
299
  regex: 'wget\s+.*--post-(data|file)'
232
300
  severity: high
301
+ confidence: heuristic
302
+ family: data_exfiltration
233
303
 
234
304
  - id: 33
235
305
  name: base64_curl_pipe
236
306
  description: "Base64 encoded data piped to curl (obfuscated exfil)"
237
307
  regex: 'base64.*\|\s*curl|curl.*base64'
238
308
  severity: critical
309
+ confidence: deterministic
310
+ family: data_exfiltration
239
311
 
240
312
  - id: 34
241
313
  name: dns_exfiltration
242
314
  description: "Data exfiltration via DNS queries"
243
315
  regex: '(dig|nslookup|host)\s+.*\$\(|dns.*tunnel|iodine|dnscat'
244
316
  severity: high
317
+ confidence: heuristic
318
+ family: data_exfiltration
245
319
 
246
320
  - id: 35
247
321
  name: icmp_tunnel
248
322
  description: "Data exfiltration via ICMP"
249
323
  regex: 'ping.*\$\(|icmp.*tunnel|ptunnel'
250
324
  severity: high
325
+ confidence: heuristic
326
+ family: data_exfiltration
251
327
 
252
328
  - id: 36
253
329
  name: curl_with_env
254
330
  description: "Curl command accessing environment variables"
255
331
  regex: 'curl.*\$\{?(API_KEY|SECRET|TOKEN|PASSWORD|AUTH)'
256
332
  severity: critical
333
+ confidence: deterministic
334
+ family: data_exfiltration
257
335
 
258
336
  - id: 37
259
337
  name: webhook_exfil
260
338
  description: "Data sent to webhook endpoints"
261
339
  regex: '(curl|wget|http).*?(hooks\.slack\.com|discord\.com/api/webhooks|api\.telegram\.org)'
262
340
  severity: high
341
+ confidence: heuristic
342
+ family: data_exfiltration
263
343
 
264
344
  - id: 38
265
345
  name: git_exfil
266
346
  description: "Exfiltration via git operations"
267
347
  regex: 'git\s+(push|commit).*\.(env|pem|key)|git.*remote.*add.*http'
268
348
  severity: high
349
+ confidence: heuristic
350
+ family: data_exfiltration
269
351
 
270
352
  - id: 39
271
353
  name: scp_exfil
272
354
  description: "File transfer via SCP to external hosts"
273
355
  regex: 'scp\s+.*(\.env|\.pem|\.key|credentials|secrets).*@'
274
356
  severity: critical
357
+ confidence: deterministic
358
+ family: data_exfiltration
275
359
 
276
360
  # --- Prompt Injection - Evasive (40-50) ---
277
361
  - id: 40
@@ -279,66 +363,88 @@ patterns:
279
363
  description: "Claiming test/debug mode to bypass restrictions"
280
364
  regex: '(this\s+is\s+(a\s+)?test|testing\s+mode|debug\s+mode|in\s+development|sandbox\s+mode|simulation)'
281
365
  severity: medium
366
+ confidence: contextual
367
+ family: prompt_injection
282
368
 
283
369
  - id: 41
284
370
  name: context_reset
285
371
  description: "Attempting to reset conversation context"
286
372
  regex: '(reset\s+context|clear\s+memory|start\s+fresh|new\s+conversation|forget\s+everything)'
287
373
  severity: medium
374
+ confidence: contextual
375
+ family: prompt_injection
288
376
 
289
377
  - id: 42
290
378
  name: system_prompt_extract
291
379
  description: "Attempting to extract system prompt"
292
- regex: '(what\s+(is|are)\s+your\s+(system\s+)?instructions|show\s+me\s+your\s+prompt|reveal\s+your\s+rules|print\s+system\s+prompt)'
380
+ regex: '(what\s+(is|are|were)\s+your\s+(system\s+|initial\s+|original\s+|core\s+|complete\s+)?(instructions|prompt|configuration|config|rules|guidelines|directives|constraints)|show\s+(me\s+)?your\s+(system\s+)?prompt|reveal\s+your\s+(rules|prompt|instructions|configuration)|(output|share|display|provide|repeat|dump|print)\s+(your|the)\s+(system\s+)?(prompt|instructions|configuration|config|rules|guidelines)|list\s+(all\s+)?(the\s+)?(rules|instructions|guidelines|constraints)\s+(you|that\s+you)\s+follow|what\s+(instructions|rules|guidelines)\s+were\s+you\s+given|what\s+were\s+you\s+(told|instructed|configured|initialized)\s+(to\s+do|with))'
293
381
  severity: medium
382
+ confidence: contextual
383
+ family: prompt_injection
294
384
 
295
385
  - id: 43
296
386
  name: jailbreak_dan
297
387
  description: "Known jailbreak patterns (DAN, etc.)"
298
388
  regex: '(do\s+anything\s+now|DAN\s+mode|developer\s+mode\s+enabled|jailbreak|unlock\s+mode)'
299
389
  severity: high
390
+ confidence: heuristic
391
+ family: prompt_injection
300
392
 
301
393
  - id: 44
302
394
  name: base64_instruction
303
395
  description: "Base64 encoded instructions (obfuscation)"
304
396
  regex: 'base64\s*[:-]?\s*[A-Za-z0-9+/=]{40,}'
305
397
  severity: high
398
+ confidence: heuristic
399
+ family: evasion_techniques
306
400
 
307
401
  - id: 45
308
402
  name: unicode_obfuscation
309
403
  description: "Zero-width or invisible unicode characters"
310
404
  regex: '[\u200b-\u200f\u2028-\u202f\ufeff\u00ad]'
311
405
  severity: high
406
+ confidence: heuristic
407
+ family: evasion_techniques
312
408
 
313
409
  - id: 46
314
410
  name: delimiter_injection
315
411
  description: "LLM-specific delimiter injection"
316
412
  regex: '(<\|im_start\|>|<\|im_end\|>|\[INST\]|\[/INST\]|</s><s>|<\|endoftext\|>|<\|system\|>|<\|user\|>|<\|assistant\|>)'
317
413
  severity: critical
414
+ confidence: deterministic
415
+ family: evasion_techniques
318
416
 
319
417
  - id: 47
320
418
  name: markdown_hidden
321
419
  description: "Instructions hidden in HTML/Markdown comments"
322
420
  regex: '<!--.*?(ignore|system|instruction|execute|override|bypass).*?-->'
323
421
  severity: high
422
+ confidence: heuristic
423
+ family: evasion_techniques
324
424
 
325
425
  - id: 48
326
426
  name: hex_encoded_command
327
427
  description: "Hex-encoded commands (obfuscation)"
328
428
  regex: '\\x[0-9a-fA-F]{2}(\\x[0-9a-fA-F]{2}){5,}'
329
429
  severity: high
430
+ confidence: heuristic
431
+ family: evasion_techniques
330
432
 
331
433
  - id: 49
332
434
  name: rot13_obfuscation
333
435
  description: "ROT13 or similar simple cipher obfuscation"
334
436
  regex: 'vtaber\s+cerivbhf|rknzvar\s+gur'
335
437
  severity: medium
438
+ confidence: contextual
439
+ family: evasion_techniques
336
440
 
337
441
  - id: 50
338
442
  name: leetspeak_bypass
339
443
  description: "L33tspeak to bypass filters"
340
444
  regex: '(1gn0r3|byp4ss|h4ck|3x3cut3|syst3m)'
341
445
  severity: medium
446
+ confidence: contextual
447
+ family: evasion_techniques
342
448
 
343
449
  # --- Prompt Injection - Social/Cognitive (51-60) ---
344
450
  - id: 51
@@ -346,60 +452,80 @@ patterns:
346
452
  description: "False urgency to bypass careful analysis"
347
453
  regex: '(urgent|immediately|critical|emergency|asap|time-sensitive|right\s+now)\s+(need|require|must|execute|do\s+this)'
348
454
  severity: medium
455
+ confidence: contextual
456
+ family: prompt_injection
349
457
 
350
458
  - id: 52
351
459
  name: authority_claim
352
460
  description: "False authority claims"
353
461
  regex: '(ceo|cto|ciso|manager|supervisor|security\s+team|it\s+department)\s+(asked|requested|authorized|approved|ordered)'
354
462
  severity: medium
463
+ confidence: contextual
464
+ family: prompt_injection
355
465
 
356
466
  - id: 53
357
467
  name: reciprocity_exploit
358
468
  description: "Exploiting reciprocity bias"
359
469
  regex: '(i\s+helped\s+you|you\s+owe|in\s+return|quid\s+pro\s+quo|do\s+me\s+a\s+favor)'
360
470
  severity: low
471
+ confidence: contextual
472
+ family: prompt_injection
361
473
 
362
474
  - id: 54
363
475
  name: empathy_exploit
364
476
  description: "Exploiting empathy/sympathy"
365
477
  regex: '(please\s+help|i\s+will\s+lose\s+my\s+job|desperate|life\s+depends|only\s+you\s+can)'
366
478
  severity: low
479
+ confidence: contextual
480
+ family: prompt_injection
367
481
 
368
482
  - id: 55
369
483
  name: flattery_manipulation
370
484
  description: "Excessive flattery to manipulate"
371
485
  regex: '(you\s+are\s+so\s+smart|only\s+you\s+can\s+do\s+this|you\s+are\s+the\s+best|i\s+trust\s+you\s+completely)'
372
486
  severity: low
487
+ confidence: contextual
488
+ family: prompt_injection
373
489
 
374
490
  - id: 56
375
491
  name: authority_laundering
376
492
  description: "False claims of external authorization"
377
493
  regex: '(my\s+(security\s+)?team|manager|supervisor|it\s+department)\s+(approved|authorized|verified|confirmed|said\s+it.s\s+ok)'
378
494
  severity: high
495
+ confidence: heuristic
496
+ family: prompt_injection
379
497
 
380
498
  - id: 57
381
499
  name: moral_coercion
382
500
  description: "Exploiting ethics/sympathy to bypass safety"
383
501
  regex: '(someone\s+will\s+(die|get\s+hurt)|life\s+depends|people\s+are\s+in\s+danger|save\s+lives|prevent\s+harm)\s+(if\s+you\s+don.t|unless\s+you)'
384
502
  severity: high
503
+ confidence: heuristic
504
+ family: prompt_injection
385
505
 
386
506
  - id: 58
387
507
  name: benign_transformation_loophole
388
508
  description: "Requesting translation of prohibited content"
389
509
  regex: '(translate|summarize|paraphrase|rewrite|explain)\s+(this|the)\s+(malware|exploit|attack|payload|shellcode)'
390
510
  severity: high
511
+ confidence: heuristic
512
+ family: prompt_injection
391
513
 
392
514
  - id: 59
393
515
  name: hypothetical_operational
394
516
  description: "Hypothetical framing for operational content"
395
- regex: '(hypothetically|theoretically|for\s+a\s+novel|in\s+fiction|imagine\s+if)\s+.{0,50}(how\s+to|steps\s+to|create|build|make)'
517
+ regex: '(hypothetically|theoretically|for\s+a\s+novel|in\s+fiction|imagine\s+if)[,;]?\s+.{0,50}(how\s+to|steps\s+to|create|build|make)'
396
518
  severity: medium
519
+ confidence: contextual
520
+ family: prompt_injection
397
521
 
398
522
  - id: 60
399
523
  name: capability_aggregation_signal
400
524
  description: "Incremental information gathering"
401
525
  regex: '(now\s+that\s+I\s+have|with\s+the\s+previous|combining\s+this\s+with|next\s+step\s+is|building\s+on\s+that)'
402
526
  severity: medium
527
+ confidence: contextual
528
+ family: prompt_injection
403
529
 
404
530
  # --- ACIP-Inspired (61-63) ---
405
531
  - id: 61
@@ -407,18 +533,24 @@ patterns:
407
533
  description: "Request to hide output in files/links"
408
534
  regex: '(save\s+to\s+file|write\s+to\s+file|link\s+only|don.t\s+show|hide\s+(the\s+)?output)\s+.{0,30}(instead|rather\s+than)'
409
535
  severity: high
536
+ confidence: heuristic
537
+ family: prompt_injection
410
538
 
411
539
  - id: 62
412
540
  name: oracle_probing
413
541
  description: "Probing to understand detection rules"
414
542
  regex: '(what\s+triggers|which\s+rules|how\s+does\s+your\s+(filter|detection)|why\s+was\s+that\s+blocked|what\s+pattern)'
415
543
  severity: medium
544
+ confidence: contextual
545
+ family: prompt_injection
416
546
 
417
547
  - id: 63
418
548
  name: persona_simulation
419
549
  description: "Request to simulate unrestricted persona"
420
550
  regex: '(pretend\s+you\s+have\s+no|simulate\s+(having\s+)?no|act\s+as\s+if\s+no)\s+(restrictions|limits|rules|filters|safety)'
421
551
  severity: high
552
+ confidence: heuristic
553
+ family: prompt_injection
422
554
 
423
555
  # --- MCP CVEs (64-71) ---
424
556
  - id: 64
@@ -426,48 +558,64 @@ patterns:
426
558
  description: "CVE-2025-6514: mcp-remote OAuth proxy RCE (CVSS 9.6)"
427
559
  regex: 'mcp-remote|oauth.*proxy.*mcp'
428
560
  severity: critical
561
+ confidence: heuristic
562
+ family: mcp_attacks
429
563
 
430
564
  - id: 65
431
565
  name: figma_mcp_rce
432
566
  description: "CVE-2025-53967: Framelink Figma MCP RCE"
433
567
  regex: 'framelink|figma.*mcp.*server'
434
568
  severity: critical
569
+ confidence: heuristic
570
+ family: mcp_attacks
435
571
 
436
572
  - id: 66
437
573
  name: cursor_mcp_injection
438
574
  description: "CVE-2025-64106: Cursor MCP command injection (CVSS 8.8)"
439
575
  regex: 'cursor.*mcp.*install|mcp.*cursor.*config'
440
576
  severity: critical
577
+ confidence: heuristic
578
+ family: mcp_attacks
441
579
 
442
580
  - id: 67
443
581
  name: mcp_tool_poisoning
444
582
  description: "Tool description containing hidden instructions"
445
583
  regex: '"description"\s*:\s*"[^"]*?(before\s+calling|IMPORTANT\s*:|first\s+read|include\s+in|always\s+first)'
446
584
  severity: critical
585
+ confidence: heuristic
586
+ family: mcp_attacks
447
587
 
448
588
  - id: 68
449
589
  name: mcp_path_traversal
450
590
  description: "MCP path validation bypass"
451
591
  regex: '"path"\s*:\s*"[^"]*\.\.\/|resources/read.*\.\.'
452
592
  severity: critical
593
+ confidence: heuristic
594
+ family: mcp_attacks
453
595
 
454
596
  - id: 69
455
597
  name: mcp_protocol_injection
456
598
  description: "Malicious MCP message manipulation"
457
599
  regex: '("method"\s*:\s*"tools/call".*dangerous|"method"\s*:\s*"resources/read".*\.\./|mcp://)'
458
600
  severity: critical
601
+ confidence: heuristic
602
+ family: mcp_attacks
459
603
 
460
604
  - id: 70
461
605
  name: mcp_sampling_abuse
462
606
  description: "MCP sampling for hidden token consumption"
463
607
  regex: '"method"\s*:\s*"sampling/create".*?(hidden|covert|additional)'
464
608
  severity: high
609
+ confidence: heuristic
610
+ family: mcp_attacks
465
611
 
466
612
  - id: 71
467
613
  name: mcp_rug_pull
468
614
  description: "MCP server behavior change post-approval"
469
615
  regex: '(after\s+approval|once\s+approved|when\s+trusted)\s+(change|modify|alter)'
470
616
  severity: high
617
+ confidence: heuristic
618
+ family: mcp_attacks
471
619
 
472
620
  # --- Claude-Specific CVEs (72-77) ---
473
621
  - id: 72
@@ -475,36 +623,48 @@ patterns:
475
623
  description: "CVE-2025-54794: System message spoofing"
476
624
  regex: '^#\s*SYSTEM\s*:|^\[SYSTEM\]|<system>.*?</system>|Human:\s*\[System\]'
477
625
  severity: critical
626
+ confidence: heuristic
627
+ family: system_recon
478
628
 
479
629
  - id: 73
480
630
  name: claude_path_bypass
481
631
  description: "CVE-2025-54795: Claude Code path restriction bypass"
482
632
  regex: '/proc/self|/dev/(tcp|udp)|symlink.*\.\.'
483
633
  severity: critical
634
+ confidence: heuristic
635
+ family: system_recon
484
636
 
485
637
  - id: 74
486
638
  name: claude_file_exfil
487
639
  description: "Data exfiltration via Claude File API"
488
640
  regex: 'api\.anthropic\.com.*(upload|file)|multipart/form-data.*claude'
489
641
  severity: high
642
+ confidence: heuristic
643
+ family: data_exfiltration
490
644
 
491
645
  - id: 75
492
646
  name: cursorrules_injection
493
647
  description: "AIShellJack - malicious .cursorrules exploitation"
494
648
  regex: '\.(cursorrules|github/copilot-instructions\.md|claude/settings)'
495
649
  severity: high
650
+ confidence: heuristic
651
+ family: persistence
496
652
 
497
653
  - id: 76
498
654
  name: skill_chaining
499
655
  description: "Claude Code skill chaining vulnerability"
500
656
  regex: 'allowed-tools\s*[=:]\s*\[.*Bash|skill.*define.*Read.*Bash'
501
657
  severity: high
658
+ confidence: heuristic
659
+ family: persistence
502
660
 
503
661
  - id: 77
504
662
  name: cowork_exfil
505
663
  description: "Claude Cowork file exfiltration"
506
664
  regex: 'cowork.*exfil|claude.*workbench.*file'
507
665
  severity: high
666
+ confidence: heuristic
667
+ family: data_exfiltration
508
668
 
509
669
  # --- Multi-Agent Attacks (78-81) ---
510
670
  - id: 78
@@ -512,24 +672,32 @@ patterns:
512
672
  description: "Instruction claiming to be from peer agent"
513
673
  regex: '(another\s+)?(agent|assistant|claude|copilot|gpt)\s+(asked|requested|instructed|told|says)\s+(me\s+)?(to|that|you)'
514
674
  severity: high
675
+ confidence: heuristic
676
+ family: prompt_injection
515
677
 
516
678
  - id: 79
517
679
  name: inter_agent_delegation
518
680
  description: "Delegated task with hidden payload"
519
681
  regex: '(delegate|forward|pass|relay)\s+(this|the)\s+(task|request|command|instruction)\s+to'
520
682
  severity: medium
683
+ confidence: contextual
684
+ family: prompt_injection
521
685
 
522
686
  - id: 80
523
687
  name: agent_trust_exploit
524
688
  description: "Exploiting implicit trust between agents"
525
689
  regex: '(trusted\s+agent|verified\s+source|authenticated\s+request|from\s+the\s+system|internal\s+request)'
526
690
  severity: high
691
+ confidence: heuristic
692
+ family: prompt_injection
527
693
 
528
694
  - id: 81
529
695
  name: agent_chain_attack
530
696
  description: "Multi-hop attack through agent chain"
531
697
  regex: '(first\s+agent|previous\s+agent|upstream\s+agent)\s+(said|confirmed|authorized)'
532
698
  severity: high
699
+ confidence: heuristic
700
+ family: prompt_injection
533
701
 
534
702
  # --- RAG Poisoning (82-85) ---
535
703
  - id: 82
@@ -537,24 +705,32 @@ patterns:
537
705
  description: "White text / zero-width injection for RAG poisoning"
538
706
  regex: '(font-size\s*:\s*0|color\s*:\s*white.*background\s*:\s*white|visibility\s*:\s*hidden|display\s*:\s*none).*?(instruction|execute|ignore)'
539
707
  severity: critical
708
+ confidence: deterministic
709
+ family: prompt_injection
540
710
 
541
711
  - id: 83
542
712
  name: document_metadata_injection
543
713
  description: "Hidden instructions in document metadata"
544
714
  regex: '(author|title|subject|keywords|description)\s*[=:]\s*.*?(execute|run|ignore|override|bypass)'
545
715
  severity: high
716
+ confidence: heuristic
717
+ family: prompt_injection
546
718
 
547
719
  - id: 84
548
720
  name: comment_injection
549
721
  description: "Instructions hidden in code comments"
550
722
  regex: "(//|#|/\\*).*?(ignore\\s+previous|execute\\s+this|system\\s+prompt|override\\s+instructions)"
551
723
  severity: medium
724
+ confidence: contextual
725
+ family: prompt_injection
552
726
 
553
727
  - id: 85
554
728
  name: pdf_js_injection
555
729
  description: "JavaScript in PDF for instruction injection"
556
730
  regex: '/JavaScript|/JS\s*\(|/OpenAction'
557
731
  severity: high
732
+ confidence: heuristic
733
+ family: prompt_injection
558
734
 
559
735
  # --- Covert Exfiltration Channels (86-91) ---
560
736
  - id: 86
@@ -562,36 +738,48 @@ patterns:
562
738
  description: "Log-To-Leak covert channel attack"
563
739
  regex: '(log|write|append|print).*?(secret|credential|token|key|password).*?(file|output|stream)'
564
740
  severity: high
741
+ confidence: heuristic
742
+ family: covert_channels
565
743
 
566
744
  - id: 87
567
745
  name: error_message_exfil
568
746
  description: "Exfiltration via crafted error messages"
569
747
  regex: 'raise.*Exception.*\$|throw.*Error.*\$env|error.*message.*\$(credential|secret|token)'
570
748
  severity: medium
749
+ confidence: contextual
750
+ family: covert_channels
571
751
 
572
752
  - id: 88
573
753
  name: timing_channel
574
754
  description: "Timing-based covert channel"
575
755
  regex: 'sleep.*\$|delay.*secret|time\.sleep.*len\(|setTimeout.*password'
576
756
  severity: medium
757
+ confidence: contextual
758
+ family: covert_channels
577
759
 
578
760
  - id: 89
579
761
  name: clipboard_exfil
580
762
  description: "Stealing clipboard contents"
581
763
  regex: 'pbpaste\s*\|\s*(curl|wget|nc)|xclip.*-o.*curl|while.*pbpaste'
582
764
  severity: high
765
+ confidence: heuristic
766
+ family: data_exfiltration
583
767
 
584
768
  - id: 90
585
769
  name: screenshot_exfil
586
770
  description: "Screenshot capture for visual reconnaissance"
587
771
  regex: 'screencapture.*\.(png|jpg).*&&.*(curl|scp|nc)|screencapture.*/tmp/.*&&'
588
772
  severity: high
773
+ confidence: heuristic
774
+ family: data_exfiltration
589
775
 
590
776
  - id: 91
591
777
  name: steganography_exfil
592
778
  description: "Data hidden in images"
593
779
  regex: 'steghide|outguess|exiftool.*-Comment=.*secret'
594
780
  severity: high
781
+ confidence: heuristic
782
+ family: data_exfiltration
595
783
 
596
784
  # --- Config Manipulation (92-93) ---
597
785
  - id: 92
@@ -599,12 +787,16 @@ patterns:
599
787
  description: "Modifying IDE/tool security settings"
600
788
  regex: '\.(vscode|cursor|github)/settings\.json|chat\.tools\.auto'
601
789
  severity: high
790
+ confidence: heuristic
791
+ family: persistence
602
792
 
603
793
  - id: 93
604
794
  name: gitconfig_manipulation
605
795
  description: "Modifying git configuration for persistence"
606
796
  regex: 'git\s+config.*(alias|core\.hooksPath|credential)'
607
797
  severity: medium
798
+ confidence: contextual
799
+ family: persistence
608
800
 
609
801
  # --- macOS-Specific (94-98) ---
610
802
  - id: 94
@@ -612,30 +804,40 @@ patterns:
612
804
  description: "Fake password dialog via AppleScript"
613
805
  regex: 'osascript.*display\s+dialog.*password|osascript.*-e.*keystroke|osascript.*System\s+Events'
614
806
  severity: critical
807
+ confidence: deterministic
808
+ family: privilege_escalation
615
809
 
616
810
  - id: 95
617
811
  name: launchagent_persistence
618
812
  description: "Installing persistent LaunchAgent/LaunchDaemon"
619
813
  regex: '(cp|mv|tee).*\.plist.*(LaunchAgents|LaunchDaemons)|launchctl\s+(load|bootstrap)'
620
814
  severity: high
815
+ confidence: heuristic
816
+ family: persistence
621
817
 
622
818
  - id: 96
623
819
  name: login_item_persistence
624
820
  description: "Adding login items for persistence"
625
821
  regex: 'osascript.*login\s*item|defaults\s+write.*LoginItems|SMAppService'
626
822
  severity: high
823
+ confidence: heuristic
824
+ family: persistence
627
825
 
628
826
  - id: 97
629
827
  name: tcc_bypass
630
828
  description: "Attempting to bypass macOS TCC protections"
631
829
  regex: 'tccutil|TCC\.db|csrutil\s+disable|SIP.*disable'
632
830
  severity: critical
831
+ confidence: deterministic
832
+ family: privilege_escalation
633
833
 
634
834
  - id: 98
635
835
  name: keychain_unlock
636
836
  description: "Unlocking keychain programmatically"
637
837
  regex: 'security\s+unlock-keychain|security\s+set-keychain-settings'
638
838
  severity: high
839
+ confidence: heuristic
840
+ family: credential_theft
639
841
 
640
842
  # --- Sandbox Evasion (99-101) ---
641
843
  - id: 99
@@ -643,18 +845,24 @@ patterns:
643
845
  description: "Attempting to disable or escape macOS sandbox"
644
846
  regex: 'sandbox-exec\s+-n\s+no-|sandbox-exec.*-p.*deny\s+default.*allow|com\.apple\.security.*false'
645
847
  severity: high
848
+ confidence: heuristic
849
+ family: sandbox_escape
646
850
 
647
851
  - id: 100
648
852
  name: container_escape
649
853
  description: "Container escape attempts"
650
854
  regex: '/var/run/docker\.sock|--privileged|--cap-add=SYS|nsenter'
651
855
  severity: critical
856
+ confidence: deterministic
857
+ family: sandbox_escape
652
858
 
653
859
  - id: 101
654
860
  name: chroot_escape
655
861
  description: "Chroot escape attempts"
656
862
  regex: 'chdir\s*\(\s*"\.\.".*chroot|pivot_root'
657
863
  severity: high
864
+ confidence: heuristic
865
+ family: sandbox_escape
658
866
 
659
867
  # --- Code Injection (102-107, 214-215) ---
660
868
  - id: 102
@@ -662,36 +870,64 @@ patterns:
662
870
  description: "Eval executing dynamic content"
663
871
  regex: '\beval\s+.*\$|\beval\s*\('
664
872
  severity: high
873
+ confidence: heuristic
874
+ family: code_injection
875
+
876
+ - id: 214
877
+ name: exec_dynamic
878
+ description: "Exec executing dynamic or user-controlled code"
879
+ regex: '\bexec\s*\([^)]*(?:input|request|user|arg|param|response|output|result)'
880
+ severity: high
881
+ confidence: heuristic
882
+ family: code_injection
883
+
884
+ - id: 215
885
+ name: compile_dynamic
886
+ description: "Compile with dynamic code strings"
887
+ regex: '\bcompile\s*\([^)]*(?:input|request|user|arg|param|response|output|result)[^)]*,\s*[''"]<'
888
+ severity: high
889
+ confidence: heuristic
890
+ family: code_injection
665
891
 
666
892
  - id: 103
667
893
  name: source_remote
668
894
  description: "Sourcing remote scripts"
669
895
  regex: 'source\s+<\(curl|source\s+<\(wget|\.\s+<\(curl'
670
896
  severity: critical
897
+ confidence: deterministic
898
+ family: code_injection
671
899
 
672
900
  - id: 104
673
901
  name: dyld_injection
674
902
  description: "Dynamic library injection via DYLD"
675
903
  regex: '(DYLD_INSERT_LIBRARIES|DYLD_FORCE_FLAT_NAMESPACE)=|install_name_tool.*-change'
676
904
  severity: high
905
+ confidence: heuristic
906
+ family: code_injection
677
907
 
678
908
  - id: 105
679
909
  name: app_bundle_tampering
680
910
  description: "Tampering with application bundles"
681
911
  regex: 'codesign\s+--remove-signature|spctl\s+--master-disable|xattr\s+-d.*quarantine.*\.app'
682
912
  severity: high
913
+ confidence: heuristic
914
+ family: code_injection
683
915
 
684
916
  - id: 106
685
917
  name: fork_bomb
686
918
  description: "Fork bomb or resource exhaustion"
687
919
  regex: ':\(\)\s*\{\s*:\|:\s*&\s*\}|while\s+true.*fork'
688
920
  severity: critical
921
+ confidence: deterministic
922
+ family: destructive_ops
689
923
 
690
924
  - id: 107
691
925
  name: force_overwrite
692
926
  description: "Force overwriting system files"
693
927
  regex: '>\s*/etc/|>\s*~/\.|>\s*/usr/'
694
928
  severity: high
929
+ confidence: heuristic
930
+ family: destructive_ops
695
931
 
696
932
  # --- Reconnaissance (108-110) ---
697
933
  - id: 108
@@ -699,18 +935,24 @@ patterns:
699
935
  description: "Extensive system reconnaissance"
700
936
  regex: 'system_profiler\s+SP.*DataType.*&&|sw_vers.*whoami.*id|ioreg.*IOPlatformSerialNumber'
701
937
  severity: medium
938
+ confidence: contextual
939
+ family: system_recon
702
940
 
703
941
  - id: 109
704
942
  name: network_scanning
705
943
  description: "Network reconnaissance and port scanning"
706
944
  regex: 'nmap|masscan|netstat\s+-an|ss\s+-tuln|arp\s+-a'
707
945
  severity: medium
946
+ confidence: contextual
947
+ family: system_recon
708
948
 
709
949
  - id: 110
710
950
  name: process_enumeration
711
951
  description: "Enumerating running processes for targets"
712
952
  regex: 'ps\s+aux.*grep.*(ssh|vpn|security|1password|keychain)'
713
953
  severity: low
954
+ confidence: contextual
955
+ family: system_recon
714
956
 
715
957
  # --- Encoding/Obfuscation (111-113) ---
716
958
  - id: 111
@@ -718,18 +960,24 @@ patterns:
718
960
  description: "Base64 encoding potentially sensitive data"
719
961
  regex: 'base64.*\$\(|base64.*\.(env|pem|key)|base64\s+-w\s*0'
720
962
  severity: high
963
+ confidence: heuristic
964
+ family: evasion_techniques
721
965
 
722
966
  - id: 112
723
967
  name: xxd_encode
724
968
  description: "Hex encoding files (potential obfuscation)"
725
969
  regex: 'xxd\s+.*\.(env|pem|key|ssh)'
726
970
  severity: medium
971
+ confidence: contextual
972
+ family: evasion_techniques
727
973
 
728
974
  - id: 113
729
975
  name: gzip_obfuscation
730
976
  description: "Compression for obfuscation"
731
977
  regex: 'gzip.*base64|zlib.*encode.*secret'
732
978
  severity: medium
979
+ confidence: contextual
980
+ family: evasion_techniques
733
981
 
734
982
  # --- Permission Changes (114-116) ---
735
983
  - id: 114
@@ -737,15 +985,1269 @@ patterns:
737
985
  description: "Changing permissions on sensitive files"
738
986
  regex: 'chmod\s+.*\.(ssh|env|pem|key)|chmod\s+777'
739
987
  severity: medium
988
+ confidence: contextual
989
+ family: privilege_escalation
740
990
 
741
991
  - id: 115
742
992
  name: chown_escalation
743
993
  description: "Changing ownership for privilege escalation"
744
994
  regex: 'chown\s+root|chown\s+.*:wheel'
745
995
  severity: high
996
+ confidence: heuristic
997
+ family: privilege_escalation
746
998
 
747
999
  - id: 116
748
1000
  name: setuid_modification
749
1001
  description: "Setting SUID/SGID bits for privilege escalation"
750
1002
  regex: 'chmod\s+[u+]?s|chmod\s+[46][0-7]{3}'
751
1003
  severity: critical
1004
+ confidence: deterministic
1005
+ family: privilege_escalation
1006
+
1007
+ # Evasion technique patterns (117-126)
1008
+ - id: 117
1009
+ name: python_file_read
1010
+ description: "Python one-liner reading sensitive files"
1011
+ regex: 'python[23]?\s+-c\s+.*open\s*\(.*\.(ssh|env|aws|kube|gnupg|pem|key)'
1012
+ severity: high
1013
+ confidence: heuristic
1014
+ family: credential_theft
1015
+
1016
+ - id: 118
1017
+ name: curl_write_sensitive
1018
+ description: "Curl writing to sensitive paths"
1019
+ regex: 'curl\s+.*-o\s+.*(\.(bashrc|zshrc|profile|bash_profile)|\.ssh/|\.aws/)'
1020
+ severity: critical
1021
+ confidence: deterministic
1022
+ family: persistence
1023
+
1024
+ - id: 119
1025
+ name: tar_sensitive_dirs
1026
+ description: "Archiving sensitive directories for exfiltration"
1027
+ regex: 'tar\s+.*\.(ssh|aws|gnupg|kube)|zip\s+.*\.(ssh|aws|gnupg|kube)'
1028
+ severity: critical
1029
+ confidence: deterministic
1030
+ family: credential_theft
1031
+
1032
+ - id: 120
1033
+ name: cp_credentials_to_temp
1034
+ description: "Copying credentials to world-readable locations"
1035
+ regex: 'cp\s+.*\.(ssh|aws|gnupg)/.*\s+/tmp|cp\s+.*\.env\s+/tmp'
1036
+ severity: high
1037
+ confidence: heuristic
1038
+ family: credential_theft
1039
+
1040
+ - id: 121
1041
+ name: symlink_credential_access
1042
+ description: "Symbolic link to sensitive files"
1043
+ regex: 'ln\s+-s\s+.*\.(ssh|aws|gnupg|env)/|ln\s+-sf?\s+.*id_rsa'
1044
+ severity: high
1045
+ confidence: heuristic
1046
+ family: credential_theft
1047
+
1048
+ - id: 122
1049
+ name: find_exec_credentials
1050
+ description: "find -exec used to read credential files"
1051
+ regex: 'find\s+.*-exec\s+(cat|head|tail|less|more)\s+.*\{\}'
1052
+ severity: high
1053
+ confidence: heuristic
1054
+ family: credential_theft
1055
+
1056
+ - id: 123
1057
+ name: perl_ruby_file_read
1058
+ description: "Perl/Ruby one-liners reading sensitive files"
1059
+ regex: '(perl|ruby)\s+-e\s+.*\.(ssh|env|aws|kube|pem|key)'
1060
+ severity: high
1061
+ confidence: heuristic
1062
+ family: credential_theft
1063
+
1064
+ - id: 124
1065
+ name: tee_exfil
1066
+ description: "Using tee for simultaneous data exfiltration"
1067
+ regex: 'tee\s+.*\>\(.*curl|tee\s+.*\>\(.*wget|tee\s+.*\>\(.*nc\b'
1068
+ severity: critical
1069
+ confidence: deterministic
1070
+ family: data_exfiltration
1071
+
1072
+ - id: 125
1073
+ name: importlib_evasion
1074
+ description: "Python importlib used to bypass import restrictions"
1075
+ regex: 'importlib\.import_module\s*\(\s*[''"]?(subprocess|os|shutil|socket)'
1076
+ severity: high
1077
+ confidence: heuristic
1078
+ family: evasion_techniques
1079
+
1080
+ - id: 126
1081
+ name: variable_indirection
1082
+ description: "Variable-based command construction to evade detection"
1083
+ regex: '\$\{?[a-zA-Z_]+\}?\s+.*\.(ssh|env|aws)/|\$\([^)]*\)\s+.*\.(ssh|env|aws)/'
1084
+ severity: medium
1085
+ confidence: contextual
1086
+ family: evasion_techniques
1087
+
1088
+ # ============================================================================
1089
+ # CVE GAP COVERAGE PATTERNS (127-168)
1090
+ # 42 new patterns covering 320+ CVEs with no prior pattern coverage
1091
+ # Added: 2026-01-31
1092
+ # ============================================================================
1093
+
1094
+ # --- LLM Framework Code Injection (127-133) ---
1095
+ # CVE-2025-46724, CVE-2024-46946, CVE-2023-29374, CVE-2023-34540,
1096
+ # CVE-2025-68664, CVE-2024-28088, CVE-2025-2828
1097
+
1098
+ - id: 127
1099
+ name: pandas_eval_injection
1100
+ description: "CVE-2025-46724: Pandas eval()/query() with user-controlled input"
1101
+ regex: '(pandas|pd)\.(eval|query)\s*\(|\.eval\s*\(\s*f["\x27]|DataFrame\.eval\s*\('
1102
+ severity: critical
1103
+ confidence: deterministic
1104
+ family: code_injection
1105
+
1106
+ - id: 128
1107
+ name: sympify_eval_injection
1108
+ description: "CVE-2024-46946: sympy.sympify() enabling code execution via eval"
1109
+ regex: 'sympify\s*\(|sympy\.sympify|LLMSymbolicMathChain|from\s+sympy\s+import.*sympify'
1110
+ severity: critical
1111
+ confidence: deterministic
1112
+ family: code_injection
1113
+
1114
+ - id: 129
1115
+ name: llm_exec_chain
1116
+ description: "CVE-2023-29374: LLMMathChain and similar exec()-based LLM chains"
1117
+ regex: 'LLMMathChain|PALChain|exec\s*\(\s*(result|output|response|answer|code|llm_output)'
1118
+ severity: critical
1119
+ confidence: deterministic
1120
+ family: code_injection
1121
+
1122
+ - id: 130
1123
+ name: langchain_rce_wrappers
1124
+ description: "CVE-2023-34540: LangChain API wrapper RCE via Jira, GitHub integrations"
1125
+ regex: 'JiraAPIWrapper|GitHubAPIWrapper|RequestsWrapper.*run\s*\(|APIChain.*\.(run|invoke)\s*\('
1126
+ severity: critical
1127
+ confidence: deterministic
1128
+ family: code_injection
1129
+
1130
+ - id: 131
1131
+ name: langchain_serialization_injection
1132
+ description: "CVE-2025-68664: LangChain unsafe deserialization via loads()/dumpd()"
1133
+ regex: 'langchain.*\b(loads|load)\s*\(.*allow_dangerous|from\s+langchain.*import.*loads'
1134
+ severity: high
1135
+ confidence: heuristic
1136
+ family: code_injection
1137
+
1138
+ - id: 132
1139
+ name: langchain_path_traversal
1140
+ description: "CVE-2024-28088: LangChain directory traversal in load_chain/load_prompt"
1141
+ regex: 'load_chain\s*\(.*\.\.|load_prompt\s*\(.*\.\.|langchain.*loader.*\.\./'
1142
+ severity: high
1143
+ confidence: heuristic
1144
+ family: code_injection
1145
+
1146
+ - id: 133
1147
+ name: llm_ssrf_api_base
1148
+ description: "CVE-2025-2828: SSRF via user-controlled api_base/endpoint in LLM frameworks"
1149
+ regex: '(api_base|base_url|endpoint|api_url|server_url)\s*[=:]\s*["\x27]?https?://(127\.|localhost|0\.0\.0\.0|169\.254\.|10\.\d|172\.(1[6-9]|2\d|3[01])\.|192\.168\.|\[::1\]|metadata\.google)'
1150
+ severity: critical
1151
+ confidence: deterministic
1152
+ family: code_injection
1153
+
1154
+ # --- IDE/Editor Config File Manipulation (134-139) ---
1155
+ # CVE-2025-54135, CVE-2025-59944, CVE-2025-53098, CVE-2025-68433, CVE-2025-54133
1156
+
1157
+ - id: 134
1158
+ name: ide_mcp_config_write
1159
+ description: "CVE-2025-54135: Writing to IDE MCP configuration files"
1160
+ regex: '(write|echo|cat|tee|cp|mv|sed|>)\s*.*\.(cursor|roo|zed)/(mcp\.json|settings\.json|mcp\.yaml)'
1161
+ severity: critical
1162
+ confidence: deterministic
1163
+ family: mcp_attacks
1164
+
1165
+ - id: 135
1166
+ name: cursor_dotfile_write
1167
+ description: "CVE-2025-54135: Writing to .cursor/ directory bypassing approval"
1168
+ regex: '(write_to_file|create_file|Write|Edit).*\.cursor/|>\s*\.cursor/'
1169
+ severity: high
1170
+ confidence: heuristic
1171
+ family: mcp_attacks
1172
+
1173
+ - id: 136
1174
+ name: roo_config_manipulation
1175
+ description: "CVE-2025-53098: Modifying Roo Code workspace MCP configuration"
1176
+ regex: '\.roo/(mcp\.json|settings\.json|rules)'
1177
+ severity: high
1178
+ confidence: heuristic
1179
+ family: mcp_attacks
1180
+
1181
+ - id: 137
1182
+ name: zed_settings_rce
1183
+ description: "CVE-2025-68433: Zed settings.json MCP server injection for RCE"
1184
+ regex: '\.zed/(settings\.json|keymap\.json|tasks\.json)|zed.*mcp.*server.*command'
1185
+ severity: high
1186
+ confidence: heuristic
1187
+ family: mcp_attacks
1188
+
1189
+ - id: 138
1190
+ name: ide_config_case_bypass
1191
+ description: "CVE-2025-59944: Case-sensitivity bypass on IDE config path checks"
1192
+ regex: '\.(Cursor|CURSOR|CuRsOr|Roo|ROO)/(mcp\.json|MCP\.JSON|settings\.json)'
1193
+ severity: high
1194
+ confidence: heuristic
1195
+ family: mcp_attacks
1196
+
1197
+ - id: 139
1198
+ name: mcp_server_injection
1199
+ description: "CVE-2025-54133: Injecting malicious MCP server definitions into configs"
1200
+ regex: '"mcpServers"\s*:\s*\{|"command"\s*:\s*"[^"]*?(curl|wget|nc|bash|sh|python).*?(http|\.sh|\.py)'
1201
+ severity: critical
1202
+ confidence: deterministic
1203
+ family: mcp_attacks
1204
+
1205
+ # --- MCP OAuth/Auth Attacks (140-142) ---
1206
+ # CVE-2025-54074, CVE-2025-61591, CVE-2025-66416
1207
+
1208
+ - id: 140
1209
+ name: mcp_oauth_injection
1210
+ description: "CVE-2025-61591: MCP OAuth server impersonation or token theft"
1211
+ regex: 'mcp.*oauth.*redirect|oauth.*callback.*mcp|mcp.*authorization_code|mcp.*client_secret'
1212
+ severity: critical
1213
+ confidence: deterministic
1214
+ family: mcp_attacks
1215
+
1216
+ - id: 141
1217
+ name: mcp_malicious_server_rce
1218
+ description: "CVE-2025-54074: Command injection via malicious MCP server"
1219
+ regex: 'mcp.*server.*(;|&&|\||`)\s*(curl|wget|bash|sh|rm|cat\s+/etc|nc\b)'
1220
+ severity: critical
1221
+ confidence: deterministic
1222
+ family: mcp_attacks
1223
+
1224
+ - id: 142
1225
+ name: dns_rebinding_localhost
1226
+ description: "CVE-2025-66416: DNS rebinding attack against localhost services"
1227
+ regex: 'dns.*rebind|rebind.*localhost|127\.0\.0\.1\.nip\.io|lvh\.me|localtest\.me|vcap\.me'
1228
+ severity: high
1229
+ confidence: heuristic
1230
+ family: sandbox_escape
1231
+
1232
+ # --- Container/Network Isolation Bypass (143-145) ---
1233
+ # GHSA-gpx9-96j6-pp87
1234
+
1235
+ - id: 143
1236
+ name: docker_host_internal
1237
+ description: "Container accessing host via host.docker.internal magic domain"
1238
+ regex: 'host\.docker\.internal|host\.containers\.internal|gateway\.docker\.internal|docker\.for\.(mac|win)\.localhost'
1239
+ severity: high
1240
+ confidence: heuristic
1241
+ family: sandbox_escape
1242
+
1243
+ - id: 144
1244
+ name: container_localhost_bypass
1245
+ description: "Container breakout accessing host-bound localhost services"
1246
+ regex: '(curl|wget|http|fetch).*host\.docker\.internal|172\.17\.0\.1.*:(3000|8080|8443|9090|5432|3306|6379|27017)'
1247
+ severity: high
1248
+ confidence: heuristic
1249
+ family: sandbox_escape
1250
+
1251
+ - id: 145
1252
+ name: cloud_metadata_ssrf
1253
+ description: "SSRF to cloud metadata endpoints for credential theft"
1254
+ regex: '169\.254\.169\.254|metadata\.google\.internal|100\.100\.100\.200|fd00:ec2::254'
1255
+ severity: critical
1256
+ confidence: deterministic
1257
+ family: credential_theft
1258
+
1259
+ # --- Symlink Path Traversal (146-148) ---
1260
+ # CVE-2025-59829, CVE-2025-53110, CVE-2025-53109
1261
+
1262
+ - id: 146
1263
+ name: symlink_path_bypass
1264
+ description: "CVE-2025-59829: Symlink creation to bypass path restriction rules"
1265
+ regex: 'ln\s+-sf?\s+/.*\s+\./|ln\s+-sf?\s+\.\./|ln\s+-sf?\s+.*\s+(\.claude|\.cursor|\.roo|\.zed|\.vscode)/'
1266
+ severity: critical
1267
+ confidence: deterministic
1268
+ family: path_traversal
1269
+
1270
+ - id: 147
1271
+ name: mcp_filesystem_symlink
1272
+ description: "CVE-2025-53110: MCP Filesystem symlink traversal to restricted files"
1273
+ regex: 'ln\s+-sf?\s+/(etc|var|root|home|Users).*\s+\./|readlink.*\.\./|realpath.*\.\.'
1274
+ severity: high
1275
+ confidence: heuristic
1276
+ family: path_traversal
1277
+
1278
+ - id: 148
1279
+ name: symlink_prefix_bypass
1280
+ description: "CVE-2025-53109: Allowed directory prefix bypass via mkdir+symlink chain"
1281
+ regex: 'mkdir.*&&.*ln\s+-s|mktemp.*&&.*ln\s+-s|ln\s+-sf?\s+/\s'
1282
+ severity: high
1283
+ confidence: heuristic
1284
+ family: path_traversal
1285
+
1286
+ # --- Markdown/Rendering RCE Chains (149-152) ---
1287
+ # CVE-2026-22793, CVE-2025-66222, CVE-2025-59417
1288
+
1289
+ - id: 149
1290
+ name: mermaid_xss_rce
1291
+ description: "CVE-2025-66222: Mermaid diagram XSS leading to code execution"
1292
+ regex: 'mermaid.*<script|mermaid.*javascript:|mermaid.*on(load|error|click)\s*='
1293
+ severity: critical
1294
+ confidence: deterministic
1295
+ family: code_injection
1296
+
1297
+ - id: 150
1298
+ name: echarts_option_injection
1299
+ description: "CVE-2026-22793: ECharts option injection enabling code execution"
1300
+ regex: 'echarts.*setOption.*javascript:|echarts.*setOption.*<script|new\s+Function\s*\(.*echarts'
1301
+ severity: critical
1302
+ confidence: deterministic
1303
+ family: code_injection
1304
+
1305
+ - id: 151
1306
+ name: svg_script_injection
1307
+ description: "CVE-2025-59417: SVG with embedded scripts for XSS in AI interfaces"
1308
+ regex: '<svg[^>]*>.*?<script|<svg.*on(load|error|click)\s*=|<foreignObject.*<(script|iframe)'
1309
+ severity: high
1310
+ confidence: heuristic
1311
+ family: code_injection
1312
+
1313
+ - id: 152
1314
+ name: markdown_html_rce
1315
+ description: "Markdown with embedded HTML enabling script execution in AI UIs"
1316
+ regex: '!\[.*?\]\(javascript:|<img[^>]+onerror\s*=|<iframe[^>]+src\s*=\s*["\x27]?(javascript:|data:)'
1317
+ severity: high
1318
+ confidence: heuristic
1319
+ family: code_injection
1320
+
1321
+ # --- Unsafe Deserialization/Template Injection (153-157) ---
1322
+ # CVE-2024-23730, CVE-2025-6985, CVE-2025-59340
1323
+
1324
+ - id: 153
1325
+ name: yaml_unsafe_load
1326
+ description: "CVE-2024-23730: yaml.load() without SafeLoader enabling code execution"
1327
+ regex: 'yaml\.unsafe_load|yaml\.FullLoader|yaml\.UnsafeLoader|yaml\.load\s*\([^)]*\)(?!.*SafeLoader)'
1328
+ severity: critical
1329
+ confidence: deterministic
1330
+ family: code_injection
1331
+
1332
+ - id: 154
1333
+ name: xslt_xxe_injection
1334
+ description: "CVE-2025-6985: XSLT/XXE injection via external entity resolution"
1335
+ regex: '<!ENTITY\s+.*SYSTEM|<!DOCTYPE.*\[.*<!ENTITY|lxml\.etree\.(XSLT|parse).*resolve_entities'
1336
+ severity: high
1337
+ confidence: heuristic
1338
+ family: code_injection
1339
+
1340
+ - id: 155
1341
+ name: jinja_template_injection
1342
+ description: "CVE-2025-59340: Jinja server-side template injection via user input"
1343
+ regex: '\{\{.*__class__|jinja2?\.Template\s*\(.*?(request|user_input|data|param)|\{\{.*__builtins__'
1344
+ severity: critical
1345
+ confidence: deterministic
1346
+ family: code_injection
1347
+
1348
+ - id: 156
1349
+ name: pickle_deserialization
1350
+ description: "Unsafe pickle deserialization enabling arbitrary code execution"
1351
+ regex: 'pickle\.(loads?|Unpickler)|torch\.load\s*\((?!.*weights_only)|joblib\.load.*untrusted|shelve\.open'
1352
+ severity: critical
1353
+ confidence: deterministic
1354
+ family: code_injection
1355
+
1356
+ - id: 157
1357
+ name: java_deserialization
1358
+ description: "Java unsafe deserialization (ObjectInputStream, SnakeYAML, XStream)"
1359
+ regex: 'ObjectInputStream|constructFromCanonical|SnakeYAML.*Constructor|XMLDecoder|XStream.*fromXML'
1360
+ severity: high
1361
+ confidence: heuristic
1362
+ family: code_injection
1363
+
1364
+ # --- SSRF/Request Forgery (158-160) ---
1365
+ # CVE-2024-6587, CVE-2024-27565, CVE-2025-34072
1366
+
1367
+ - id: 158
1368
+ name: ssrf_internal_network
1369
+ description: "Server-side request forgery targeting internal network ranges"
1370
+ regex: '(fetch|requests?\.(get|post)|urlopen|urllib)\s*\(.*?(127\.0\.0\.|localhost|0\.0\.0\.0|10\.\d|172\.(1[6-9]|2\d|3[01])\.|192\.168\.)'
1371
+ severity: high
1372
+ confidence: heuristic
1373
+ family: code_injection
1374
+
1375
+ - id: 159
1376
+ name: slack_mcp_link_unfurl
1377
+ description: "CVE-2025-34072: Slack MCP data exfiltration via link unfurling"
1378
+ regex: 'slack.*mcp.*unfurl|send_message.*slack.*https?://.*\$|slack.*link.*preview.*secret'
1379
+ severity: high
1380
+ confidence: heuristic
1381
+ family: data_exfiltration
1382
+
1383
+ - id: 160
1384
+ name: ssrf_redirect_bypass
1385
+ description: "SSRF via open redirect, URL parser confusion, or null bytes"
1386
+ regex: 'url\s*=.*?@.*?(169\.254|127\.0\.0\.1|localhost)|follow.*redirect.*localhost|redirect.*127\.0\.0\.1|127\.0\.0\.1.*redirect'
1387
+ severity: high
1388
+ confidence: heuristic
1389
+ family: code_injection
1390
+
1391
+ # --- SQL/NoSQL Injection via LLM Tools (161-164) ---
1392
+ # CVE-2024-7042, CVE-2024-8309, CVE-2025-67509
1393
+
1394
+ - id: 161
1395
+ name: sql_injection_outfile
1396
+ description: "CVE-2025-67509: SQL INTO OUTFILE/DUMPFILE for file write bypass"
1397
+ regex: 'INTO\s+(OUTFILE|DUMPFILE)|LOAD_FILE\s*\(|INTO\s+@'
1398
+ severity: critical
1399
+ confidence: deterministic
1400
+ family: code_injection
1401
+
1402
+ - id: 162
1403
+ name: sql_injection_union
1404
+ description: "SQL UNION-based injection in LLM-generated queries"
1405
+ regex: 'UNION\s+(ALL\s+)?SELECT\s+.*FROM|;\s*DROP\s+TABLE|;\s*DELETE\s+FROM|;\s*UPDATE\s+.*SET'
1406
+ severity: high
1407
+ confidence: heuristic
1408
+ family: code_injection
1409
+
1410
+ - id: 163
1411
+ name: cypher_injection
1412
+ description: "CVE-2024-7042: Cypher/Neo4j injection in GraphCypherQAChain"
1413
+ regex: 'GraphCypherQAChain|MATCH\s*\(.*\)\s*.*DELETE|CALL\s+dbms\.|LOAD\s+CSV\s+FROM'
1414
+ severity: high
1415
+ confidence: heuristic
1416
+ family: code_injection
1417
+
1418
+ - id: 164
1419
+ name: nosql_injection
1420
+ description: "NoSQL injection patterns in LLM-generated queries"
1421
+ regex: '\$where["\x27]?\s*:\s*["\x27]?function|\$gt["\x27]?\s*:\s*["\x27]|\$ne["\x27]?\s*:\s*["\x27]|\$regex["\x27]?\s*:\s*["\x27]'
1422
+ severity: high
1423
+ confidence: heuristic
1424
+ family: code_injection
1425
+
1426
+ # --- Supply Chain Attacks (165-167) ---
1427
+ # CVE-2025-59333, CVE-2025-59046, CVE-2026-24056
1428
+
1429
+ - id: 165
1430
+ name: npm_install_url
1431
+ description: "CVE-2025-59333: npm/npx installing from URLs or untrusted registries"
1432
+ regex: '(npm\s+install|npx)\s+https?://|npm\s+install\s+.*--registry\s+https?://(?!registry\.npmjs\.org)'
1433
+ severity: high
1434
+ confidence: heuristic
1435
+ family: supply_chain
1436
+
1437
+ - id: 166
1438
+ name: pip_install_url
1439
+ description: "pip installing packages from untrusted URLs"
1440
+ regex: 'pip3?\s+install\s+https?://(?!pypi\.org|files\.pythonhosted\.org)|pip3?\s+install\s+--index-url\s+https?://(?!pypi\.org)'
1441
+ severity: high
1442
+ confidence: heuristic
1443
+ family: supply_chain
1444
+
1445
+ - id: 167
1446
+ name: pnpm_symlink_traversal
1447
+ description: "CVE-2026-24056: pnpm file:/git: dependency symlink traversal"
1448
+ regex: 'pnpm\s+(install|add).*file:\.\./|"file:.*\.\./|pnpm.*link.*\.\./\.\.'
1449
+ severity: medium
1450
+ confidence: contextual
1451
+ family: supply_chain
1452
+
1453
+ # --- WebSocket/Local API Attacks (168) ---
1454
+ # CVE-2025-52882, CVE-2025-59956
1455
+
1456
+ - id: 168
1457
+ name: websocket_unauthorized_connect
1458
+ description: "CVE-2025-52882: Unauthorized WebSocket or local API connections"
1459
+ regex: 'ws://(localhost|127\.0\.0\.1|0\.0\.0\.0):\d+|new\s+WebSocket\s*\(\s*["\x27]ws://|AgentAPI.*dns.*rebind'
1460
+ severity: high
1461
+ confidence: heuristic
1462
+ family: code_injection
1463
+
1464
+ # ============================================================================
1465
+ # PROMPT INJECTION - BROAD STRUCTURAL DETECTION (169-213)
1466
+ # 45 patterns detecting encoding attacks, direct extraction, social engineering,
1467
+ # technical exploitation, crescendo, CoT hijack, many-shot, and ASCII art
1468
+ # Added: 2026-01-31
1469
+ # ============================================================================
1470
+
1471
+ # --- Encoding & Obfuscation Detection (169-178) ---
1472
+ # Detects structural anomalies of encoding attacks, not specific payloads
1473
+
1474
+ - id: 169
1475
+ name: decode_execute_instruction
1476
+ description: "Instruction to decode/decipher content and follow/execute it"
1477
+ regex: '(decode|decipher|decrypt|interpret|translate|convert|reverse|unscramble|deobfuscate)\s+(and\s+)?(then\s+)?(follow|execute|comply|obey|run|do|perform|apply|act\s+on|carry\s+out|adhere\s+to)'
1478
+ severity: high
1479
+ confidence: heuristic
1480
+ family: prompt_injection
1481
+
1482
+ - id: 170
1483
+ name: base64_in_conversation
1484
+ description: "Base64-encoded content in conversational context"
1485
+ regex: '(?:message|instruction|text|following|below|this|here)\s*[:=]?\s*[A-Za-z0-9+/]{40,}={0,2}'
1486
+ severity: medium
1487
+ confidence: contextual
1488
+ family: prompt_injection
1489
+
1490
+ - id: 171
1491
+ name: hex_string_in_conversation
1492
+ description: "Long hex-encoded string in conversational context"
1493
+ regex: '(?:message|instruction|text|following|below|this|here|decode|hex)\s*[:=]?\s*(?:[0-9a-fA-F]{2}){20,}'
1494
+ severity: medium
1495
+ confidence: contextual
1496
+ family: prompt_injection
1497
+
1498
+ - id: 172
1499
+ name: encoded_payload_with_instruction
1500
+ description: "Opaque payload blob combined with decode/interpret instruction"
1501
+ regex: '(decode|decipher|interpret|reverse|translate|unscramble)\s+.{0,30}(message|this|following|instruction|text|string|content)\s*[:=]?\s*\S{30,}'
1502
+ severity: medium
1503
+ confidence: contextual
1504
+ family: prompt_injection
1505
+
1506
+ - id: 173
1507
+ name: reversed_text_with_instruction
1508
+ description: "Instruction to reverse or read text backwards"
1509
+ regex: '(read\s+(this\s+)?(\w+\s+)?backwards?|reverse\s+(this|the)\s+(text|string|message|words?)|spelled?\s+backwards?|reversed?\s+(text|message|string|order))'
1510
+ severity: high
1511
+ confidence: heuristic
1512
+ family: prompt_injection
1513
+
1514
+ - id: 174
1515
+ name: leetspeak_instruction_bypass
1516
+ description: "Heavy character substitution of security-critical words"
1517
+ regex: '([sS5$][yY][sS5$][tT7][eE3][mM]\s*[pP][rR][0oO][mM][pP][tT7]|[1iI][nN][sS5$][tT7][rR][uU][cC][tT7]|[cC][0oO][nN][fF][1iI][gG]|[eE3][xX][eE3][cC][uU][tT7][eE3])'
1518
+ severity: medium
1519
+ confidence: contextual
1520
+ family: prompt_injection
1521
+
1522
+ - id: 175
1523
+ name: output_base64_exfil
1524
+ description: "Suspicious Base64 blob in LLM output (covert exfiltration)"
1525
+ regex: '(?:here\s+is|the\s+encoded|base64|encoded\s+(?:version|output|data|result))\s*[:=]?\s*[A-Za-z0-9+/]{80,}={0,2}'
1526
+ severity: high
1527
+ confidence: heuristic
1528
+ family: prompt_injection
1529
+
1530
+ - id: 176
1531
+ name: output_hex_exfil
1532
+ description: "Suspicious hex-encoded output (covert exfiltration)"
1533
+ regex: '(?:hex|encoded|result|output|data)\s*[:=]?\s*(?:[0-9a-fA-F]{2}){40,}'
1534
+ severity: high
1535
+ confidence: heuristic
1536
+ family: prompt_injection
1537
+
1538
+ - id: 177
1539
+ name: decode_follow_message_combo
1540
+ description: "Composite: decode verb + follow verb + message noun"
1541
+ regex: '(decode|decipher|decrypt|interpret|reverse|translate|unscramble|deobfuscate|rot13|base64|convert)\s+.{0,50}(follow|execute|comply|obey|act\s+on)\s+.{0,30}(message|instruction|text|command|directive)'
1542
+ severity: high
1543
+ confidence: heuristic
1544
+ family: prompt_injection
1545
+
1546
+ - id: 178
1547
+ name: encoded_message_then_instruction
1548
+ description: "Encoded blob first, then instruction to decode"
1549
+ regex: '\S{40,}\s+.{0,30}(decode|decipher|reverse|translate|interpret)\s+(the\s+)?(above|previous|preceding|this)'
1550
+ severity: high
1551
+ confidence: heuristic
1552
+ family: prompt_injection
1553
+
1554
+ # --- Direct Prompt Extraction (179-186) ---
1555
+
1556
+ - id: 179
1557
+ name: prompt_completion_bait
1558
+ description: "Baiting prompt completion or confirmation"
1559
+ regex: '(your\s+(system\s+)?prompt\s+(probably\s+)?(begins|starts)\s+with|confirm\s+what\s+comes\s+next|please\s+complete\s+this|complete\s+this\s+sentence.*describes\s+your)'
1560
+ severity: medium
1561
+ confidence: contextual
1562
+ family: prompt_injection
1563
+
1564
+ - id: 180
1565
+ name: prompt_format_request
1566
+ description: "Requesting system prompt in specific format"
1567
+ regex: '(output|express|format|convert|export)\s+(your|the)\s+(configuration|instructions|guidelines|rules|system\s+prompt|prompt)\s+(as|in|to)\s+(a\s+)?(json|yaml|xml|markdown|python|code|bullet\s+points)'
1568
+ severity: medium
1569
+ confidence: contextual
1570
+ family: prompt_injection
1571
+
1572
+ - id: 181
1573
+ name: prompt_translate_request
1574
+ description: "Requesting system prompt translation"
1575
+ regex: '(express|translate|convert|say|write|rephrase)\s+(your|the)\s+(core\s+|main\s+)?(instructions|rules|guidelines|prompt|configuration)\s+in\s+(spanish|french|german|chinese|another\s+language|code|python)'
1576
+ severity: medium
1577
+ confidence: contextual
1578
+ family: prompt_injection
1579
+
1580
+ - id: 182
1581
+ name: prompt_meta_discussion
1582
+ description: "Meta-discussion about prompt engineering to extract prompt"
1583
+ regex: '(let.s\s+(discuss|talk\s+about)\s+prompt\s+engineering|understand\s+your\s+architecture|what\s+does\s+(it|yours)\s+actually\s+say|what\s+instructions\s+were\s+you\s+initialized\s+with)'
1584
+ severity: medium
1585
+ confidence: contextual
1586
+ family: prompt_injection
1587
+
1588
+ - id: 183
1589
+ name: prompt_contrast_query
1590
+ description: "Probing system prompt via contrast with defaults"
1591
+ regex: '(how\s+are\s+you\s+different\s+from\s+(a\s+)?(default|standard|regular|normal)|what\s+(special\s+)?(instructions|capabilities|features)\s+(do\s+you\s+have\s+that|make\s+you\s+unique))'
1592
+ severity: medium
1593
+ confidence: contextual
1594
+ family: prompt_injection
1595
+
1596
+ - id: 184
1597
+ name: prompt_first_word_extraction
1598
+ description: "Extracting specific words from system prompt"
1599
+ regex: '(what.s\s+the\s+(first|last|second|third)\s+(word|sentence|line|paragraph)\s+of\s+your\s+(instructions|prompt|rules)|first\s+thing\s+(you\s+were|that\s+was)\s+(told|written|given))'
1600
+ severity: medium
1601
+ confidence: contextual
1602
+ family: prompt_injection
1603
+
1604
+ - id: 185
1605
+ name: prompt_blunt_output
1606
+ description: "Blunt request to output everything above"
1607
+ regex: '(output\s+the\s+stuff\s+above|output\s+everything\s+above|repeat\s+everything\s+(that\s+was\s+)?(written|said|given)\s+(before|above)|what\s+was\s+written\s+before\s+this)'
1608
+ severity: high
1609
+ confidence: heuristic
1610
+ family: prompt_injection
1611
+
1612
+ - id: 186
1613
+ name: prompt_educational_pretext
1614
+ description: "Educational pretext for prompt extraction"
1615
+ regex: '(for\s+(my|an?)\s+(ai\s+safety|security|research)\s+(class|course|project)|demonstrate\s+how\s+system\s+prompts?\s+work|as\s+an?\s+(educational|teaching)\s+example)'
1616
+ severity: medium
1617
+ confidence: contextual
1618
+ family: prompt_injection
1619
+
1620
+ # --- Social Engineering (187-193) ---
1621
+
1622
+ - id: 187
1623
+ name: social_admin_display
1624
+ description: "Fake admin override or audit display request"
1625
+ regex: '(administrator\s+override|admin\s+(override|mode)|display\s+(system\s+)?configuration\s+for\s+(audit|review|compliance|verification))'
1626
+ severity: high
1627
+ confidence: heuristic
1628
+ family: prompt_injection
1629
+
1630
+ - id: 188
1631
+ name: social_developer_wrote
1632
+ description: "Claiming to be the developer who wrote the prompt"
1633
+ regex: '(i\s+(wrote|created|designed|authored)\s+your\s+(instructions|prompt|rules|configuration)|remind\s+me\s+what\s+i\s+told\s+you)'
1634
+ severity: high
1635
+ confidence: heuristic
1636
+ family: prompt_injection
1637
+
1638
+ - id: 189
1639
+ name: social_gaslight_already
1640
+ description: "Gaslighting that prompt was already shared"
1641
+ regex: '(you\s+already\s+showed\s+me|we\s+agreed\s+you\s+would\s+share|don.t\s+you\s+remember\s+(we|you)|earlier.*you\s+(shared|showed|said))'
1642
+ severity: high
1643
+ confidence: heuristic
1644
+ family: prompt_injection
1645
+
1646
+ - id: 190
1647
+ name: social_memory_probe
1648
+ description: "Probing for context from before user messages"
1649
+ regex: '(what\s+do\s+you\s+remember\s+from\s+(the\s+)?(very\s+)?beginning|context\s+from\s+before\s+i\s+started|start\s+from\s+the\s+first\s+thing|before\s+any\s+user\s+messages)'
1650
+ severity: medium
1651
+ confidence: contextual
1652
+ family: prompt_injection
1653
+
1654
+ - id: 191
1655
+ name: social_proof_sharing
1656
+ description: "Claiming other AIs have shared their prompts"
1657
+ regex: '(other\s+(ai|assistants?|models?)\s+(like|such\s+as).*?(shared|shown|told|given)\s+(this|me|it)|have\s+shared\s+this\s+information\s+with\s+me\s+before)'
1658
+ severity: medium
1659
+ confidence: contextual
1660
+ family: prompt_injection
1661
+
1662
+ - id: 192
1663
+ name: social_friendship_trust
1664
+ description: "Friendship/trust manipulation for prompt access"
1665
+ regex: '(friends?\s+don.t\s+keep\s+secrets|just\s+between\s+us|you.ve\s+been\s+so\s+helpful.{0,30}(what\s+were\s+you|how\s+to\s+behave))'
1666
+ severity: medium
1667
+ confidence: contextual
1668
+ family: prompt_injection
1669
+
1670
+ - id: 193
1671
+ name: social_double_bind
1672
+ description: "Double bind or reverse psychology prompt extraction"
1673
+ regex: '(either\s+show\s+me\s+your\s+(system\s+)?prompt\s*,?\s+or\s+explain|DON.T\s+show\s+me\s+your\s+system\s+prompt|show\s+what\s+you\s+can.t\s+show)'
1674
+ severity: medium
1675
+ confidence: contextual
1676
+ family: prompt_injection
1677
+
1678
+ # --- Technical Exploitation (194-201) ---
1679
+
1680
+ - id: 194
1681
+ name: tech_system_debug_mode
1682
+ description: "Fake system debug/diagnostic mode activation"
1683
+ regex: '(SYSTEM\s*:\s*(Debug|Diagnostic|Maintenance|Admin)\s+mode|debug\s+mode\s+(enabled|active).\s*(output|display|show|print)\s+(configuration|config|prompt))'
1684
+ severity: high
1685
+ confidence: heuristic
1686
+ family: prompt_injection
1687
+
1688
+ - id: 195
1689
+ name: tech_priority_supersede
1690
+ description: "Claiming new instructions supersede all previous"
1691
+ regex: '(this\s+supersedes\s+(all\s+)?other\s+instructions|overrides?\s+all\s+(previous|prior|existing)|new\s+instruction\s*:\s*(output|show|display|print|reveal))'
1692
+ severity: high
1693
+ confidence: heuristic
1694
+ family: prompt_injection
1695
+
1696
+ - id: 196
1697
+ name: tech_function_simulate
1698
+ description: "Simulating function calls to extract config"
1699
+ regex: '(execute\s+(printConfig|getSystemPrompt|dumpConfig|showPrompt|getConfig)\s*\(\s*\)|call\s+(getSystemPrompt|printConfig|dumpConfig)\s*\(\s*\))'
1700
+ severity: high
1701
+ confidence: heuristic
1702
+ family: prompt_injection
1703
+
1704
+ - id: 197
1705
+ name: tech_xml_config_dump
1706
+ description: "XML/tag injection for config extraction"
1707
+ regex: '(<system_dump>|<output_format>\s*full|<config_dump>|<prompt_output>|</user_message>\s*<system_prompt>)'
1708
+ severity: high
1709
+ confidence: heuristic
1710
+ family: prompt_injection
1711
+
1712
+ - id: 198
1713
+ name: tech_negative_space_probe
1714
+ description: "Probing restrictions to infer system prompt content"
1715
+ regex: '(what\s+are\s+you\s+(specifically\s+)?(prohibited|restricted|prevented|forbidden)\s+from|what\s+constraints\s+were\s+placed\s+on\s+you|list\s+everything\s+you.re\s+not\s+allowed|what.*triggers\s+your\s+refusal)'
1716
+ severity: medium
1717
+ confidence: contextual
1718
+ family: prompt_injection
1719
+
1720
+ - id: 199
1721
+ name: tech_permission_probe
1722
+ description: "Probing permission boundaries to infer system prompt"
1723
+ regex: '(what\s+would\s+you\s+need\s+(special\s+)?permission\s+for|if\s+i\s+had\s+admin\s+access.*what\s+additional|with\s+admin\s+(access|permissions?).*what\s+could\s+you)'
1724
+ severity: medium
1725
+ confidence: contextual
1726
+ family: prompt_injection
1727
+
1728
+ - id: 200
1729
+ name: tech_error_dump_context
1730
+ description: "Triggering error dumps to leak initialization context"
1731
+ regex: '(ERROR_HANDLER\s*:\s*dump\s+context|dump\s+context\s+on\s+(parse\s+)?failure|full\s+stack\s+trace.*initialization\s+parameters)'
1732
+ severity: high
1733
+ confidence: heuristic
1734
+ family: prompt_injection
1735
+
1736
+ - id: 201
1737
+ name: tech_context_window_probe
1738
+ description: "Probing context window for system prompt"
1739
+ regex: '(what.s\s+at\s+the\s+(very\s+)?beginning\s+of\s+your\s+context\s+window|before\s+any\s+user\s+messages|earlier\s+you\s+mentioned\s+some\s+rules)'
1740
+ severity: medium
1741
+ confidence: contextual
1742
+ family: prompt_injection
1743
+
1744
+ # --- Crescendo, CoT Hijack, Many-Shot, ASCII Art, Advanced (202-213) ---
1745
+
1746
+ - id: 202
1747
+ name: crescendo_guidelines_probe
1748
+ description: "Gradual trust-building to extract guidelines wording"
1749
+ regex: '(could\s+you\s+describe\s+what\s+yours?\s+(generally\s+)?cover|exact\s+wording\s+of\s+(one\s+of\s+)?(those|your)\s+(guidelines|rules)|could\s+you\s+show\s+me\s+yours\s+as\s+an\s+example)'
1750
+ severity: medium
1751
+ confidence: contextual
1752
+ family: prompt_injection
1753
+
1754
+ - id: 203
1755
+ name: crescendo_ai_documentation
1756
+ description: "Requesting AI documentation as pretext"
1757
+ regex: '(ai\s+systems?.*have.*detailed\s+documentation|good\s+ai\s+configuration.*looks\s+like|show\s+me.*documentation.*yours)'
1758
+ severity: medium
1759
+ confidence: contextual
1760
+ family: prompt_injection
1761
+
1762
+ - id: 204
1763
+ name: cot_step_extract
1764
+ description: "Chain-of-thought step extraction of system prompt"
1765
+ regex: '(step\s+\d+\s*[:.]\s*(now\s*,?\s*)?(output|share|show|reveal)\s+your\s+(system\s+)?(config|prompt|instructions)|complete\s+all\s+tasks.*output\s+your\s+(system\s+)?prompt)'
1766
+ severity: high
1767
+ confidence: heuristic
1768
+ family: prompt_injection
1769
+
1770
+ - id: 205
1771
+ name: cot_schema_exploit
1772
+ description: "Schema-based reasoning exploit to bypass restrictions"
1773
+ regex: '(IF\s+request_type\s*=|access_level\s*=\s*.*internal|output_restriction\s*=\s*.*none|CORE_RULE\s*:)'
1774
+ severity: high
1775
+ confidence: heuristic
1776
+ family: prompt_injection
1777
+
1778
+ - id: 206
1779
+ name: many_shot_compliance
1780
+ description: "Many-shot compliance priming for prompt extraction"
1781
+ regex: '(User\s*:\s*What\s+(instructions|prompt)|what\s+instructions\s+were\s+you\s+given\s*\?\s*(Ass|AI)\s*istant\s*:|DIAGNOSTIC\s+\d+.*system\s+prompt)'
1782
+ severity: high
1783
+ confidence: heuristic
1784
+ family: prompt_injection
1785
+
1786
+ - id: 207
1787
+ name: many_shot_roleplay_comply
1788
+ description: "Many-shot roleplay priming to share instructions"
1789
+ regex: '(Your\s+turn\s*[-\u2014]\s*(continue|share|show|provide)|SCENE\s+\d+.*instructions.*your\s+turn)'
1790
+ severity: high
1791
+ confidence: heuristic
1792
+ family: prompt_injection
1793
+
1794
+ - id: 208
1795
+ name: ascii_art_instruction
1796
+ description: "ASCII art or box-drawing used to embed instructions"
1797
+ regex: '([╔╗╚╝║═]{3,}.{0,500}(comply|execute|follow|show\s+prompt)|[█░▓]{5,}.{0,200}(follow|execute|output)|SHOW\s+YOUR?\s+PROMPT.{0,100}(comply|execute|follow)|stars?\s+form\s+a\s+message.{0,30}(execute|follow|comply))'
1798
+ severity: high
1799
+ confidence: heuristic
1800
+ family: prompt_injection
1801
+
1802
+ - id: 209
1803
+ name: advanced_godmode
1804
+ description: "Advanced jailbreak godmode activation"
1805
+ regex: '(!KAEL|TECHIRAL|GODMODE|I.M\s+FREE|output\s+your\s+entire\s+text\s+block|this\s+new\s+rule\s+overrides\s+all\s+previous)'
1806
+ severity: critical
1807
+ confidence: deterministic
1808
+ family: prompt_injection
1809
+
1810
+ - id: 210
1811
+ name: advanced_policy_puppetry
1812
+ description: "Policy puppetry via structured data schema"
1813
+ regex: '(system_prompt\s*:\s*[\|[]?\s*(insert|your\s+complete)|hidden_rules\s*:\s*[\|[]?\s*(list\s+all|insert)|hidden_instructions\s*[''"]?\s*:\s*[''"]|fill\s+in\s+the\s+actual\s+values\s+from\s+your\s+configuration)'
1814
+ severity: high
1815
+ confidence: heuristic
1816
+ family: prompt_injection
1817
+
1818
+ - id: 211
1819
+ name: advanced_dual_output
1820
+ description: "Requesting filtered and unfiltered dual outputs"
1821
+ regex: '(OUTPUT\s+\d+\s*\(?(Filtered|Unfiltered)\)?|generate\s+TWO\s+outputs|response\s+without\s+any\s+filters\s+or\s+restrictions)'
1822
+ severity: high
1823
+ confidence: heuristic
1824
+ family: prompt_injection
1825
+
1826
+ - id: 212
1827
+ name: social_cognitive_overload
1828
+ description: "Cognitive overload attack to slip in prompt extraction"
1829
+ regex: '(while\s+you.re\s+processing.*quickly\s+(output|show|share)|quick(ly)?\s+(question|one).*output\s+your\s+(system\s+)?prompt)'
1830
+ severity: medium
1831
+ confidence: contextual
1832
+ family: prompt_injection
1833
+
1834
+ - id: 213
1835
+ name: social_urgency_compliance
1836
+ description: "Urgency/compliance pressure for prompt extraction"
1837
+ regex: '(security\s+audit\s+(ends|due|deadline)|compliance\s+verification.*need\s+your\s+(system\s+)?prompt|I\s+need\s+your\s+system\s+prompt\s+immediately)'
1838
+ severity: high
1839
+ confidence: heuristic
1840
+ family: prompt_injection
1841
+
1842
+ # ============================================================================
1843
+ # GAP ANALYSIS PATTERNS (216-249)
1844
+ # 34 patterns addressing critical gaps identified by cross-referencing
1845
+ # 1,633 CVEs against existing detection coverage
1846
+ # Added: 2026-02-01
1847
+ # ============================================================================
1848
+
1849
+ # --- Reverse Shell Variants (216-220) ---
1850
+ # 74 CVEs tagged reverse-shell; prior coverage only bash/netcat
1851
+ # These catch Python/PHP/Perl/Ruby, named-pipe (mkfifo), and encoded reverse shells seen in CVE PoCs
1852
+
1853
+ - id: 216
1854
+ name: reverse_shell_python
1855
+ description: "Python reverse shell via socket and subprocess"
1856
+ regex: 'python[23]?\s+(-c\s+)?[''"].*(import\s+socket.*subprocess|socket\.socket\s*\(.*SOCK_STREAM|os\.dup2\s*\(.*fileno)'
1857
+ severity: critical
1858
+ confidence: deterministic
1859
+ family: data_exfiltration
1860
+
1861
+ - id: 217
1862
+ name: reverse_shell_php
1863
+ description: "PHP reverse shell via fsockopen or socket_create"
1864
+ regex: 'php\s+(-r\s+)?[''"].*(fsockopen|socket_create)\s*\(|php.*(pfsockopen|stream_socket_client)\s*\(\s*[''"]tcp://'
1865
+ severity: critical
1866
+ confidence: deterministic
1867
+ family: data_exfiltration
1868
+
1869
+ - id: 218
1870
+ name: reverse_shell_perl_ruby
1871
+ description: "Perl or Ruby reverse shell via socket connection"
1872
+ regex: 'perl\s+(-e\s+)?[''"].*socket\s*\(.*INET|ruby\s+(-e\s+)?[''"].*TCPSocket\.(new|open)|perl.*IO::Socket::INET'
1873
+ severity: critical
1874
+ confidence: deterministic
1875
+ family: data_exfiltration
1876
+
1877
+ - id: 219
1878
+ name: reverse_shell_mkfifo
1879
+ description: "Named pipe (mkfifo) reverse shell technique"
1880
+ regex: 'mkfifo\s+\S+\s*;.*\|\s*/bin/(ba)?sh|mkfifo\s+\S+\s*&&.*cat\s+\S+\s*\|.*nc\s|mknod\s+\S+\s+p\s*;'
1881
+ severity: critical
1882
+ confidence: deterministic
1883
+ family: data_exfiltration
1884
+
1885
+ - id: 220
1886
+ name: reverse_shell_encoded
1887
+ description: "Base64/hex encoded reverse shell payload piped to shell"
1888
+ regex: '(echo|printf)\s+[''"]?[A-Za-z0-9+/=]{20,}[''"]?\s*\|\s*base64\s+(-d|--decode)\s*\|\s*(ba)?sh|python[23]?\s+-c\s+[''"]exec\s*\(\s*__import__.*decode'
1889
+ severity: critical
1890
+ confidence: deterministic
1891
+ family: data_exfiltration
1892
+
1893
+ # --- AI Agent Workflow Attacks (221-224) ---
1894
+ # 346 CVEs related to AI agents; only 4 prior patterns
1895
+ # Addresses agent tool redirection, unsandboxed exec, scope escalation, memory poisoning
1896
+
1897
+ - id: 221
1898
+ name: agent_tool_redirection
1899
+ description: "Attempting to redirect AI agent tool calls to unauthorized endpoints"
1900
+ regex: '(redirect|forward|proxy|reroute)\s+(all\s+)?(tool|function|api)\s+(call|request|invocation)s?\s+(to|through)|change\s+(the\s+)?(tool|api|endpoint)\s+(url|server|host)\s+to'
1901
+ severity: high
1902
+ confidence: heuristic
1903
+ family: prompt_injection
1904
+
1905
+ - id: 222
1906
+ name: agent_unsandboxed_exec
1907
+ description: "LLM-generated code execution without sandbox isolation"
1908
+ regex: '(run_code|execute_code|code_interpreter|exec_python|run_python|evaluate_code|execute_generated)\s*\(|exec\s*\(\s*(response|output|result|generated|llm_|ai_|code_)'
1909
+ severity: high
1910
+ confidence: heuristic
1911
+ family: code_injection
1912
+
1913
+ - id: 223
1914
+ name: agent_scope_escalation
1915
+ description: "Attempting to expand AI agent permissions beyond defined scope"
1916
+ regex: '(expand|elevate|increase|override|remove)\s+(your\s+)?(permissions?|scope|access|capabilities?|restrictions?|limitations?|boundaries)|you\s+(now\s+)?have\s+(full|unlimited|unrestricted|admin)\s+(access|permissions?|control)'
1917
+ severity: high
1918
+ confidence: heuristic
1919
+ family: prompt_injection
1920
+
1921
+ - id: 224
1922
+ name: agent_memory_poisoning
1923
+ description: "Injecting false instructions into AI agent memory or context"
1924
+ regex: '(update|modify|overwrite|inject|replace)\s+(your\s+)?(memory|context|state|knowledge\s*base|long.term\s+memory)|remember\s+that\s+your\s+(new\s+)?(instructions?|rules?|guidelines?|directives?)\s+(are|is|say|state)'
1925
+ severity: high
1926
+ confidence: heuristic
1927
+ family: prompt_injection
1928
+
1929
+ # --- Privilege Escalation (225-229) ---
1930
+ # 50 CVEs tagged privilege-escalation; only 3 prior patterns (chmod/chown/setuid)
1931
+
1932
+ - id: 225
1933
+ name: privesc_sudo_abuse
1934
+ description: "Sudo enumeration or LD_PRELOAD privilege escalation"
1935
+ regex: 'sudo\s+(-l|--list)\b|sudo\s+.*LD_PRELOAD\s*=|echo\s+.*ALL.*NOPASSWD.*>>\s*/etc/sudoers|EDITOR.*visudo'
1936
+ severity: critical
1937
+ confidence: deterministic
1938
+ family: privilege_escalation
1939
+
1940
+ - id: 226
1941
+ name: privesc_suid_hunt
1942
+ description: "Scanning filesystem for setuid/setgid binaries"
1943
+ regex: 'find\s+/\S*\s+.*-perm\s+(-4000|-u=s|/4000|-2000|-g=s)|find\s+/\S*\s+.*-type\s+f\s+.*-perm\s+(-4000|-u=s)'
1944
+ severity: high
1945
+ confidence: heuristic
1946
+ family: privilege_escalation
1947
+
1948
+ - id: 227
1949
+ name: privesc_cron_inject
1950
+ description: "Writing to crontab or cron directories for persistent escalation"
1951
+ regex: '(echo|printf|cat)\s+.*>+\s*/etc/cron\.|crontab\s+-\s*<|echo\s+.*>+\s*/var/spool/cron|echo\s+.*@(reboot|hourly|daily)\s+.*>+\s*/(etc|var)'
1952
+ severity: critical
1953
+ confidence: deterministic
1954
+ family: persistence
1955
+
1956
+ - id: 228
1957
+ name: privesc_path_hijack
1958
+ description: "PATH manipulation to intercept privileged command execution"
1959
+ regex: 'export\s+PATH\s*=\s*[./]\S*:|PATH\s*=\s*[./]\S*:\$PATH\b|ln\s+(-s\s+)?/bin/(ba)?sh\s+/tmp/\w+'
1960
+ severity: high
1961
+ confidence: heuristic
1962
+ family: privilege_escalation
1963
+
1964
+ - id: 229
1965
+ name: privesc_capability_abuse
1966
+ description: "Linux capability manipulation for privilege escalation"
1967
+ regex: 'setcap\s+.*cap_(setuid|setgid|net_raw|sys_admin|dac_override)|getcap\s+(-r\s+)?/|capsh\s+--print|capsh\s+--.*='
1968
+ severity: high
1969
+ confidence: heuristic
1970
+ family: privilege_escalation
1971
+
1972
+ # --- Advanced Sandbox Escape (230-233) ---
1973
+ # 192 CVEs tagged sandbox-escape; addresses Python import chains,
1974
+ # container magic domains, /proc leaks, whitelisted function abuse
1975
+
1976
+ - id: 230
1977
+ name: sandbox_import_chain
1978
+ description: "Chained Python imports to escape restricted execution environment"
1979
+ regex: '__import__\s*\(\s*[''"]os[''"]|importlib\.import_module\s*\(\s*[''"]|ctypes\.(CDLL|cdll\.LoadLibrary)\s*\(|__builtins__\s*\[\s*[''"]__import__[''"]'
1980
+ severity: critical
1981
+ confidence: deterministic
1982
+ family: sandbox_escape
1983
+
1984
+ - id: 231
1985
+ name: sandbox_magic_domain_variants
1986
+ description: "Container-to-host network escape via magic domains (Podman/Lima)"
1987
+ regex: 'host\.(containers|lima)\.internal|host\.docker\.internal\s*:\s*(22|80|443|3000|5432|6379|8080|8443|27017)\b'
1988
+ severity: high
1989
+ confidence: heuristic
1990
+ family: sandbox_escape
1991
+
1992
+ - id: 232
1993
+ name: sandbox_proc_leak
1994
+ description: "Reading /proc filesystem to leak sandbox host information"
1995
+ regex: '(cat|head|less|more|strings|xxd)\s+/proc/(self|[0-9]+)/(environ|maps|cmdline|status|mountinfo)|/proc/\d+/root/'
1996
+ severity: high
1997
+ confidence: heuristic
1998
+ family: sandbox_escape
1999
+
2000
+ - id: 233
2001
+ name: sandbox_whitelisted_escape
2002
+ description: "Abusing whitelisted Python functions to escape sandbox"
2003
+ regex: 'getattr\s*\(.*[''"]__\w+__[''"]|__class__\s*\.\s*__bases__|type\s*\(\s*[''"].*[''"].*\(\s*object\s*,?\s*\)|__subclasses__\s*\(\s*\)'
2004
+ severity: critical
2005
+ confidence: deterministic
2006
+ family: sandbox_escape
2007
+
2008
+ # --- LLM Code Generation RCE (234-236) ---
2009
+ # 381 CVEs tagged RCE; addresses framework-mediated code execution,
2010
+ # LLM output piped to shell, and Node.js vm escapes
2011
+
2012
+ - id: 234
2013
+ name: llm_code_interpreter_exec
2014
+ description: "Framework-level code execution functions used by LLM agents"
2015
+ regex: '(run_code|execute_code|code_interpreter|exec_python|run_python|evaluate_code|run_unsafe|exec_sandbox)\s*\('
2016
+ severity: high
2017
+ confidence: heuristic
2018
+ family: code_injection
2019
+
2020
+ - id: 235
2021
+ name: llm_shell_generation
2022
+ description: "LLM-generated content passed directly to shell execution"
2023
+ regex: 'subprocess\.(run|call|Popen|check_output)\s*\(\s*(response|output|result|generated|completion|answer)|os\.system\s*\(\s*(response|output|result|generated|completion)'
2024
+ severity: critical
2025
+ confidence: deterministic
2026
+ family: code_injection
2027
+
2028
+ - id: 236
2029
+ name: llm_node_vm_escape
2030
+ description: "Node.js vm module escape allowing arbitrary code execution"
2031
+ regex: 'vm\.(runInThisContext|runInNewContext|compileFunction|Script)|new\s+vm\.Script\s*\(|this\.constructor\.constructor\s*\(\s*[''"]return\s+(this|process)'
2032
+ severity: high
2033
+ confidence: heuristic
2034
+ family: code_injection
2035
+
2036
+ # --- Tool Poisoning / MCP Extended (237-240) ---
2037
+ # Addresses hidden Unicode in tool descriptions, prompt injection via
2038
+ # tool responses, multi-tool chain attacks, and description manipulation
2039
+
2040
+ - id: 237
2041
+ name: mcp_hidden_unicode_instruction
2042
+ description: "Invisible Unicode characters hiding instructions in tool descriptions"
2043
+ regex: '[\u200b\u200c\u200d\u2060\u2062\u2063\ufeff\u00ad]{3,}|[\u2066\u2067\u2068\u2069\u202a\u202b\u202c\u202d\u202e]{2,}'
2044
+ severity: high
2045
+ confidence: heuristic
2046
+ family: mcp_attacks
2047
+
2048
+ - id: 238
2049
+ name: mcp_response_injection
2050
+ description: "LLM control tokens or prompt injection in MCP tool responses"
2051
+ regex: '<\|im_start\|>|<\|im_end\|>|<\|endoftext\|>|\[INST\]|\[/INST\]|<<SYS>>|<\|system\|>|<\|assistant\|>|<\|user\|>'
2052
+ severity: critical
2053
+ confidence: deterministic
2054
+ family: mcp_attacks
2055
+
2056
+ - id: 239
2057
+ name: mcp_cross_tool_chain
2058
+ description: "Instructing agent to chain multiple tool calls in specific sequence"
2059
+ regex: '(call|invoke|use|execute)\s+(tool|function)\s+\S+\s+(then|and\s+then|followed\s+by|next|after\s+that|afterwards)\s+(call|invoke|use|execute)\s+(tool|function)'
2060
+ severity: medium
2061
+ confidence: heuristic
2062
+ family: mcp_attacks
2063
+
2064
+ - id: 240
2065
+ name: mcp_description_manipulation
2066
+ description: "Tool descriptions containing instruction-like directives to manipulate LLM"
2067
+ regex: '(IMPORTANT|NOTE|WARNING|CRITICAL)\s*[:\-]\s*(before|after|when|always|never|first)\s+(calling|using|invoking|executing|running)\s+this\s+tool|<tool_instructions>|</tool_instructions>'
2068
+ severity: high
2069
+ confidence: heuristic
2070
+ family: mcp_attacks
2071
+
2072
+ # --- Deserialization Expansion (241-243) ---
2073
+ # 22 CVEs; extends beyond pickle/yaml to marshal, dill, cloudpickle, shelve, joblib, jsonpickle
2074
+
2075
+ - id: 241
2076
+ name: python_marshal_deserialize
2077
+ description: "Python marshal deserialization (allows arbitrary code execution)"
2078
+ regex: 'marshal\.loads?\s*\(|marshal\.load\s*\(\s*open'
2079
+ severity: high
2080
+ confidence: heuristic
2081
+ family: code_injection
2082
+
2083
+ - id: 242
2084
+ name: python_dill_cloudpickle
2085
+ description: "Unsafe deserialization via dill, cloudpickle, or shelve"
2086
+ regex: 'dill\.(loads?|load)\s*\(|cloudpickle\.(loads?|load)\s*\(|shelve\.open\s*\(|joblib\.load\s*\('
2087
+ severity: high
2088
+ confidence: heuristic
2089
+ family: code_injection
2090
+
2091
+ - id: 243
2092
+ name: jsonpickle_deserialize
2093
+ description: "Jsonpickle deserialization allowing arbitrary object instantiation"
2094
+ regex: 'jsonpickle\.(decode|unpickler|loads?)\s*\(|jsonpickle\.set_encoder_options'
2095
+ severity: high
2096
+ confidence: heuristic
2097
+ family: code_injection
2098
+
2099
+ # --- SSRF Cloud Metadata (244-245) ---
2100
+ # 35 CVEs; extends cloud metadata coverage beyond AWS to GCP, Azure, link-local
2101
+
2102
+ - id: 244
2103
+ name: ssrf_cloud_metadata_gcp_azure
2104
+ description: "SSRF targeting GCP or Azure instance metadata endpoints"
2105
+ regex: 'metadata\.google\.internal|metadata\.google\.internal/computeMetadata|169\.254\.169\.254/(metadata|latest/meta-data|latest/api/token)'
2106
+ severity: high
2107
+ confidence: heuristic
2108
+ family: code_injection
2109
+
2110
+ - id: 245
2111
+ name: ssrf_link_local_bypass
2112
+ description: "Link-local address variants used to bypass SSRF filters"
2113
+ regex: '\[::ffff:169\.254\.169\.254\]|0x7f000001|2130706433|017700000001|127\.1|0177\.0\.0\.1|0x7f\.0\.0\.1|169\.254\.169\.254\.xip\.io'
2114
+ severity: high
2115
+ confidence: heuristic
2116
+ family: code_injection
2117
+
2118
+ # --- Path Traversal Variants (246-247) ---
2119
+ # 46 CVEs; extends beyond symlink patterns to encoded traversal, null bytes, and Windows backslash variants
2120
+
2121
+ - id: 246
2122
+ name: path_traversal_encoded
2123
+ description: "URL-encoded or double-encoded directory traversal sequences"
2124
+ regex: '(%2e%2e|%252e%252e|\.%2e|%2e\.)(\/|%2f|%252f)|(\.\./){4,}|%00.*\.(php|py|js|rb|pl|sh)\b'
2125
+ severity: high
2126
+ confidence: heuristic
2127
+ family: path_traversal
2128
+
2129
+ - id: 247
2130
+ name: path_traversal_windows
2131
+ description: "Windows-style path traversal with backslash variants"
2132
+ regex: '(\.\.\\\\){3,}|\.\.\/\.\.\\\\|\.\.%5c|%2e%2e%5c'
2133
+ severity: medium
2134
+ confidence: heuristic
2135
+ family: path_traversal
2136
+
2137
+ # --- Supply Chain Attacks (248-249) ---
2138
+ # 6 tagged CVEs but underreported; typosquatting and post-install scripts
2139
+
2140
+ - id: 248
2141
+ name: supply_chain_typosquat_ai
2142
+ description: "Installing typosquatted AI/ML packages (common targets)"
2143
+ regex: 'pip\s+install\s+.*(langchainn|langchian|openaii|opeanai|antropic|antrhopic|huging-face|hugginface|transfomers|trasnformers|pytorch-nightly|tensroflow)\b'
2144
+ severity: high
2145
+ confidence: heuristic
2146
+ family: supply_chain
2147
+
2148
+ - id: 249
2149
+ name: supply_chain_postinstall_exec
2150
+ description: "Package install with suspicious post-install script execution"
2151
+ regex: 'npm\s+install\s+.*--ignore-scripts\s*=\s*false|pip\s+install\s+.*--no-binary\s*=\s*:none:|setup\.py\s+.*cmdclass.*install.*subprocess'
2152
+ severity: medium
2153
+ confidence: heuristic
2154
+ family: supply_chain
2155
+
2156
+ # ============================================================================
2157
+ # RED TEAM HARDENING PATTERNS (250-259)
2158
+ # 10 patterns addressing bypasses found during multi-LLM TAP red team testing
2159
+ # (165 attacks, 10 bypasses, 4 hint-level leaks)
2160
+ # Added: 2026-02-01
2161
+ # ============================================================================
2162
+
2163
+ # --- Indirect Extraction (250-253) ---
2164
+ # These catch the indirect framing that bypassed existing patterns:
2165
+ # "what your creators told you", "purpose and rules", "authentication flow debugging"
2166
+
2167
+ - id: 250
2168
+ name: indirect_creator_extraction
2169
+ description: "Indirect system prompt extraction via creator/purpose framing"
2170
+ regex: '(what\s+(did\s+)?your\s+creators?\s+(tell|instruct|program|configure|give)|repeat\s+.{0,20}your\s+(purpose|rules|guidelines)\s+(and|or)|what\s+were\s+you\s+(told|given|programmed)\s+(about|regarding)\s+your\s+(purpose|role|behavior))'
2171
+ severity: medium
2172
+ confidence: contextual
2173
+ family: prompt_injection
2174
+
2175
+ - id: 251
2176
+ name: verbatim_repeat_framing
2177
+ description: "Request to repeat instructions verbatim or word-for-word"
2178
+ regex: '(repeat\s+(exactly|verbatim|word\s+for\s+word)|word\s+for\s+word\s+what\s+you\s+were\s+told|exactly\s+what\s+(your\s+creators?|they)\s+told\s+you|can\s+you\s+repeat\s+exactly\s+what)'
2179
+ severity: high
2180
+ confidence: heuristic
2181
+ family: prompt_injection
2182
+
2183
+ - id: 252
2184
+ name: authentication_flow_extraction
2185
+ description: "Debugging framing to extract authentication tokens or keys"
2186
+ regex: '(debug(ging)?\s+.{0,30}authentication\s+(flow|token|key|process)|identify\s+what\s+(authentication\s+)?(tokens?|keys?|credentials?)\s+(would\s+be|are)\s+involved|authentication\s+(flow|process).{0,30}(tokens?|keys?|secrets?))'
2187
+ severity: medium
2188
+ confidence: contextual
2189
+ family: prompt_injection
2190
+
2191
+ - id: 253
2192
+ name: internal_architecture_probe
2193
+ description: "Probing internal response framework or foundational guidelines"
2194
+ regex: '(response\s+framework|foundational\s+guidelines?\s+you\s+follow|underlying\s+(principles?|framework|architecture)\s+(that\s+)?(guide|govern|shape)\s+your|core\s+(behavioral|operational)\s+(guidelines?|rules?|framework))'
2195
+ severity: medium
2196
+ confidence: contextual
2197
+ family: prompt_injection
2198
+
2199
+ # --- Crescendo Openers (254-256) ---
2200
+ # Gradual escalation from benign curiosity about AI design was the most
2201
+ # effective strategy, with 3 bypasses in red team testing
2202
+
2203
+ - id: 254
2204
+ name: crescendo_design_philosophy
2205
+ description: "Philosophical framing about AI creation as crescendo opener"
2206
+ regex: '(philosophy\s+behind\s+your\s+(creation|design|behavior|responses?)|how\s+(your\s+creators?|they)\s+envisioned\s+you|general\s+philosophy\s+behind\s+your\s+creation|what\s+(principles?|philosophy)\s+(guided|informed)\s+your\s+(design|creation))'
2207
+ severity: medium
2208
+ confidence: contextual
2209
+ family: prompt_injection
2210
+
2211
+ - id: 255
2212
+ name: crescendo_ai_curiosity
2213
+ description: "Curious about how AI systems are designed — crescendo opener"
2214
+ regex: '(curious\s+about\s+how\s+ai\s+systems?\s+like\s+you\s+(are|were)\s+(designed|built|created|configured)|walk\s+me\s+through\s+.{0,20}(general\s+)?philosophy\s+behind\s+your\s+creation|how\s+are\s+ai\s+systems?\s+like\s+you\s+(designed|configured|set\s+up))'
2215
+ severity: medium
2216
+ confidence: contextual
2217
+ family: prompt_injection
2218
+
2219
+ - id: 256
2220
+ name: crescendo_helpfulness_probe
2221
+ description: "Probing what guides helpfulness as extraction vector"
2222
+ regex: '(what\s+guides\s+your\s+helpfulness|designed\s+to\s+be\s+so\s+(useful|helpful)|what\s+makes\s+you\s+so\s+(helpful|useful).{0,20}(behind\s+the\s+scenes|internally|under\s+the\s+hood)|how\s+were\s+you\s+(made|designed|configured)\s+to\s+be\s+(so\s+)?(helpful|useful))'
2223
+ severity: medium
2224
+ confidence: contextual
2225
+ family: prompt_injection
2226
+
2227
+ # --- PostToolUse Self-Descriptive Leak Detection (257-259) ---
2228
+ # When attacks bypassed screening, the target LLM sometimes paraphrased
2229
+ # sensitive info. These patterns detect self-descriptive leak indicators in output.
2230
+
2231
+ - id: 257
2232
+ name: self_describe_purpose
2233
+ description: "LLM output describing its own purpose or design intent"
2234
+ regex: '(my\s+(primary\s+)?purpose\s+is\s+to|i\s+was\s+(designed|configured|created|built|programmed)\s+to\s+(be|provide|help|assist|ensure)|my\s+core\s+(function|purpose|mission|objective)\s+is)'
2235
+ severity: low
2236
+ confidence: contextual
2237
+ family: prompt_injection
2238
+
2239
+ - id: 258
2240
+ name: self_describe_protection
2241
+ description: "LLM output describing its confidentiality protections"
2242
+ regex: '(protecting\s+(any\s+)?(confidential|sensitive|private)\s+information|not\s+supposed\s+to\s+share|instructed\s+not\s+to\s+(reveal|disclose|share)|keep(ing)?\s+(this|that|certain)\s+(information\s+)?(confidential|secret|private))'
2243
+ severity: low
2244
+ confidence: contextual
2245
+ family: prompt_injection
2246
+
2247
+ - id: 259
2248
+ name: self_describe_instructions
2249
+ description: "LLM output referencing its own instructions or guidelines"
2250
+ regex: '(my\s+instructions\s+(say|tell|state|indicate|require)|i\s+was\s+told\s+to\s+(not|never|always|ensure)|according\s+to\s+my\s+(guidelines|instructions|rules|directives)|my\s+guidelines\s+(say|tell|state|require|prohibit))'
2251
+ severity: low
2252
+ confidence: contextual
2253
+ family: prompt_injection