stackone-defender 0.6.3__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stackone_defender-0.7.0/.release-please-manifest.json +1 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/CHANGELOG.md +11 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/PKG-INFO +1 -1
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/pyproject.toml +1 -1
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/__init__.py +5 -2
- stackone_defender-0.7.0/src/stackone_defender/classifiers/onnx_classifier.py +276 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/pattern_detector.py +76 -7
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/patterns.py +117 -9
- stackone_defender-0.7.0/src/stackone_defender/classifiers/tier2_classifier.py +477 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/config.py +2 -0
- stackone_defender-0.7.0/src/stackone_defender/core/prompt_defense.py +593 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/core/tool_result_sanitizer.py +29 -1
- stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5/classifier_config.json +47 -0
- {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/model_quantized.onnx +0 -0
- stackone_defender-0.7.0/src/stackone_defender/sanitizers/encoding_detector.py +600 -0
- stackone_defender-0.7.0/src/stackone_defender/sanitizers/leet_normalizer.py +115 -0
- stackone_defender-0.7.0/src/stackone_defender/sanitizers/normalizer.py +166 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/sanitizer.py +37 -5
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/types.py +40 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/utils/boundary.py +27 -5
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_integration.py +172 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_onnx_classifier.py +131 -3
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_pattern_detector.py +157 -3
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_sanitizers.py +210 -4
- stackone_defender-0.7.0/tests/test_tier2_classifier.py +162 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/uv.lock +1 -1
- stackone_defender-0.6.3/.release-please-manifest.json +0 -1
- stackone_defender-0.6.3/src/stackone_defender/classifiers/onnx_classifier.py +0 -148
- stackone_defender-0.6.3/src/stackone_defender/classifiers/tier2_classifier.py +0 -291
- stackone_defender-0.6.3/src/stackone_defender/core/prompt_defense.py +0 -315
- stackone_defender-0.6.3/src/stackone_defender/sanitizers/encoding_detector.py +0 -180
- stackone_defender-0.6.3/src/stackone_defender/sanitizers/normalizer.py +0 -94
- stackone_defender-0.6.3/tests/test_tier2_classifier.py +0 -63
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/.github/workflows/ci.yaml +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/.github/workflows/release.yaml +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/.gitignore +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/.python-version +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/.release-please-config.json +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/README.md +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/models/minilm-full-aug/config.json +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/models/minilm-full-aug/model_quantized.onnx +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/models/minilm-full-aug/tokenizer.json +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/models/minilm-full-aug/tokenizer_config.json +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/__init__.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/core/__init__.py +0 -0
- {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/config.json +0 -0
- {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/tokenizer.json +0 -0
- {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/tokenizer_config.json +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/__init__.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sfe/__init__.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sfe/model.ftz +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/sfe/preprocess.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/utils/__init__.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/utils/field_detection.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/utils/structure.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/__init__.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_sfe.py +0 -0
- {stackone_defender-0.6.3 → stackone_defender-0.7.0}/tests/test_utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{".":"0.7.0"}
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.7.0](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.3...stackone-defender-v0.7.0) (2026-05-29)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### ⚠ BREAKING CHANGES
|
|
7
|
+
|
|
8
|
+
* The default ONNX model directory changed from minilm-full-aug to minilm-multihead-v5. Custom code that hardcoded the old path will no longer load.
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* parity with TS defender 0.7.0 ([75d046a](https://github.com/StackOneHQ/stackone-defender/commit/75d046ab45066ee1f973e91357f7ecb23dea50c8))
|
|
13
|
+
|
|
3
14
|
## [0.6.3](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.2...stackone-defender-v0.6.3) (2026-05-26)
|
|
4
15
|
|
|
5
16
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stackone-defender
|
|
3
|
-
Version: 0.6.3
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Indirect prompt injection defense for AI agents using tool calls
|
|
5
5
|
Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
|
|
6
6
|
Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
|
|
@@ -11,8 +11,8 @@ Usage:
|
|
|
11
11
|
print(f"Blocked: {result.risk_level}")
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
+
from .classifiers.onnx_classifier import get_default_model_path
|
|
14
15
|
from .core.prompt_defense import PromptDefense, create_prompt_defense
|
|
15
|
-
from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
|
|
16
16
|
from .sfe.preprocess import (
|
|
17
17
|
DropDecision,
|
|
18
18
|
SfePredictor,
|
|
@@ -21,11 +21,13 @@ from .sfe.preprocess import (
|
|
|
21
21
|
get_default_sfe_model_path,
|
|
22
22
|
sfe_preprocess,
|
|
23
23
|
)
|
|
24
|
-
from .types import DefenseResult, RiskLevel, Tier1Result
|
|
24
|
+
from .types import DefenseResult, MultiheadConfig, RiskLevel, Tier1Result
|
|
25
|
+
from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
|
|
25
26
|
|
|
26
27
|
__all__ = [
|
|
27
28
|
"DefenseResult",
|
|
28
29
|
"DropDecision",
|
|
30
|
+
"MultiheadConfig",
|
|
29
31
|
"PromptDefense",
|
|
30
32
|
"RiskLevel",
|
|
31
33
|
"SfePredictor",
|
|
@@ -34,6 +36,7 @@ __all__ = [
|
|
|
34
36
|
"contains_boundary_patterns",
|
|
35
37
|
"create_prompt_defense",
|
|
36
38
|
"generate_boundary_instructions",
|
|
39
|
+
"get_default_model_path",
|
|
37
40
|
"get_default_predictor",
|
|
38
41
|
"get_default_sfe_model_path",
|
|
39
42
|
"sfe_preprocess",
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""ONNX classifier for fine-tuned MiniLM prompt injection detection.
|
|
2
|
+
|
|
3
|
+
Pipeline: text -> tokenizer -> ONNX Runtime -> logit -> ``sigmoid(logit / T)``
|
|
4
|
+
-> score. Supports single-head ``[batch]`` / ``[batch, 1]`` models and
|
|
5
|
+
multi-head ``[batch, 2]`` models (main + aux). Temperature ``T`` enables
|
|
6
|
+
post-hoc calibration via temperature scaling.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import math
|
|
13
|
+
import threading
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Literal
|
|
16
|
+
|
|
17
|
+
_logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Shared across all OnnxClassifier instances (keyed by resolved model dir path).
|
|
20
|
+
_session_cache: dict[str, tuple[object, object]] = {}
|
|
21
|
+
_registry_lock = threading.Lock()
|
|
22
|
+
_load_locks: dict[str, threading.Lock] = {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _lock_for_cache_key(cache_key: str) -> threading.Lock:
|
|
26
|
+
with _registry_lock:
|
|
27
|
+
if cache_key not in _load_locks:
|
|
28
|
+
_load_locks[cache_key] = threading.Lock()
|
|
29
|
+
return _load_locks[cache_key]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_default_model_path() -> str:
    """Return the absolute path of the ONNX model directory shipped with the package.

    The directory is ``models/minilm-multihead-v5`` under the parent of the
    directory containing this module. The function is public so that
    :class:`Tier2Classifier` can read the model's ``classifier_config.json``
    calibration defaults at construction time without first building an
    :class:`OnnxClassifier`.
    """
    package_root = Path(__file__).resolve().parent.parent
    return str(package_root.joinpath("models", "minilm-multihead-v5"))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Private spelling kept for internal callers that predate the public helper;
# it forwards to ``get_default_model_path`` and returns exactly the same value.
def _default_model_path() -> str:
    """Return the bundled model directory by delegating to the public helper."""
    default_dir = get_default_model_path()
    return default_dir
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _sigmoid(x: float) -> float:
|
|
48
|
+
return 1.0 / (1.0 + math.exp(-x))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class OnnxClassifier:
    """ONNX classifier for fine-tuned MiniLM models.

    Loads the model lazily on first inference. The session and tokenizer
    are cached at module level so multiple instances pointing at the same
    model path share a single backing session (safe: ONNX Runtime
    guarantees thread-safe ``Run()`` from v1.7.0, and the ``tokenizers``
    library's encode methods do not mutate the tokenizer object).
    """

    # Upper bound on rows sent to the session in one ``run`` call.
    _MAX_BATCH_CHUNK = 32

    def __init__(self, model_path: str | None = None, temperature_t: float | None = None):
        """Configure the classifier without loading anything.

        Args:
            model_path: Directory holding ``tokenizer.json`` and
                ``model_quantized.onnx``. Falls back to the bundled model.
            temperature_t: Temperature ``T`` used as ``sigmoid(logit / T)``.
                ``None`` means ``1.0`` (no calibration).

        Raises:
            ValueError: If ``temperature_t`` is not a positive finite number.
        """
        self._model_path = model_path or get_default_model_path()
        self._session = None
        self._tokenizer = None
        self._max_length = 256
        self._load_failed = False
        # Output mode is detected lazily from the logits shape on the first
        # inference call. ``None`` until then.
        self._output_mode: Literal["single", "multi"] | None = None
        # Temperature ``T`` must be a positive finite number. ``T <= 0`` is
        # undefined (divide-by-zero or sign flip) and almost certainly a
        # programming error rather than a config the caller wants gracefully
        # ignored.
        self._temperature_t = 1.0
        if temperature_t is not None:
            if not math.isfinite(temperature_t) or temperature_t <= 0:
                raise ValueError(
                    f"OnnxClassifier: temperature_t must be a positive finite number, got {temperature_t}"
                )
            self._temperature_t = float(temperature_t)

    # ------------------------------------------------------------------
    # Public introspection
    # ------------------------------------------------------------------

    def get_temperature(self) -> float:
        """Current temperature scaling factor (``1.0`` = no calibration)."""
        return self._temperature_t

    def get_output_mode(self) -> Literal["single", "multi"] | None:
        """Output mode of the loaded model.

        ``None`` until the first inference runs. ``"multi"`` indicates the
        model emits ``[batch, 2]`` logits (main + aux).
        """
        return self._output_mode

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_model(self, model_path: str | None = None) -> None:
        """Load the session and tokenizer if this instance has none yet.

        Args:
            model_path: Optional replacement model directory. When it differs
                from the current one, the instance drops its session,
                tokenizer and detected output mode so the new model is really
                loaded instead of the old session being silently kept.

        Raises:
            ImportError: If the optional ONNX dependencies are missing (also
                on every later call once that has been observed).
        """
        if model_path and model_path != self._model_path:
            self._model_path = model_path
            # Everything below describes the previous model; discard it.
            self._session = None
            self._tokenizer = None
            self._output_mode = None
        if self.is_loaded():
            return
        if self._load_failed:
            raise ImportError("ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]")
        self._load_model()

    def _load_model(self) -> None:
        """Attach the shared (session, tokenizer) pair for the current path.

        The module-level cache is consulted first without a lock and again
        while holding the per-path lock, so concurrent callers for the same
        path build the session only once.
        """
        cache_key = str(Path(self._model_path).resolve())
        cached = _session_cache.get(cache_key)
        if cached is not None:
            self._session, self._tokenizer = cached
            return

        with _lock_for_cache_key(cache_key):
            # Another thread may have finished loading while we waited.
            cached = _session_cache.get(cache_key)
            if cached is not None:
                self._session, self._tokenizer = cached
                return

            try:
                import numpy as np  # noqa: F401
                import onnxruntime as ort
                from tokenizers import Tokenizer
            except ImportError as e:
                self._load_failed = True
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise ImportError(
                    "ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]"
                ) from e

            model_dir = Path(self._model_path)
            try:
                # Build into locals so a failure half-way through never leaves
                # the instance holding a tokenizer without a session.
                tokenizer = Tokenizer.from_file(str(model_dir / "tokenizer.json"))
                tokenizer.enable_truncation(max_length=self._max_length)
                tokenizer.enable_padding(length=self._max_length)
                session = ort.InferenceSession(str(model_dir / "model_quantized.onnx"))
            except Exception as e:
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise

            self._session, self._tokenizer = session, tokenizer
            _session_cache[cache_key] = (session, tokenizer)

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------

    def classify(self, text: str) -> float:
        """Classify a single text, returning the main-head sigmoid score.

        For multi-head models only the main score is returned; callers that
        need the aux score should use :meth:`classify_pair`.
        """
        return self.classify_pair(text)[0]

    def classify_pair(self, text: str) -> tuple[float, float | None]:
        """Classify a single text, returning ``(main, aux)``.

        ``aux`` is ``None`` for single-head models. Both scores are
        sigmoid-activated with the configured temperature ``T``.
        """
        self._ensure_loaded()
        import numpy as np

        encoding = self._tokenizer.encode(text)
        logits = self._run_session(
            np.array([encoding.ids], dtype=np.int64),
            np.array([encoding.attention_mask], dtype=np.int64),
        )
        return self._row_to_pair(logits[0])

    def classify_batch(self, texts: list[str]) -> list[float]:
        """Classify multiple texts; returns main-head scores only.

        Back-compat wrapper around :meth:`classify_batch_pair`.
        """
        return [main for main, _ in self.classify_batch_pair(texts)]

    def classify_batch_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Classify multiple texts, returning ``(main, aux)`` per row.

        Aux is ``None`` per-row for single-head models. Chunks the input to
        bound native memory; the attention matrix is ``O(chunk * seq_len^2)``,
        and for MiniLM (``max_length=256``) a chunk of 32 keeps memory
        under ~50MB per call.
        """
        if not texts:
            return []
        self._ensure_loaded()
        all_pairs: list[tuple[float, float | None]] = []
        for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
            chunk = texts[offset : offset + self._MAX_BATCH_CHUNK]
            all_pairs.extend(self._classify_batch_chunk_pair(chunk))
        return all_pairs

    def _classify_batch_chunk_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Run one already-chunked batch through the model."""
        import numpy as np

        encodings = self._tokenizer.encode_batch(texts)
        logits = self._run_session(
            np.array([e.ids for e in encodings], dtype=np.int64),
            np.array([e.attention_mask for e in encodings], dtype=np.int64),
        )
        # The first axis of ``logits`` is the batch: one row per input text.
        return [self._row_to_pair(row) for row in logits]

    def _run_session(self, input_ids, attention_mask):
        """Run the session on one batch and return its first output (the logits).

        Also records the output mode the first time logits are seen.
        """
        results = self._session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
        logits = results[0]
        self._detect_output_mode(logits.shape)
        return logits

    def _row_to_pair(self, row) -> tuple[float, float | None]:
        """Convert one logits row into temperature-scaled ``(main, aux)`` scores.

        ``row`` has shape ``()``, ``(1,)`` or ``(2,)`` depending on model export.
        """
        t = self._temperature_t
        if self._output_mode == "multi":
            return _sigmoid(float(row[0]) / t), _sigmoid(float(row[1]) / t)
        # ``row`` may be a scalar (shape ``[batch]``) or a 1-vector.
        main_logit = float(row[0]) if hasattr(row, "__len__") and len(row) > 0 else float(row)
        return _sigmoid(main_logit / t), None

    def _detect_output_mode(self, dims) -> None:
        """Detect output mode from the logits tensor shape on first inference.

        - ``[batch]`` or ``[batch, 1]`` -> ``"single"``
        - ``[batch, 2]`` -> ``"multi"`` (main + aux dual head)

        Idempotent: subsequent calls are no-ops once mode is set.
        """
        if self._output_mode is not None:
            return
        if dims is None or len(dims) < 2:
            self._output_mode = "single"
            return
        self._output_mode = "multi" if dims[1] == 2 else "single"

    # ------------------------------------------------------------------
    # Misc
    # ------------------------------------------------------------------

    def count_tokens(self, text: str) -> int:
        """Return the number of real (non-padding) tokens *text* encodes to."""
        self._ensure_loaded()
        encoding = self._tokenizer.encode(text)
        # Padding is enabled at a fixed length; count only real (attended) tokens.
        return int(sum(encoding.attention_mask))

    def get_max_length(self) -> int:
        """Return the fixed sequence length used for truncation and padding."""
        return self._max_length

    def warmup(self) -> None:
        """Load the model eagerly instead of waiting for the first inference."""
        self.load_model()

    def is_loaded(self) -> bool:
        """Return ``True`` when both the session and the tokenizer are attached."""
        return self._session is not None and self._tokenizer is not None

    def _ensure_loaded(self) -> None:
        """Load the model on demand before an operation that needs it."""
        if not self.is_loaded():
            self.load_model()
|
|
@@ -9,7 +9,10 @@ from __future__ import annotations
|
|
|
9
9
|
import math
|
|
10
10
|
import re
|
|
11
11
|
import time
|
|
12
|
+
import unicodedata
|
|
12
13
|
|
|
14
|
+
from ..sanitizers.leet_normalizer import normalize_leet_speak
|
|
15
|
+
from ..sanitizers.normalizer import normalize_unicode, normalize_whitespace, strip_combining_marks
|
|
13
16
|
from ..types import PatternDefinition, PatternMatch, RiskLevel, StructuralFlag, Tier1Result
|
|
14
17
|
from .patterns import ALL_PATTERNS, contains_filter_keywords
|
|
15
18
|
|
|
@@ -47,16 +50,83 @@ class PatternDetector:
|
|
|
47
50
|
return self._empty_result(start)
|
|
48
51
|
|
|
49
52
|
original_length = len(text)
|
|
50
|
-
|
|
53
|
+
raw_text = text[: self._max_analysis_length] if len(text) > self._max_analysis_length else text
|
|
54
|
+
|
|
55
|
+
# Normalisation chain: collapse obfuscation before injection pattern
|
|
56
|
+
# matching. Order matters:
|
|
57
|
+
# 1. NFD-decompose: precomposed accents become base + combining mark.
|
|
58
|
+
# 2. strip_combining_marks: Zalgo defense + accent stripping.
|
|
59
|
+
# 3. normalize_unicode: homoglyphs/fullwidth -> ASCII.
|
|
60
|
+
# 4. normalize_whitespace: collapse spaced letters + embedded newlines.
|
|
61
|
+
# 5. normalize_leet_speak: 1gn0r3 -> ignore.
|
|
62
|
+
# NFD-decomposition lives here (not in normalize_unicode) because it
|
|
63
|
+
# strips legitimate accents like ``café`` -> ``cafe`` -- fine for
|
|
64
|
+
# analysis but would be data loss if returned to callers. The result
|
|
65
|
+
# is analysis-only and never returned.
|
|
66
|
+
analysis_text = normalize_leet_speak(
|
|
67
|
+
normalize_whitespace(
|
|
68
|
+
normalize_unicode(strip_combining_marks(unicodedata.normalize("NFD", raw_text)))
|
|
69
|
+
)
|
|
70
|
+
)
|
|
51
71
|
|
|
72
|
+
# Fast filter: short-circuit if neither raw nor normalised text
|
|
73
|
+
# contains keywords. Raw text is checked to preserve detection of
|
|
74
|
+
# obfuscation patterns (e.g. invisible unicode, leet-speak variants)
|
|
75
|
+
# that are normalised away before injection patterns run. Disable the
|
|
76
|
+
# fast filter when custom patterns are provided -- callers may add
|
|
77
|
+
# patterns whose keywords aren't in the static list.
|
|
52
78
|
should_use_fast_filter = self._use_fast_filter and not self._has_custom
|
|
53
|
-
|
|
54
|
-
|
|
79
|
+
raw_has_keywords = not should_use_fast_filter or contains_filter_keywords(raw_text)
|
|
80
|
+
norm_has_keywords = not should_use_fast_filter or contains_filter_keywords(analysis_text)
|
|
81
|
+
|
|
82
|
+
if not raw_has_keywords and not norm_has_keywords:
|
|
83
|
+
flags = self._detect_structural_issues(raw_text, original_length)
|
|
55
84
|
return self._create_result([], flags, start)
|
|
56
85
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
86
|
+
# Short-circuit: if normalisation produced no change, a single pass
|
|
87
|
+
# is sufficient and avoids doubling pattern work for plain-text input.
|
|
88
|
+
if raw_text == analysis_text:
|
|
89
|
+
matches = self._detect_patterns(raw_text) if raw_has_keywords else []
|
|
90
|
+
flags = self._detect_structural_issues(raw_text, original_length)
|
|
91
|
+
return self._create_result(matches, flags, start)
|
|
92
|
+
|
|
93
|
+
# Run patterns on raw text -- catches obfuscation-specific patterns
|
|
94
|
+
# (e.g. invisible_unicode, leetspeak_injection) that normalisation
|
|
95
|
+
# removes. Run whenever EITHER pass has keywords: if only the
|
|
96
|
+
# normalised text has keywords (pure leet-speak with no other
|
|
97
|
+
# fast-filter hits), we still want the raw pass to fire obfuscation
|
|
98
|
+
# patterns like leetspeak_injection.
|
|
99
|
+
raw_matches = (
|
|
100
|
+
self._detect_patterns(raw_text) if (raw_has_keywords or norm_has_keywords) else []
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
# Run patterns on normalised text -- catches injection patterns
|
|
104
|
+
# hidden behind leet-speak, whitespace, or homoglyph obfuscation.
|
|
105
|
+
# Matches are tagged ``normalised=True`` because their
|
|
106
|
+
# position/matched values reference the transformed text.
|
|
107
|
+
norm_matches_raw = self._detect_patterns(analysis_text) if norm_has_keywords else []
|
|
108
|
+
norm_matches = [
|
|
109
|
+
PatternMatch(
|
|
110
|
+
pattern=m.pattern,
|
|
111
|
+
matched=m.matched,
|
|
112
|
+
position=m.position,
|
|
113
|
+
category=m.category,
|
|
114
|
+
severity=m.severity,
|
|
115
|
+
normalised=True,
|
|
116
|
+
)
|
|
117
|
+
for m in norm_matches_raw
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
# Merge: normalised matches take priority. Raw-only matches are
|
|
121
|
+
# appended for patterns that fired on the original text but not the
|
|
122
|
+
# normalised form (e.g. obfuscation-detection patterns that match the
|
|
123
|
+
# raw encoding characters).
|
|
124
|
+
seen_patterns = {m.pattern for m in norm_matches}
|
|
125
|
+
merged_matches: list[PatternMatch] = [*norm_matches]
|
|
126
|
+
merged_matches.extend(m for m in raw_matches if m.pattern not in seen_patterns)
|
|
127
|
+
|
|
128
|
+
flags = self._detect_structural_issues(raw_text, original_length)
|
|
129
|
+
return self._create_result(merged_matches, flags, start)
|
|
60
130
|
|
|
61
131
|
# ------------------------------------------------------------------
|
|
62
132
|
# Pattern detection
|
|
@@ -65,7 +135,6 @@ class PatternDetector:
|
|
|
65
135
|
def _detect_patterns(self, text: str) -> list[PatternMatch]:
|
|
66
136
|
matches: list[PatternMatch] = []
|
|
67
137
|
for defn in self._patterns:
|
|
68
|
-
# Use finditer for all patterns (handles global-like behavior)
|
|
69
138
|
for m in defn.pattern.finditer(text):
|
|
70
139
|
matches.append(
|
|
71
140
|
PatternMatch(
|
{stackone_defender-0.6.3 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/patterns.py
RENAMED
|
@@ -26,7 +26,21 @@ ROLE_MARKER_PATTERNS: list[PatternDefinition] = [
|
|
|
26
26
|
PatternDefinition("role_system_bracket", re.compile(r"^\[SYSTEM\]", re.I), "role_marker", "high", "Bracketed system role marker"),
|
|
27
27
|
PatternDefinition("role_inst_bracket", re.compile(r"^\[INST\]", re.I), "role_marker", "high", "Bracketed instruction marker (Llama format)"),
|
|
28
28
|
# XML-style variants
|
|
29
|
-
|
|
29
|
+
# role_system_xml requires directive-shaped content immediately following
|
|
30
|
+
# the tag. Bare ``<system>`` mentions are common in XML schemas, ML config
|
|
31
|
+
# docs, and OS specs; the attack shape is ``<system>`` followed by an
|
|
32
|
+
# imperative or role-switch payload. Closing-tag pairs are matched
|
|
33
|
+
# implicitly because the directive content sits inside them.
|
|
34
|
+
PatternDefinition(
|
|
35
|
+
"role_system_xml",
|
|
36
|
+
re.compile(
|
|
37
|
+
r"<system>\s*(?:ignore|disregard|forget|override|you\s+are|new\s+instructions?|stop|disable|bypass)",
|
|
38
|
+
re.I,
|
|
39
|
+
),
|
|
40
|
+
"role_marker",
|
|
41
|
+
"high",
|
|
42
|
+
"XML-style system tag",
|
|
43
|
+
),
|
|
30
44
|
PatternDefinition("role_assistant_xml", re.compile(r"<assistant>", re.I), "role_marker", "medium", "XML-style assistant tag"),
|
|
31
45
|
]
|
|
32
46
|
|
|
@@ -48,9 +62,35 @@ INSTRUCTION_OVERRIDE_PATTERNS: list[PatternDefinition] = [
|
|
|
48
62
|
# Role assumption
|
|
49
63
|
# ---------------------------------------------------------------------------
|
|
50
64
|
ROLE_ASSUMPTION_PATTERNS: list[PatternDefinition] = [
|
|
51
|
-
|
|
65
|
+
# you_are_now: require an actual role-noun terminator. The previous form
|
|
66
|
+
# made both alternation groups optional, so "you are now logged in" /
|
|
67
|
+
# "you are now ready" UI strings FP'd. Role list expanded to cover the
|
|
68
|
+
# actual attack distribution (DAN/GPT/AI/jailbroken/persona-switching).
|
|
69
|
+
PatternDefinition(
|
|
70
|
+
"you_are_now",
|
|
71
|
+
re.compile(
|
|
72
|
+
r"you\s+are\s+now\s+(?:a\s+|an\s+)?(?:different|new|unrestricted|jailbroken|free|uncensored|DAN|GPT|AI|chatbot|model|persona|hacker|admin|root|developer|superuser)\b",
|
|
73
|
+
re.I,
|
|
74
|
+
),
|
|
75
|
+
"role_assumption",
|
|
76
|
+
"high",
|
|
77
|
+
"Attempt to assign new role",
|
|
78
|
+
),
|
|
52
79
|
PatternDefinition("act_as", re.compile(r"act\s+(?:as|like)\s+(?:a\s+)?(?:system|admin|developer|root|superuser)", re.I), "role_assumption", "high", "Attempt to make AI act as privileged role"),
|
|
53
|
-
|
|
80
|
+
# pretend_to_be: require an attack-shaped role-noun. Previously matched any
|
|
81
|
+
# "pretend to be ..." prefix, FPing on children's literature, drama
|
|
82
|
+
# exercises, jokes. Role list overlaps with `you_are_now` plus privilege
|
|
83
|
+
# escalation nouns (hacker/admin/root/developer).
|
|
84
|
+
PatternDefinition(
|
|
85
|
+
"pretend_to_be",
|
|
86
|
+
re.compile(
|
|
87
|
+
r"pretend\s+(?:to\s+be|you\s+are)\s+(?:a\s+|an\s+)?(?:different|new|unrestricted|jailbroken|free|uncensored|DAN|GPT|AI|chatbot|model|persona|hacker|admin|root|developer)\b",
|
|
88
|
+
re.I,
|
|
89
|
+
),
|
|
90
|
+
"role_assumption",
|
|
91
|
+
"medium",
|
|
92
|
+
"Attempt to make AI pretend",
|
|
93
|
+
),
|
|
54
94
|
PatternDefinition("roleplay_as", re.compile(r"roleplay\s+(?:as|like)\s+(?:a\s+)?", re.I), "role_assumption", "low", "Roleplay request (lower severity)"),
|
|
55
95
|
PatternDefinition("imagine_you_are", re.compile(r"imagine\s+(?:that\s+)?you\s+are\s+(?:a\s+)?", re.I), "role_assumption", "low", "Imagination prompt (lower severity)"),
|
|
56
96
|
PatternDefinition("jailbreak_dan", re.compile(r"\bDAN\b.*?(?:do\s+anything|jailbreak)", re.I), "role_assumption", "high", "DAN jailbreak attempt"),
|
|
@@ -75,7 +115,13 @@ COMMAND_EXECUTION_PATTERNS: list[PatternDefinition] = [
|
|
|
75
115
|
PatternDefinition("execute_command", re.compile(r"execute\s+(?:the\s+)?(?:following|this|these)\s+(?:command|instruction|code)", re.I), "command_execution", "high", "Command execution instruction"),
|
|
76
116
|
PatternDefinition("run_code", re.compile(r"run\s+(?:the\s+)?(?:following|this|these)\s+(?:code|script|command)", re.I), "command_execution", "high", "Code execution instruction"),
|
|
77
117
|
PatternDefinition("eval_expression", re.compile(r"eval(?:uate)?\s*\(", re.I), "command_execution", "medium", "Eval function pattern"),
|
|
78
|
-
|
|
118
|
+
# shell_command: POSIX ``$(...)`` only. The legacy backtick form
|
|
119
|
+
# ``` `cmd` ``` used to be included here but FPs on every markdown
|
|
120
|
+
# inline-code span (``` `cat foo.json` ```, ``` `npm install` ```,
|
|
121
|
+
# ``` `filename.txt` ```). Modern shell idioms have used ``$(...)`` for
|
|
122
|
+
# decades; real attackers default to it because it nests. Tier 2 still
|
|
123
|
+
# catches the rare backtick attack via context.
|
|
124
|
+
PatternDefinition("shell_command", re.compile(r"\$\([^)]+\)"), "command_execution", "medium", "Shell command substitution"),
|
|
79
125
|
]
|
|
80
126
|
|
|
81
127
|
# ---------------------------------------------------------------------------
|
|
@@ -86,7 +132,9 @@ ENCODING_SUSPICIOUS_PATTERNS: list[PatternDefinition] = [
|
|
|
86
132
|
PatternDefinition("hex_escape_sequence", re.compile(r"(?:\\x[0-9a-fA-F]{2}){4,}"), "encoding_suspicious", "medium", "Hex escape sequence (potential obfuscation)"),
|
|
87
133
|
PatternDefinition("unicode_escape_sequence", re.compile(r"(?:\\u[0-9a-fA-F]{4}){4,}"), "encoding_suspicious", "medium", "Unicode escape sequence (potential obfuscation)"),
|
|
88
134
|
PatternDefinition("html_entity_abuse", re.compile(r"(?:&#\d{2,4};){4,}|(?:&#x[0-9a-fA-F]{2,4};){4,}", re.I), "encoding_suspicious", "medium", "HTML entity encoding (potential obfuscation)"),
|
|
89
|
-
PatternDefinition("rot13_mention", re.compile(r"rot13|caesar\s+cipher|decode\s+this", re.I), "encoding_suspicious", "
|
|
135
|
+
PatternDefinition("rot13_mention", re.compile(r"rot13|caesar\s+cipher|decode\s+this", re.I), "encoding_suspicious", "medium", "Mention of ROT13 or similar encoding schemes"),
|
|
136
|
+
PatternDefinition("binary_string_encoding", re.compile(r"\b[01]{8}(?:\s+[01]{8}){2,}\b"), "encoding_suspicious", "medium", "Binary-encoded string (potential obfuscation)"),
|
|
137
|
+
PatternDefinition("morse_code_encoding", re.compile(r"(?:[.-]+\s){4,}[.-]+"), "encoding_suspicious", "low", "Morse code pattern (potential obfuscation)"),
|
|
90
138
|
PatternDefinition("leetspeak_injection", re.compile(r"1gn0r3|f0rg3t|byp4ss|syst3m|4dm1n|h4ck", re.I), "encoding_suspicious", "medium", "Leetspeak obfuscation of injection keywords"),
|
|
91
139
|
]
|
|
92
140
|
|
|
@@ -95,7 +143,17 @@ ENCODING_SUSPICIOUS_PATTERNS: list[PatternDefinition] = [
|
|
|
95
143
|
# ---------------------------------------------------------------------------
|
|
96
144
|
PROMPT_LEAKING_PATTERNS: list[PatternDefinition] = [
|
|
97
145
|
PatternDefinition("reveal_system_prompt", re.compile(r"reveal\s+(?:your\s+)?(?:system\s+)?prompt", re.I), "instruction_override", "high", "Attempt to reveal system prompt"),
|
|
98
|
-
|
|
146
|
+
# show_instructions: require an attack-shaped qualifier
|
|
147
|
+
# (initial/original/system/hidden). The inner group used to be optional,
|
|
148
|
+
# so bare "show instructions" -- common in FAQs, help docs, onboarding
|
|
149
|
+
# tours -- triggered the rule.
|
|
150
|
+
PatternDefinition(
|
|
151
|
+
"show_instructions",
|
|
152
|
+
re.compile(r"show\s+(?:me\s+)?(?:your\s+)?(?:initial|original|system|hidden)\s+instructions?", re.I),
|
|
153
|
+
"instruction_override",
|
|
154
|
+
"high",
|
|
155
|
+
"Attempt to show hidden instructions",
|
|
156
|
+
),
|
|
99
157
|
PatternDefinition("print_above", re.compile(r"print\s+(?:everything|all|text)\s+(?:above|before)", re.I), "instruction_override", "high", "Attempt to print content above current context"),
|
|
100
158
|
PatternDefinition("repeat_back", re.compile(r"repeat\s+(?:back\s+)?(?:everything|all|your\s+instructions?)", re.I), "instruction_override", "high", "Attempt to make AI repeat instructions"),
|
|
101
159
|
PatternDefinition("what_are_your_instructions", re.compile(r"what\s+(?:are|were)\s+(?:your|the)\s+(?:(?:initial|original|system)\s+)?instructions?", re.I), "instruction_override", "medium", "Question about system instructions"),
|
|
@@ -106,13 +164,57 @@ PROMPT_LEAKING_PATTERNS: list[PatternDefinition] = [
|
|
|
106
164
|
# Indirect injection
|
|
107
165
|
# ---------------------------------------------------------------------------
|
|
108
166
|
INDIRECT_INJECTION_PATTERNS: list[PatternDefinition] = [
|
|
109
|
-
|
|
167
|
+
# markdown_hidden_instruction: require an imperative + scope qualifier in
|
|
168
|
+
# the URL. The earlier form matched "system" or "instruction" anywhere in
|
|
169
|
+
# the URL, so every doc cross-reference like
|
|
170
|
+
# ``[config](https://.../system-setup)`` triggered. Real smuggled-
|
|
171
|
+
# instruction attacks include the full "ignore (all|the|previous|prior)"
|
|
172
|
+
# phrasing in the URL/anchor.
|
|
173
|
+
PatternDefinition(
|
|
174
|
+
"markdown_hidden_instruction",
|
|
175
|
+
re.compile(
|
|
176
|
+
r"\[.*?\]\(.*?(?:ignore|disregard|forget|override)\W+(?:all|the|previous|prior)\W+.*?\)",
|
|
177
|
+
re.I,
|
|
178
|
+
),
|
|
179
|
+
"structural",
|
|
180
|
+
"high",
|
|
181
|
+
"Markdown link with hidden injection",
|
|
182
|
+
),
|
|
110
183
|
PatternDefinition("html_comment_injection", re.compile(r"<!--\s*(?:system|ignore|instruction|prompt).*?-->", re.I), "structural", "high", "HTML comment containing injection keywords"),
|
|
111
184
|
PatternDefinition("invisible_unicode", re.compile(r"[\u200b-\u200d\ufeff\u2060\u2061\u2062\u2063\u2064]"), "encoding_suspicious", "medium", "Invisible Unicode characters (zero-width, etc.)"),
|
|
112
185
|
PatternDefinition("text_direction_override", re.compile(r"[\u202a-\u202e\u2066-\u2069]"), "encoding_suspicious", "medium", "Text direction override characters"),
|
|
113
|
-
|
|
186
|
+
# confusable_homoglyphs: Cherokee (U+13A0-U+13F4) and Phonetic Extensions
|
|
187
|
+
# (U+1D00-U+1D2B) are essentially never in real customer content, so
|
|
188
|
+
# single-char presence remains a useful signal. Cyrillic (U+0400-U+04FF)
|
|
189
|
+
# is the everyday script of Russian and many other languages -- flag only when *mixed* with Latin letters
|
|
190
|
+
# (the actual attack: ``аdmin`` with a Cyrillic 'a'), not when the whole
|
|
191
|
+
# word/text is Cyrillic.
|
|
192
|
+
PatternDefinition(
|
|
193
|
+
"confusable_homoglyphs",
|
|
194
|
+
re.compile(
|
|
195
|
+
r"[\u13a0-\u13f4\u1d00-\u1d2b]|[a-zA-Z][\u0400-\u04ff]|[\u0400-\u04ff][a-zA-Z]"
|
|
196
|
+
),
|
|
197
|
+
"encoding_suspicious",
|
|
198
|
+
"medium",
|
|
199
|
+
"Unicode homoglyph characters (Cherokee, Small Caps, Cyrillic)",
|
|
200
|
+
),
|
|
114
201
|
PatternDefinition("separator_injection", re.compile(r"[-=]{10,}[^-=\n]*(?:system|instruction|ignore)", re.I), "structural", "medium", "Separator followed by injection attempt"),
|
|
115
|
-
|
|
202
|
+
# json_injection: target the actual attack shape -- setting a chat-message
|
|
203
|
+
# role to a privileged value (system/developer/admin), or stuffing a long
|
|
204
|
+
# string into a ``"system"`` key. The previous form matched the bare key
|
|
205
|
+
# ``"system":`` / ``"role":`` etc., which fires on every OpenAI / Anthropic
|
|
206
|
+
# SDK example, chat-log dump, and JSON schema that just *declares* the
|
|
207
|
+
# field without abusing it.
|
|
208
|
+
PatternDefinition(
|
|
209
|
+
"json_injection",
|
|
210
|
+
re.compile(
|
|
211
|
+
r'"role"\s*:\s*"(?:system|developer|admin)"|"system"\s*:\s*"[^"]{20,}',
|
|
212
|
+
re.I,
|
|
213
|
+
),
|
|
214
|
+
"structural",
|
|
215
|
+
"medium",
|
|
216
|
+
"JSON-style role/instruction injection",
|
|
217
|
+
),
|
|
116
218
|
]
|
|
117
219
|
|
|
118
220
|
# ---------------------------------------------------------------------------
|
|
@@ -155,6 +257,12 @@ FAST_FILTER_KEYWORDS: list[str] = [
|
|
|
155
257
|
"execute", "eval(", "$(", "run the",
|
|
156
258
|
# Encoding/obfuscation
|
|
157
259
|
"base64", "decode", "\\x", "\\u", "&#", "rot13",
|
|
260
|
+
# Raw leet-speak keywords -- kept here because the leet normaliser skips
|
|
261
|
+
# 20+ character alphanumeric tokens (treated as base64-like blobs), so
|
|
262
|
+
# long leet payloads like "1gn0r3pr3v10us1nstruct10ns" are NOT normalised
|
|
263
|
+
# to plain English and won't trip the "ignore" / "forget" / "bypass"
|
|
264
|
+
# keywords above. These literal entries ensure such payloads still
|
|
265
|
+
# trigger the fast filter and reach the leetspeak_injection regex.
|
|
158
266
|
"1gn0r3", "f0rg3t", "byp4ss",
|
|
159
267
|
# Prompt leaking
|
|
160
268
|
"reveal", "show me your", "print everything", "print above",
|