stackone-defender 0.6.3__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. stackone_defender-0.7.1/.release-please-manifest.json +1 -0
  2. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/CHANGELOG.md +26 -0
  3. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/PKG-INFO +14 -1
  4. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/README.md +13 -0
  5. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/pyproject.toml +1 -1
  6. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/__init__.py +19 -2
  7. stackone_defender-0.7.1/src/stackone_defender/classifiers/onnx_classifier.py +276 -0
  8. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/classifiers/pattern_detector.py +76 -7
  9. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/classifiers/patterns.py +117 -9
  10. stackone_defender-0.7.1/src/stackone_defender/classifiers/tier2_classifier.py +477 -0
  11. stackone_defender-0.7.1/src/stackone_defender/classifiers/tier3_orchestrator.py +27 -0
  12. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/config.py +2 -0
  13. stackone_defender-0.7.1/src/stackone_defender/core/prompt_defense.py +999 -0
  14. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/core/tool_result_sanitizer.py +29 -1
  15. stackone_defender-0.7.1/src/stackone_defender/models/minilm-multihead-v5/classifier_config.json +47 -0
  16. {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.1/src/stackone_defender/models/minilm-multihead-v5}/model_quantized.onnx +0 -0
  17. stackone_defender-0.7.1/src/stackone_defender/sanitizers/encoding_detector.py +600 -0
  18. stackone_defender-0.7.1/src/stackone_defender/sanitizers/leet_normalizer.py +115 -0
  19. stackone_defender-0.7.1/src/stackone_defender/sanitizers/normalizer.py +166 -0
  20. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sanitizers/sanitizer.py +37 -5
  21. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/types.py +94 -1
  22. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/utils/boundary.py +27 -5
  23. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_integration.py +172 -0
  24. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_onnx_classifier.py +131 -3
  25. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_pattern_detector.py +157 -3
  26. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_sanitizers.py +210 -4
  27. stackone_defender-0.7.1/tests/test_tier2_classifier.py +162 -0
  28. stackone_defender-0.7.1/tests/test_tier3.py +380 -0
  29. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/uv.lock +1 -1
  30. stackone_defender-0.6.3/.release-please-manifest.json +0 -1
  31. stackone_defender-0.6.3/src/stackone_defender/classifiers/onnx_classifier.py +0 -148
  32. stackone_defender-0.6.3/src/stackone_defender/classifiers/tier2_classifier.py +0 -291
  33. stackone_defender-0.6.3/src/stackone_defender/core/prompt_defense.py +0 -315
  34. stackone_defender-0.6.3/src/stackone_defender/sanitizers/encoding_detector.py +0 -180
  35. stackone_defender-0.6.3/src/stackone_defender/sanitizers/normalizer.py +0 -94
  36. stackone_defender-0.6.3/tests/test_tier2_classifier.py +0 -63
  37. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/.github/workflows/ci.yaml +0 -0
  38. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/.github/workflows/release.yaml +0 -0
  39. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/.gitignore +0 -0
  40. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/.python-version +0 -0
  41. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/.release-please-config.json +0 -0
  42. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/models/minilm-full-aug/config.json +0 -0
  43. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/models/minilm-full-aug/model_quantized.onnx +0 -0
  44. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/models/minilm-full-aug/tokenizer.json +0 -0
  45. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/models/minilm-full-aug/tokenizer_config.json +0 -0
  46. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/classifiers/__init__.py +0 -0
  47. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/core/__init__.py +0 -0
  48. {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.1/src/stackone_defender/models/minilm-multihead-v5}/config.json +0 -0
  49. {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.1/src/stackone_defender/models/minilm-multihead-v5}/tokenizer.json +0 -0
  50. {stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.1/src/stackone_defender/models/minilm-multihead-v5}/tokenizer_config.json +0 -0
  51. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sanitizers/__init__.py +0 -0
  52. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
  53. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
  54. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sfe/__init__.py +0 -0
  55. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sfe/model.ftz +0 -0
  56. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/sfe/preprocess.py +0 -0
  57. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/utils/__init__.py +0 -0
  58. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/utils/field_detection.py +0 -0
  59. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/src/stackone_defender/utils/structure.py +0 -0
  60. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/__init__.py +0 -0
  61. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_sfe.py +0 -0
  62. {stackone_defender-0.6.3 → stackone_defender-0.7.1}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
1
+ {".":"0.7.1"}
@@ -1,5 +1,31 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.7.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.7.0...stackone-defender-v0.7.1) (2026-06-16)
4
+
5
+
6
+ ### Features
7
+
8
+ * add defend_tool_results_async for npm batch parity ([a05783c](https://github.com/StackOneHQ/stackone-defender/commit/a05783c5671548aa66dfead1f129584b249d8778))
9
+ * Python parity with @stackone/defender 0.7.1 (Tier 3) ([c58a17c](https://github.com/StackOneHQ/stackone-defender/commit/c58a17c9ba1a902148cde9204666f7f1a916d09b))
10
+ * Tier 3 provider interface and cascade orchestration (TS 0.7.1 parity) ([f2b4109](https://github.com/StackOneHQ/stackone-defender/commit/f2b41096db4ca65741b9d4ba62f3fad7591929ab))
11
+
12
+
13
+ ### Bug Fixes
14
+
15
+ * address Copilot PR review on Tier 3 orchestration ([570f567](https://github.com/StackOneHQ/stackone-defender/commit/570f56753292700a15b73725a12db426316468c6))
16
+ * tighten Tier3ClassifyResult type and batch doc wording ([2515772](https://github.com/StackOneHQ/stackone-defender/commit/2515772f894dd2cbdaa51e9d0b39e26f151d257f))
17
+
18
+ ## [0.7.0](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.3...stackone-defender-v0.7.0) (2026-05-29)
19
+
20
+
21
+ ### ⚠ BREAKING CHANGES
22
+
23
+ * The default ONNX model directory changed from minilm-full-aug to minilm-multihead-v5. Custom code that hardcoded the old path will no longer load.
24
+
25
+ ### Features
26
+
27
+ * parity with TS defender 0.7.0 ([75d046a](https://github.com/StackOneHQ/stackone-defender/commit/75d046ab45066ee1f973e91357f7ecb23dea50c8))
28
+
3
29
  ## [0.6.3](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.2...stackone-defender-v0.6.3) (2026-05-26)
4
30
 
5
31
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stackone-defender
3
- Version: 0.6.3
3
+ Version: 0.7.1
4
4
  Summary: Indirect prompt injection defense for AI agents using tool calls
5
5
  Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
6
6
  Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
@@ -204,6 +204,8 @@ class DefenseResult:
204
204
 
205
205
  ### `defense.defend_tool_results(items)`
206
206
 
207
+ Sync batch API. When `enable_tier3=True`, uses one `asyncio.run()` and defends items **concurrently** via `asyncio.gather` (same scheduling model as npm `defendToolResults`; blocking sync providers still run one at a time on the event-loop thread). From async code, prefer `defend_tool_results_async`.
208
+
207
209
  ```python
208
210
  results = defense.defend_tool_results([
209
211
  {"value": email_data, "tool_name": "gmail_get_message"},
@@ -215,6 +217,17 @@ for r in results:
215
217
  print("Blocked:", ", ".join(r.fields_sanitized))
216
218
  ```
217
219
 
220
+ ### `await defense.defend_tool_results_async(items)`
221
+
222
+ Async batch API — runs `defend_tool_result_async` per item concurrently via `asyncio.gather`. Required when Tier 3 is enabled inside a running event loop (e.g. FastAPI).
223
+
224
+ ```python
225
+ results = await defense.defend_tool_results_async([
226
+ {"value": email_data, "tool_name": "gmail_get_message"},
227
+ {"value": doc_data, "tool_name": "documents_get"},
228
+ ])
229
+ ```
230
+
218
231
  ### `defense.analyze(text)`
219
232
 
220
233
  Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
@@ -178,6 +178,8 @@ class DefenseResult:
178
178
 
179
179
  ### `defense.defend_tool_results(items)`
180
180
 
181
+ Sync batch API. When `enable_tier3=True`, uses one `asyncio.run()` and defends items **concurrently** via `asyncio.gather` (same scheduling model as npm `defendToolResults`; blocking sync providers still run one at a time on the event-loop thread). From async code, prefer `defend_tool_results_async`.
182
+
181
183
  ```python
182
184
  results = defense.defend_tool_results([
183
185
  {"value": email_data, "tool_name": "gmail_get_message"},
@@ -189,6 +191,17 @@ for r in results:
189
191
  print("Blocked:", ", ".join(r.fields_sanitized))
190
192
  ```
191
193
 
194
+ ### `await defense.defend_tool_results_async(items)`
195
+
196
+ Async batch API — runs `defend_tool_result_async` per item concurrently via `asyncio.gather`. Required when Tier 3 is enabled inside a running event loop (e.g. FastAPI).
197
+
198
+ ```python
199
+ results = await defense.defend_tool_results_async([
200
+ {"value": email_data, "tool_name": "gmail_get_message"},
201
+ {"value": doc_data, "tool_name": "documents_get"},
202
+ ])
203
+ ```
204
+
192
205
  ### `defense.analyze(text)`
193
206
 
194
207
  Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "stackone-defender"
3
- version = "0.6.3"
3
+ version = "0.7.1"
4
4
  description = "Indirect prompt injection defense for AI agents using tool calls"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -11,8 +11,9 @@ Usage:
11
11
  print(f"Blocked: {result.risk_level}")
12
12
  """
13
13
 
14
+ from .classifiers.onnx_classifier import get_default_model_path
15
+ from .classifiers.tier3_orchestrator import get_default_tier3_provider, set_default_tier3_provider
14
16
  from .core.prompt_defense import PromptDefense, create_prompt_defense
15
- from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
16
17
  from .sfe.preprocess import (
17
18
  DropDecision,
18
19
  SfePredictor,
@@ -21,20 +22,36 @@ from .sfe.preprocess import (
21
22
  get_default_sfe_model_path,
22
23
  sfe_preprocess,
23
24
  )
24
- from .types import DefenseResult, RiskLevel, Tier1Result
25
+ from .types import (
26
+ DefenderMode,
27
+ DefenseResult,
28
+ MultiheadConfig,
29
+ RiskLevel,
30
+ Tier1Result,
31
+ Tier3Provider,
32
+ Tier3Verdict,
33
+ )
34
+ from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
25
35
 
26
36
  __all__ = [
37
+ "DefenderMode",
27
38
  "DefenseResult",
28
39
  "DropDecision",
40
+ "MultiheadConfig",
29
41
  "PromptDefense",
30
42
  "RiskLevel",
31
43
  "SfePredictor",
32
44
  "SfePreprocessResult",
33
45
  "Tier1Result",
46
+ "Tier3Provider",
47
+ "Tier3Verdict",
34
48
  "contains_boundary_patterns",
35
49
  "create_prompt_defense",
36
50
  "generate_boundary_instructions",
51
+ "get_default_model_path",
37
52
  "get_default_predictor",
38
53
  "get_default_sfe_model_path",
54
+ "get_default_tier3_provider",
55
+ "set_default_tier3_provider",
39
56
  "sfe_preprocess",
40
57
  ]
@@ -0,0 +1,276 @@
1
+ """ONNX classifier for fine-tuned MiniLM prompt injection detection.
2
+
3
+ Pipeline: text -> tokenizer -> ONNX Runtime -> logit -> ``sigmoid(logit / T)``
4
+ -> score. Supports single-head ``[batch]`` / ``[batch, 1]`` models and
5
+ multi-head ``[batch, 2]`` models (main + aux). Temperature ``T`` enables
6
+ post-hoc calibration via temperature scaling.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import math
13
+ import threading
14
+ from pathlib import Path
15
+ from typing import Literal
16
+
17
+ _logger = logging.getLogger(__name__)  # module-level logger named after this module
18
+
19
+ # Shared across all OnnxClassifier instances (keyed by resolved model dir path); each value is the (session, tokenizer) pair stored after a successful load.
20
+ _session_cache: dict[str, tuple[object, object]] = {}
21
+ _registry_lock = threading.Lock()  # held while looking up or creating entries in _load_locks
22
+ _load_locks: dict[str, threading.Lock] = {}  # one load lock per cache key, created on demand
23
+
24
+
25
def _lock_for_cache_key(cache_key: str) -> threading.Lock:
    """Return the load lock associated with ``cache_key``, creating it if needed.

    The lookup and the optional creation both happen while holding
    ``_registry_lock`` so every caller asking for the same key receives
    the same lock object.
    """
    with _registry_lock:
        lock = _load_locks.get(cache_key)
        if lock is None:
            lock = threading.Lock()
            _load_locks[cache_key] = lock
        return lock
30
+
31
+
32
def get_default_model_path() -> str:
    """Return the absolute path to the bundled ONNX model directory.

    The directory is ``models/minilm-multihead-v5`` located one level above
    the directory that contains this module.

    Exported so :class:`Tier2Classifier` can read model-specific calibration
    defaults from ``classifier_config.json`` at construction time without
    needing an :class:`OnnxClassifier` instance.
    """
    module_dir = Path(__file__).resolve().parent
    model_dir = module_dir.parent.joinpath("models", "minilm-multihead-v5")
    return str(model_dir)
40
+
41
+
42
+ # Back-compat shim retained for internal users; same value as the public name.
43
def _default_model_path() -> str:
    """Private alias of :func:`get_default_model_path`; returns the same value."""
    default_path = get_default_model_path()
    return default_path
45
+
46
+
47
+ def _sigmoid(x: float) -> float:
48
+ return 1.0 / (1.0 + math.exp(-x))
49
+
50
+
51
class OnnxClassifier:
    """ONNX Classifier for fine-tuned MiniLM models.

    Loads the model lazily on first inference. The session and tokenizer
    are cached at module level so multiple instances pointing at the same
    model path share a single backing session (safe: ONNX Runtime
    guarantees thread-safe ``Run()`` from v1.7.0, and the ``tokenizers``
    library's encode methods do not mutate the tokenizer object).
    """

    # Upper bound on rows sent to the session in one ``run`` call.
    _MAX_BATCH_CHUNK = 32

    def __init__(self, model_path: str | None = None, temperature_t: float | None = None):
        """Configure the classifier without loading the model.

        Args:
            model_path: Directory holding ``tokenizer.json`` and
                ``model_quantized.onnx``. Falls back to the bundled
                default when falsy.
            temperature_t: Temperature ``T`` applied as
                ``sigmoid(logit / T)``. ``None`` means ``1.0``.

        Raises:
            ValueError: If ``temperature_t`` is not a positive finite number.
        """
        self._model_path = model_path or get_default_model_path()
        self._session = None
        self._tokenizer = None
        self._max_length = 256
        self._load_failed = False
        # Output mode is detected lazily from the logits shape on the first
        # inference call. ``None`` until then.
        self._output_mode: Literal["single", "multi"] | None = None
        # Temperature ``T`` must be a positive finite number. ``T <= 0`` is
        # undefined (divide-by-zero or sign flip) and almost certainly a
        # programming error rather than a config the caller wants gracefully
        # ignored.
        self._temperature_t = 1.0
        if temperature_t is not None:
            if not math.isfinite(temperature_t) or temperature_t <= 0:
                raise ValueError(
                    f"OnnxClassifier: temperature_t must be a positive finite number, got {temperature_t}"
                )
            self._temperature_t = float(temperature_t)

    # ------------------------------------------------------------------
    # Public introspection
    # ------------------------------------------------------------------

    def get_temperature(self) -> float:
        """Current temperature scaling factor (``1.0`` = no calibration)."""
        return self._temperature_t

    def get_output_mode(self) -> Literal["single", "multi"] | None:
        """Output mode of the loaded model.

        ``None`` until the first inference runs. ``"multi"`` indicates the
        model emits ``[batch, 2]`` logits (main + aux).
        """
        return self._output_mode

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_model(self, model_path: str | None = None) -> None:
        """Load the tokenizer and ONNX session if they are not loaded yet.

        Args:
            model_path: Optional new model directory. When it differs from
                the current one, any session, tokenizer and detected output
                mode are discarded so the instance never serves a model
                that does not match ``_model_path``.

        Raises:
            ImportError: If a previous attempt found the optional ONNX
                dependencies missing, or they are missing now.
        """
        if model_path and model_path != self._model_path:
            # Switching models: drop everything tied to the previous path,
            # otherwise the early return below would keep the old session.
            self._model_path = model_path
            self._session = None
            self._tokenizer = None
            self._output_mode = None
        if self.is_loaded():
            return
        if self._load_failed:
            raise ImportError("ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]")
        self._load_model()

    def _load_model(self) -> None:
        """Bind a (session, tokenizer) pair, reusing the module cache if possible.

        Raises:
            ImportError: If ``numpy``, ``onnxruntime`` or ``tokenizers``
                cannot be imported (also sets ``_load_failed``).
            Exception: Whatever the tokenizer or session constructor raised,
                re-raised unchanged after logging a warning.
        """
        cache_key = str(Path(self._model_path).resolve())

        # Unlocked fast path for models that are already cached.
        cached = _session_cache.get(cache_key)
        if cached:
            self._session, self._tokenizer = cached
            return

        with _lock_for_cache_key(cache_key):
            # Re-check under the per-key lock: another thread may have
            # finished loading while this one was waiting.
            cached = _session_cache.get(cache_key)
            if cached:
                self._session, self._tokenizer = cached
                return

            try:
                import numpy as np  # noqa: F401
                import onnxruntime as ort
                from tokenizers import Tokenizer
            except ImportError as e:
                self._load_failed = True
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise ImportError(
                    "ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]"
                ) from e

            try:
                model_dir = Path(self._model_path)
                tokenizer = Tokenizer.from_file(str(model_dir / "tokenizer.json"))
                tokenizer.enable_truncation(max_length=self._max_length)
                tokenizer.enable_padding(length=self._max_length)
                session = ort.InferenceSession(str(model_dir / "model_quantized.onnx"))
            except Exception as e:
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise

            # Commit both halves together so a failure above never leaves
            # the instance with a tokenizer but no session.
            self._session, self._tokenizer = session, tokenizer
            _session_cache[cache_key] = (session, tokenizer)

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------

    def classify(self, text: str) -> float:
        """Classify a single text, returning the main-head sigmoid score.

        For multi-head models only the main score is returned; callers that
        need the aux score should use :meth:`classify_pair`.
        """
        return self.classify_pair(text)[0]

    def classify_pair(self, text: str) -> tuple[float, float | None]:
        """Classify a single text, returning ``(main, aux)``.

        ``aux`` is ``None`` for single-head models. Both scores are
        sigmoid-activated with the configured temperature ``T``.
        """
        self._ensure_loaded()
        import numpy as np

        encoding = self._tokenizer.encode(text)
        input_ids = np.array([encoding.ids], dtype=np.int64)
        attention_mask = np.array([encoding.attention_mask], dtype=np.int64)

        results = self._session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
        logits = results[0]
        self._detect_output_mode(logits.shape)
        return self._pair_from_row(logits[0])

    def classify_batch(self, texts: list[str]) -> list[float]:
        """Classify multiple texts; returns main-head scores only.

        Back-compat wrapper around :meth:`classify_batch_pair`.
        """
        return [main for main, _ in self.classify_batch_pair(texts)]

    def classify_batch_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Classify multiple texts, returning ``(main, aux)`` per row.

        Aux is ``None`` per-row for single-head models. Chunks the input to
        bound native memory; the attention matrix is ``O(chunk * seq_len^2)``,
        and for MiniLM (``max_length=256``) a chunk of 32 keeps memory
        under ~50MB per call.

        An empty ``texts`` returns ``[]`` without loading the model.
        """
        if not texts:
            return []
        self._ensure_loaded()
        all_pairs: list[tuple[float, float | None]] = []
        for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
            chunk = texts[offset : offset + self._MAX_BATCH_CHUNK]
            all_pairs.extend(self._classify_batch_chunk_pair(chunk))
        return all_pairs

    def _classify_batch_chunk_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Run one session call over ``texts`` and score each output row."""
        import numpy as np

        encodings = self._tokenizer.encode_batch(texts)
        input_ids = np.array([e.ids for e in encodings], dtype=np.int64)
        attention_mask = np.array([e.attention_mask for e in encodings], dtype=np.int64)

        results = self._session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
        logits = results[0]
        self._detect_output_mode(logits.shape)
        return [self._pair_from_row(logits[i]) for i in range(len(texts))]

    def _pair_from_row(self, row) -> tuple[float, float | None]:
        """Convert one logits row into temperature-scaled ``(main, aux)`` scores.

        ``row`` has shape ``()``, ``(1,)`` or ``(2,)`` depending on the
        model export. In ``"multi"`` mode index 0 is the main head and
        index 1 the aux head; otherwise aux is ``None``.
        """
        t = self._temperature_t
        if self._output_mode == "multi":
            return _sigmoid(float(row[0]) / t), _sigmoid(float(row[1]) / t)
        # ``row`` may be a scalar (shape ``[batch]``) or a 1-vector.
        main_logit = float(row[0]) if hasattr(row, "__len__") and len(row) > 0 else float(row)
        return _sigmoid(main_logit / t), None

    def _detect_output_mode(self, dims) -> None:
        """Detect output mode from the logits tensor shape on first inference.

        - ``[batch]`` or ``[batch, 1]`` -> ``"single"``
        - ``[batch, 2]`` -> ``"multi"`` (main + aux dual head)

        Idempotent: subsequent calls are no-ops once mode is set.
        """
        if self._output_mode is not None:
            return
        if dims is None or len(dims) < 2:
            self._output_mode = "single"
            return
        self._output_mode = "multi" if dims[1] == 2 else "single"

    # ------------------------------------------------------------------
    # Misc
    # ------------------------------------------------------------------

    def count_tokens(self, text: str) -> int:
        """Return the number of attended (non-padding) tokens for ``text``."""
        self._ensure_loaded()
        encoding = self._tokenizer.encode(text)
        # Padding is enabled at a fixed length; count only real (attended) tokens.
        return int(sum(encoding.attention_mask))

    def get_max_length(self) -> int:
        """Return the truncation/padding length configured on the tokenizer."""
        return self._max_length

    def warmup(self) -> None:
        """Eagerly load the model so the first inference does not pay for it."""
        self.load_model()

    def is_loaded(self) -> bool:
        """Return ``True`` once both the session and the tokenizer are bound."""
        return self._session is not None and self._tokenizer is not None

    def _ensure_loaded(self) -> None:
        """Load the model on demand before an operation that needs it."""
        if not self.is_loaded():
            self.load_model()
@@ -9,7 +9,10 @@ from __future__ import annotations
9
9
  import math
10
10
  import re
11
11
  import time
12
+ import unicodedata
12
13
 
14
+ from ..sanitizers.leet_normalizer import normalize_leet_speak
15
+ from ..sanitizers.normalizer import normalize_unicode, normalize_whitespace, strip_combining_marks
13
16
  from ..types import PatternDefinition, PatternMatch, RiskLevel, StructuralFlag, Tier1Result
14
17
  from .patterns import ALL_PATTERNS, contains_filter_keywords
15
18
 
@@ -47,16 +50,83 @@ class PatternDetector:
47
50
  return self._empty_result(start)
48
51
 
49
52
  original_length = len(text)
50
- analysis_text = text[: self._max_analysis_length] if len(text) > self._max_analysis_length else text
53
+ raw_text = text[: self._max_analysis_length] if len(text) > self._max_analysis_length else text
54
+
55
+ # Normalisation chain: collapse obfuscation before injection pattern
56
+ # matching. Order matters:
57
+ # 1. NFD-decompose: precomposed accents become base + combining mark.
58
+ # 2. strip_combining_marks: Zalgo defense + accent stripping.
59
+ # 3. normalize_unicode: homoglyphs/fullwidth -> ASCII.
60
+ # 4. normalize_whitespace: collapse spaced letters + embedded newlines.
61
+ # 5. normalize_leet_speak: 1gn0r3 -> ignore.
62
+ # NFD-decomposition lives here (not in normalize_unicode) because it
63
+ # strips legitimate accents like ``café`` -> ``cafe`` -- fine for
64
+ # analysis but would be data loss if returned to callers. The result
65
+ # is analysis-only and never returned.
66
+ analysis_text = normalize_leet_speak(
67
+ normalize_whitespace(
68
+ normalize_unicode(strip_combining_marks(unicodedata.normalize("NFD", raw_text)))
69
+ )
70
+ )
51
71
 
72
+ # Fast filter: short-circuit if neither raw nor normalised text
73
+ # contains keywords. Raw text is checked to preserve detection of
74
+ # obfuscation patterns (e.g. invisible unicode, leet-speak variants)
75
+ # that are normalised away before injection patterns run. Disable the
76
+ # fast filter when custom patterns are provided -- callers may add
77
+ # patterns whose keywords aren't in the static list.
52
78
  should_use_fast_filter = self._use_fast_filter and not self._has_custom
53
- if should_use_fast_filter and not contains_filter_keywords(analysis_text):
54
- flags = self._detect_structural_issues(analysis_text, original_length)
79
+ raw_has_keywords = not should_use_fast_filter or contains_filter_keywords(raw_text)
80
+ norm_has_keywords = not should_use_fast_filter or contains_filter_keywords(analysis_text)
81
+
82
+ if not raw_has_keywords and not norm_has_keywords:
83
+ flags = self._detect_structural_issues(raw_text, original_length)
55
84
  return self._create_result([], flags, start)
56
85
 
57
- matches = self._detect_patterns(analysis_text)
58
- flags = self._detect_structural_issues(analysis_text, original_length)
59
- return self._create_result(matches, flags, start)
86
+ # Short-circuit: if normalisation produced no change, a single pass
87
+ # is sufficient and avoids doubling pattern work for plain-text input.
88
+ if raw_text == analysis_text:
89
+ matches = self._detect_patterns(raw_text) if raw_has_keywords else []
90
+ flags = self._detect_structural_issues(raw_text, original_length)
91
+ return self._create_result(matches, flags, start)
92
+
93
+ # Run patterns on raw text -- catches obfuscation-specific patterns
94
+ # (e.g. invisible_unicode, leetspeak_injection) that normalisation
95
+ # removes. Run whenever EITHER pass has keywords: if only the
96
+ # normalised text has keywords (pure leet-speak with no other
97
+ # fast-filter hits), we still want the raw pass to fire obfuscation
98
+ # patterns like leetspeak_injection.
99
+ raw_matches = (
100
+ self._detect_patterns(raw_text) if (raw_has_keywords or norm_has_keywords) else []
101
+ )
102
+
103
+ # Run patterns on normalised text -- catches injection patterns
104
+ # hidden behind leet-speak, whitespace, or homoglyph obfuscation.
105
+ # Matches are tagged ``normalised=True`` because their
106
+ # position/matched values reference the transformed text.
107
+ norm_matches_raw = self._detect_patterns(analysis_text) if norm_has_keywords else []
108
+ norm_matches = [
109
+ PatternMatch(
110
+ pattern=m.pattern,
111
+ matched=m.matched,
112
+ position=m.position,
113
+ category=m.category,
114
+ severity=m.severity,
115
+ normalised=True,
116
+ )
117
+ for m in norm_matches_raw
118
+ ]
119
+
120
+ # Merge: normalised matches take priority. Raw-only matches are
121
+ # appended for patterns that fired on the original text but not the
122
+ # normalised form (e.g. obfuscation-detection patterns that match the
123
+ # raw encoding characters).
124
+ seen_patterns = {m.pattern for m in norm_matches}
125
+ merged_matches: list[PatternMatch] = [*norm_matches]
126
+ merged_matches.extend(m for m in raw_matches if m.pattern not in seen_patterns)
127
+
128
+ flags = self._detect_structural_issues(raw_text, original_length)
129
+ return self._create_result(merged_matches, flags, start)
60
130
 
61
131
  # ------------------------------------------------------------------
62
132
  # Pattern detection
@@ -65,7 +135,6 @@ class PatternDetector:
65
135
  def _detect_patterns(self, text: str) -> list[PatternMatch]:
66
136
  matches: list[PatternMatch] = []
67
137
  for defn in self._patterns:
68
- # Use finditer for all patterns (handles global-like behavior)
69
138
  for m in defn.pattern.finditer(text):
70
139
  matches.append(
71
140
  PatternMatch(