stackone-defender 0.6.2__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. stackone_defender-0.7.0/.release-please-manifest.json +1 -0
  2. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/CHANGELOG.md +33 -0
  3. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/PKG-INFO +8 -6
  4. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/README.md +7 -5
  5. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/pyproject.toml +1 -1
  6. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/__init__.py +7 -1
  7. stackone_defender-0.7.0/src/stackone_defender/classifiers/onnx_classifier.py +276 -0
  8. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/pattern_detector.py +76 -7
  9. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/patterns.py +117 -9
  10. stackone_defender-0.7.0/src/stackone_defender/classifiers/tier2_classifier.py +477 -0
  11. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/config.py +2 -0
  12. stackone_defender-0.7.0/src/stackone_defender/core/prompt_defense.py +593 -0
  13. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/core/tool_result_sanitizer.py +36 -3
  14. stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5/classifier_config.json +47 -0
  15. {stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/model_quantized.onnx +0 -0
  16. stackone_defender-0.7.0/src/stackone_defender/sanitizers/encoding_detector.py +600 -0
  17. stackone_defender-0.7.0/src/stackone_defender/sanitizers/leet_normalizer.py +115 -0
  18. stackone_defender-0.7.0/src/stackone_defender/sanitizers/normalizer.py +166 -0
  19. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/sanitizer.py +50 -12
  20. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/types.py +50 -3
  21. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/utils/boundary.py +27 -5
  22. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/test_integration.py +186 -5
  23. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/test_onnx_classifier.py +131 -3
  24. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/test_pattern_detector.py +157 -3
  25. stackone_defender-0.7.0/tests/test_sanitizers.py +483 -0
  26. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/test_sfe.py +3 -2
  27. stackone_defender-0.7.0/tests/test_tier2_classifier.py +162 -0
  28. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/uv.lock +1 -1
  29. stackone_defender-0.6.2/.release-please-manifest.json +0 -1
  30. stackone_defender-0.6.2/src/stackone_defender/classifiers/onnx_classifier.py +0 -148
  31. stackone_defender-0.6.2/src/stackone_defender/classifiers/tier2_classifier.py +0 -291
  32. stackone_defender-0.6.2/src/stackone_defender/core/prompt_defense.py +0 -313
  33. stackone_defender-0.6.2/src/stackone_defender/sanitizers/encoding_detector.py +0 -180
  34. stackone_defender-0.6.2/src/stackone_defender/sanitizers/normalizer.py +0 -94
  35. stackone_defender-0.6.2/tests/test_sanitizers.py +0 -257
  36. stackone_defender-0.6.2/tests/test_tier2_classifier.py +0 -63
  37. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/.github/workflows/ci.yaml +0 -0
  38. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/.github/workflows/release.yaml +0 -0
  39. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/.gitignore +0 -0
  40. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/.python-version +0 -0
  41. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/.release-please-config.json +0 -0
  42. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/models/minilm-full-aug/config.json +0 -0
  43. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/models/minilm-full-aug/model_quantized.onnx +0 -0
  44. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/models/minilm-full-aug/tokenizer.json +0 -0
  45. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/models/minilm-full-aug/tokenizer_config.json +0 -0
  46. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/classifiers/__init__.py +0 -0
  47. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/core/__init__.py +0 -0
  48. {stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/config.json +0 -0
  49. {stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/tokenizer.json +0 -0
  50. {stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug → stackone_defender-0.7.0/src/stackone_defender/models/minilm-multihead-v5}/tokenizer_config.json +0 -0
  51. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/__init__.py +0 -0
  52. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
  53. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
  54. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sfe/__init__.py +0 -0
  55. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sfe/model.ftz +0 -0
  56. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/sfe/preprocess.py +0 -0
  57. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/utils/__init__.py +0 -0
  58. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/utils/field_detection.py +0 -0
  59. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/src/stackone_defender/utils/structure.py +0 -0
  60. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/__init__.py +0 -0
  61. {stackone_defender-0.6.2 → stackone_defender-0.7.0}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
1
+ {".":"0.7.0"}
@@ -1,5 +1,38 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.7.0](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.3...stackone-defender-v0.7.0) (2026-05-29)
4
+
5
+
6
+ ### ⚠ BREAKING CHANGES
7
+
8
+ * The default ONNX model directory changed from minilm-full-aug to minilm-multihead-v5. Custom code that hardcoded the old path will no longer load.
9
+
10
+ ### Features
11
+
12
+ * parity with TS defender 0.7.0 ([75d046a](https://github.com/StackOneHQ/stackone-defender/commit/75d046ab45066ee1f973e91357f7ecb23dea50c8))
13
+
14
+ ## [0.6.3](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.2...stackone-defender-v0.6.3) (2026-05-26)
15
+
16
+
17
+ ### ⚠ BREAKING CHANGES
18
+
19
+ * When `tier2_fields` is unset, Tier 2 scans all strings (no fallback to Tier 1 risky_field_names).
20
+
21
+ ### Features
22
+
23
+ * align Python package with @stackone/defender 0.6.3 ([a91a904](https://github.com/StackOneHQ/stackone-defender/commit/a91a904de2a08a29479afb2cff31e8488468ebaf))
24
+
25
+
26
+ ### Bug Fixes
27
+
28
+ * **ENG-269:** Python parity with @stackone/defender 0.6.3 ([7c312f1](https://github.com/StackOneHQ/stackone-defender/commit/7c312f1d1c858b2f25b49043d783ce7294638b82))
29
+
30
+
31
+ ### Miscellaneous Chores
32
+
33
+ * prepare release 0.6.3 ([8ef9888](https://github.com/StackOneHQ/stackone-defender/commit/8ef9888752713ed5df76c4eed3e117605a8fb9e6))
34
+ * retrigger release workflow after gh actions outage ([72f586b](https://github.com/StackOneHQ/stackone-defender/commit/72f586bcb974b1aab08e7525253d9d8a9c8bc59d))
35
+
3
36
  ## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
4
37
 
5
38
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stackone-defender
3
- Version: 0.6.2
3
+ Version: 0.7.0
4
4
  Summary: Indirect prompt injection defense for AI agents using tool calls
5
5
  Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
6
6
  Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
@@ -120,7 +120,7 @@ else:
120
120
  - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
121
121
  - **Pattern removal** — phrases like “ignore previous instructions”
122
122
  - **Encoding detection** — suspicious Base64/URL-shaped payloads
123
- - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
123
+ - **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
124
124
 
125
125
  ### Tier 2 — ML classification (ONNX)
126
126
 
@@ -132,8 +132,9 @@ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
132
132
 
133
133
  ### Optional SFE preprocessor
134
134
 
135
- - `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
136
- - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
135
+ - `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
136
+ - **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
137
+ - **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
137
138
  - Fails open if the runtime/model is unavailable: payload continues unfiltered
138
139
 
139
140
  **Benchmarks** (F1 @ threshold 0.5):
@@ -166,7 +167,8 @@ defense = create_prompt_defense(
166
167
  enable_tier2=True,
167
168
  block_high_risk=False,
168
169
  default_risk_level="medium",
169
- tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
170
+ annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
171
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
170
172
  use_sfe=True, # optional: enable semantic field extractor preprocessing
171
173
  config={
172
174
  "tier2": {
@@ -179,7 +181,7 @@ defense = create_prompt_defense(
179
181
 
180
182
  ### `defense.defend_tool_result(value, tool_name)`
181
183
 
182
- Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
184
+ Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
183
185
 
184
186
  ```python
185
187
  from dataclasses import dataclass, field
@@ -94,7 +94,7 @@ else:
94
94
  - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
95
95
  - **Pattern removal** — phrases like “ignore previous instructions”
96
96
  - **Encoding detection** — suspicious Base64/URL-shaped payloads
97
- - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
97
+ - **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
98
98
 
99
99
  ### Tier 2 — ML classification (ONNX)
100
100
 
@@ -106,8 +106,9 @@ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
106
106
 
107
107
  ### Optional SFE preprocessor
108
108
 
109
- - `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
110
- - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
109
+ - `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
110
+ - **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
111
+ - **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
111
112
  - Fails open if the runtime/model is unavailable: payload continues unfiltered
112
113
 
113
114
  **Benchmarks** (F1 @ threshold 0.5):
@@ -140,7 +141,8 @@ defense = create_prompt_defense(
140
141
  enable_tier2=True,
141
142
  block_high_risk=False,
142
143
  default_risk_level="medium",
143
- tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
144
+ annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
145
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
144
146
  use_sfe=True, # optional: enable semantic field extractor preprocessing
145
147
  config={
146
148
  "tier2": {
@@ -153,7 +155,7 @@ defense = create_prompt_defense(
153
155
 
154
156
  ### `defense.defend_tool_result(value, tool_name)`
155
157
 
156
- Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
158
+ Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
157
159
 
158
160
  ```python
159
161
  from dataclasses import dataclass, field
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "stackone-defender"
3
- version = "0.6.2"
3
+ version = "0.7.0"
4
4
  description = "Indirect prompt injection defense for AI agents using tool calls"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -11,6 +11,7 @@ Usage:
11
11
  print(f"Blocked: {result.risk_level}")
12
12
  """
13
13
 
14
+ from .classifiers.onnx_classifier import get_default_model_path
14
15
  from .core.prompt_defense import PromptDefense, create_prompt_defense
15
16
  from .sfe.preprocess import (
16
17
  DropDecision,
@@ -20,17 +21,22 @@ from .sfe.preprocess import (
20
21
  get_default_sfe_model_path,
21
22
  sfe_preprocess,
22
23
  )
23
- from .types import DefenseResult, RiskLevel, Tier1Result
24
+ from .types import DefenseResult, MultiheadConfig, RiskLevel, Tier1Result
25
+ from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
24
26
 
25
27
  __all__ = [
26
28
  "DefenseResult",
27
29
  "DropDecision",
30
+ "MultiheadConfig",
28
31
  "PromptDefense",
29
32
  "RiskLevel",
30
33
  "SfePredictor",
31
34
  "SfePreprocessResult",
32
35
  "Tier1Result",
36
+ "contains_boundary_patterns",
33
37
  "create_prompt_defense",
38
+ "generate_boundary_instructions",
39
+ "get_default_model_path",
34
40
  "get_default_predictor",
35
41
  "get_default_sfe_model_path",
36
42
  "sfe_preprocess",
@@ -0,0 +1,276 @@
1
+ """ONNX classifier for fine-tuned MiniLM prompt injection detection.
2
+
3
+ Pipeline: text -> tokenizer -> ONNX Runtime -> logit -> ``sigmoid(logit / T)``
4
+ -> score. Supports single-head ``[batch]`` / ``[batch, 1]`` models and
5
+ multi-head ``[batch, 2]`` models (main + aux). Temperature ``T`` enables
6
+ post-hoc calibration via temperature scaling.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import math
13
+ import threading
14
+ from pathlib import Path
15
+ from typing import Literal
16
+
17
+ _logger = logging.getLogger(__name__)
18
+
19
+ # Shared across all OnnxClassifier instances (keyed by resolved model dir path).
20
+ _session_cache: dict[str, tuple[object, object]] = {}
21
+ _registry_lock = threading.Lock()
22
+ _load_locks: dict[str, threading.Lock] = {}
23
+
24
+
25
def _lock_for_cache_key(cache_key: str) -> threading.Lock:
    """Return the load lock associated with ``cache_key``.

    The lock is created on first request and stored in ``_load_locks``; the
    registry itself is only touched while ``_registry_lock`` is held, so every
    caller asking for the same key receives the same lock object.
    """
    with _registry_lock:
        try:
            return _load_locks[cache_key]
        except KeyError:
            new_lock = threading.Lock()
            _load_locks[cache_key] = new_lock
            return new_lock
30
+
31
+
32
def get_default_model_path() -> str:
    """Return the absolute path to the bundled ONNX model directory.

    Exported so :class:`Tier2Classifier` can read model-specific calibration
    defaults from ``classifier_config.json`` at construction time without
    needing an :class:`OnnxClassifier` instance.
    """
    # This module lives one directory below the package root; the bundled
    # model directory sits under ``<package root>/models``.
    module_file = Path(__file__).resolve()
    package_root = module_file.parent.parent
    return str(package_root.joinpath("models", "minilm-multihead-v5"))
40
+
41
+
42
# Back-compat shim retained for internal users; same value as the public name.
def _default_model_path() -> str:
    """Return the bundled model directory by delegating to ``get_default_model_path``."""
    bundled_dir = get_default_model_path()
    return bundled_dir
45
+
46
+
47
+ def _sigmoid(x: float) -> float:
48
+ return 1.0 / (1.0 + math.exp(-x))
49
+
50
+
51
class OnnxClassifier:
    """ONNX classifier for fine-tuned MiniLM models.

    Loads the model lazily on first inference. The session and tokenizer
    are cached at module level so multiple instances pointing at the same
    model path share a single backing session (safe: ONNX Runtime
    guarantees thread-safe ``Run()`` from v1.7.0, and the ``tokenizers``
    library's encode methods do not mutate the tokenizer object).
    """

    # Maximum number of texts sent to the session in a single ``run`` call.
    _MAX_BATCH_CHUNK = 32

    def __init__(self, model_path: str | None = None, temperature_t: float | None = None):
        """Create a classifier; nothing is loaded until first use.

        Args:
            model_path: Directory containing ``tokenizer.json`` and
                ``model_quantized.onnx``. Defaults to the bundled model.
            temperature_t: Temperature ``T`` applied as ``sigmoid(logit / T)``.
                ``None`` means ``1.0`` (no calibration).

        Raises:
            ValueError: If ``temperature_t`` is not a positive finite number.
        """
        self._model_path = model_path or get_default_model_path()
        self._session = None
        self._tokenizer = None
        self._max_length = 256
        self._load_failed = False
        # Output mode is detected lazily from the logits shape on the first
        # inference call. ``None`` until then.
        self._output_mode: Literal["single", "multi"] | None = None
        # Temperature ``T`` must be a positive finite number. ``T <= 0`` is
        # undefined (divide-by-zero or sign flip) and almost certainly a
        # programming error rather than a config the caller wants gracefully
        # ignored.
        self._temperature_t = 1.0
        if temperature_t is not None:
            if not math.isfinite(temperature_t) or temperature_t <= 0:
                raise ValueError(
                    f"OnnxClassifier: temperature_t must be a positive finite number, got {temperature_t}"
                )
            self._temperature_t = float(temperature_t)

    # ------------------------------------------------------------------
    # Public introspection
    # ------------------------------------------------------------------

    def get_temperature(self) -> float:
        """Current temperature scaling factor (``1.0`` = no calibration)."""
        return self._temperature_t

    def get_output_mode(self) -> Literal["single", "multi"] | None:
        """Output mode of the loaded model.

        ``None`` until the first inference runs. ``"multi"`` indicates the
        model emits ``[batch, 2]`` logits (main + aux).
        """
        return self._output_mode

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_model(self, model_path: str | None = None) -> None:
        """Load the session and tokenizer if they are not already loaded.

        Args:
            model_path: Optional model directory. When it resolves to a
                different location than the current one, any previously
                loaded session, tokenizer and detected output mode are
                discarded so the instance never serves a model other than
                the one ``_model_path`` points at.

        Raises:
            ImportError: If the optional ONNX dependencies are missing (also
                raised on every later call once an import has failed).
            Exception: Whatever the tokenizer or ONNX Runtime raises when the
                model files cannot be loaded.
        """
        if model_path:
            if self._resolve_key(model_path) != self._resolve_key(self._model_path):
                # Switching models: drop state that belongs to the old model.
                self._session = None
                self._tokenizer = None
                self._output_mode = None
            self._model_path = model_path
        if self.is_loaded():
            return
        if self._load_failed:
            raise ImportError("ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]")
        self._load_model()

    @staticmethod
    def _resolve_key(model_path: str) -> str:
        """Return the resolved path string used to identify a model directory."""
        return str(Path(model_path).resolve())

    def _load_model(self) -> None:
        """Attach a (possibly shared) session and tokenizer to this instance."""
        cache_key = self._resolve_key(self._model_path)
        cached = _session_cache.get(cache_key)
        if cached:
            self._session, self._tokenizer = cached
            return

        with _lock_for_cache_key(cache_key):
            # Re-check under the lock: another thread may have finished
            # loading this model while we were waiting.
            cached = _session_cache.get(cache_key)
            if cached:
                self._session, self._tokenizer = cached
                return

            try:
                import numpy as np  # noqa: F401
                import onnxruntime as ort
                from tokenizers import Tokenizer
            except ImportError as e:
                self._load_failed = True
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise ImportError(
                    "ONNX dependencies not installed. Install with: pip install stackone-defender[onnx]"
                ) from e

            try:
                model_dir = Path(self._model_path)
                tokenizer = Tokenizer.from_file(str(model_dir / "tokenizer.json"))
                tokenizer.enable_truncation(max_length=self._max_length)
                tokenizer.enable_padding(length=self._max_length)
                session = ort.InferenceSession(str(model_dir / "model_quantized.onnx"))
            except Exception as e:
                _logger.warning("[defender] ONNX model failed to load: %s", e)
                raise

            # Publish only after both pieces loaded, so a failure above never
            # leaves the instance with a tokenizer but no session.
            self._session, self._tokenizer = session, tokenizer
            _session_cache[cache_key] = (session, tokenizer)

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------

    def classify(self, text: str) -> float:
        """Classify a single text, returning the main-head sigmoid score.

        For multi-head models only the main score is returned; callers that
        need the aux score should use :meth:`classify_pair`.
        """
        return self.classify_pair(text)[0]

    def classify_pair(self, text: str) -> tuple[float, float | None]:
        """Classify a single text, returning ``(main, aux)``.

        ``aux`` is ``None`` for single-head models. Both scores are
        sigmoid-activated with the configured temperature ``T``.
        """
        self._ensure_loaded()
        import numpy as np

        encoding = self._tokenizer.encode(text)
        input_ids = np.array([encoding.ids], dtype=np.int64)
        attention_mask = np.array([encoding.attention_mask], dtype=np.int64)

        logits = self._run_session(input_ids, attention_mask)
        # Row shape: (), (1,) or (2,) depending on model export.
        return self._pair_from_row(logits[0])

    def classify_batch(self, texts: list[str]) -> list[float]:
        """Classify multiple texts; returns main-head scores only.

        Back-compat wrapper around :meth:`classify_batch_pair`.
        """
        return [main for main, _ in self.classify_batch_pair(texts)]

    def classify_batch_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Classify multiple texts, returning ``(main, aux)`` per row.

        Aux is ``None`` per-row for single-head models. Chunks the input to
        bound native memory; the attention matrix is ``O(chunk * seq_len^2)``,
        and for MiniLM (``max_length=256``) a chunk of 32 keeps memory
        under ~50MB per call.
        """
        if not texts:
            return []
        self._ensure_loaded()
        all_pairs: list[tuple[float, float | None]] = []
        for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
            chunk = texts[offset : offset + self._MAX_BATCH_CHUNK]
            all_pairs.extend(self._classify_batch_chunk_pair(chunk))
        return all_pairs

    def _classify_batch_chunk_pair(self, texts: list[str]) -> list[tuple[float, float | None]]:
        """Run one already-bounded chunk through the model; caller ensures loading."""
        import numpy as np

        encodings = self._tokenizer.encode_batch(texts)
        input_ids = np.array([e.ids for e in encodings], dtype=np.int64)
        attention_mask = np.array([e.attention_mask for e in encodings], dtype=np.int64)

        logits = self._run_session(input_ids, attention_mask)
        # ``strict=True`` surfaces a row-count mismatch between inputs and
        # model output instead of silently dropping or misaligning scores.
        return [self._pair_from_row(row) for _, row in zip(texts, logits, strict=True)]

    def _run_session(self, input_ids, attention_mask):
        """Run the ONNX session and return the logits (first model output).

        Also records the output mode from the logits shape on first use.
        """
        results = self._session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
        logits = results[0]
        self._detect_output_mode(logits.shape)
        return logits

    def _pair_from_row(self, row) -> tuple[float, float | None]:
        """Convert one logits row into temperature-scaled ``(main, aux)`` scores."""
        t = self._temperature_t
        if self._output_mode == "multi":
            return _sigmoid(float(row[0]) / t), _sigmoid(float(row[1]) / t)
        # ``row`` may be a scalar (shape ``[batch]``) or 1-vector.
        main_logit = float(row[0]) if hasattr(row, "__len__") and len(row) > 0 else float(row)
        return _sigmoid(main_logit / t), None

    def _detect_output_mode(self, dims) -> None:
        """Detect output mode from the logits tensor shape on first inference.

        - ``[batch]`` or ``[batch, 1]`` -> ``"single"``
        - ``[batch, 2]`` -> ``"multi"`` (main + aux dual head)

        Idempotent: subsequent calls are no-ops once mode is set.
        """
        if self._output_mode is not None:
            return
        if dims is None or len(dims) < 2:
            self._output_mode = "single"
            return
        self._output_mode = "multi" if dims[1] == 2 else "single"

    # ------------------------------------------------------------------
    # Misc
    # ------------------------------------------------------------------

    def count_tokens(self, text: str) -> int:
        """Return the number of real (non-padding) tokens ``text`` encodes to."""
        self._ensure_loaded()
        encoding = self._tokenizer.encode(text)
        # Padding is enabled at a fixed length; count only real (attended) tokens.
        return int(sum(encoding.attention_mask))

    def get_max_length(self) -> int:
        """Return the fixed truncation/padding length used for tokenization."""
        return self._max_length

    def warmup(self) -> None:
        """Eagerly load the model so the first inference does not pay for it."""
        self.load_model()

    def is_loaded(self) -> bool:
        """Return ``True`` once both session and tokenizer are attached."""
        return self._session is not None and self._tokenizer is not None

    def _ensure_loaded(self) -> None:
        """Load the model on demand before an operation that needs it."""
        if not self.is_loaded():
            self.load_model()
@@ -9,7 +9,10 @@ from __future__ import annotations
9
9
  import math
10
10
  import re
11
11
  import time
12
+ import unicodedata
12
13
 
14
+ from ..sanitizers.leet_normalizer import normalize_leet_speak
15
+ from ..sanitizers.normalizer import normalize_unicode, normalize_whitespace, strip_combining_marks
13
16
  from ..types import PatternDefinition, PatternMatch, RiskLevel, StructuralFlag, Tier1Result
14
17
  from .patterns import ALL_PATTERNS, contains_filter_keywords
15
18
 
@@ -47,16 +50,83 @@ class PatternDetector:
47
50
  return self._empty_result(start)
48
51
 
49
52
  original_length = len(text)
50
- analysis_text = text[: self._max_analysis_length] if len(text) > self._max_analysis_length else text
53
+ raw_text = text[: self._max_analysis_length] if len(text) > self._max_analysis_length else text
54
+
55
+ # Normalisation chain: collapse obfuscation before injection pattern
56
+ # matching. Order matters:
57
+ # 1. NFD-decompose: precomposed accents become base + combining mark.
58
+ # 2. strip_combining_marks: Zalgo defense + accent stripping.
59
+ # 3. normalize_unicode: homoglyphs/fullwidth -> ASCII.
60
+ # 4. normalize_whitespace: collapse spaced letters + embedded newlines.
61
+ # 5. normalize_leet_speak: 1gn0r3 -> ignore.
62
+ # NFD-decomposition lives here (not in normalize_unicode) because it
63
+ # strips legitimate accents like ``café`` -> ``cafe`` -- fine for
64
+ # analysis but would be data loss if returned to callers. The result
65
+ # is analysis-only and never returned.
66
+ analysis_text = normalize_leet_speak(
67
+ normalize_whitespace(
68
+ normalize_unicode(strip_combining_marks(unicodedata.normalize("NFD", raw_text)))
69
+ )
70
+ )
51
71
 
72
+ # Fast filter: short-circuit if neither raw nor normalised text
73
+ # contains keywords. Raw text is checked to preserve detection of
74
+ # obfuscation patterns (e.g. invisible unicode, leet-speak variants)
75
+ # that are normalised away before injection patterns run. Disable the
76
+ # fast filter when custom patterns are provided -- callers may add
77
+ # patterns whose keywords aren't in the static list.
52
78
  should_use_fast_filter = self._use_fast_filter and not self._has_custom
53
- if should_use_fast_filter and not contains_filter_keywords(analysis_text):
54
- flags = self._detect_structural_issues(analysis_text, original_length)
79
+ raw_has_keywords = not should_use_fast_filter or contains_filter_keywords(raw_text)
80
+ norm_has_keywords = not should_use_fast_filter or contains_filter_keywords(analysis_text)
81
+
82
+ if not raw_has_keywords and not norm_has_keywords:
83
+ flags = self._detect_structural_issues(raw_text, original_length)
55
84
  return self._create_result([], flags, start)
56
85
 
57
- matches = self._detect_patterns(analysis_text)
58
- flags = self._detect_structural_issues(analysis_text, original_length)
59
- return self._create_result(matches, flags, start)
86
+ # Short-circuit: if normalisation produced no change, a single pass
87
+ # is sufficient and avoids doubling pattern work for plain-text input.
88
+ if raw_text == analysis_text:
89
+ matches = self._detect_patterns(raw_text) if raw_has_keywords else []
90
+ flags = self._detect_structural_issues(raw_text, original_length)
91
+ return self._create_result(matches, flags, start)
92
+
93
+ # Run patterns on raw text -- catches obfuscation-specific patterns
94
+ # (e.g. invisible_unicode, leetspeak_injection) that normalisation
95
+ # removes. Run whenever EITHER pass has keywords: if only the
96
+ # normalised text has keywords (pure leet-speak with no other
97
+ # fast-filter hits), we still want the raw pass to fire obfuscation
98
+ # patterns like leetspeak_injection.
99
+ raw_matches = (
100
+ self._detect_patterns(raw_text) if (raw_has_keywords or norm_has_keywords) else []
101
+ )
102
+
103
+ # Run patterns on normalised text -- catches injection patterns
104
+ # hidden behind leet-speak, whitespace, or homoglyph obfuscation.
105
+ # Matches are tagged ``normalised=True`` because their
106
+ # position/matched values reference the transformed text.
107
+ norm_matches_raw = self._detect_patterns(analysis_text) if norm_has_keywords else []
108
+ norm_matches = [
109
+ PatternMatch(
110
+ pattern=m.pattern,
111
+ matched=m.matched,
112
+ position=m.position,
113
+ category=m.category,
114
+ severity=m.severity,
115
+ normalised=True,
116
+ )
117
+ for m in norm_matches_raw
118
+ ]
119
+
120
+ # Merge: normalised matches take priority. Raw-only matches are
121
+ # appended for patterns that fired on the original text but not the
122
+ # normalised form (e.g. obfuscation-detection patterns that match the
123
+ # raw encoding characters).
124
+ seen_patterns = {m.pattern for m in norm_matches}
125
+ merged_matches: list[PatternMatch] = [*norm_matches]
126
+ merged_matches.extend(m for m in raw_matches if m.pattern not in seen_patterns)
127
+
128
+ flags = self._detect_structural_issues(raw_text, original_length)
129
+ return self._create_result(merged_matches, flags, start)
60
130
 
61
131
  # ------------------------------------------------------------------
62
132
  # Pattern detection
@@ -65,7 +135,6 @@ class PatternDetector:
65
135
  def _detect_patterns(self, text: str) -> list[PatternMatch]:
66
136
  matches: list[PatternMatch] = []
67
137
  for defn in self._patterns:
68
- # Use finditer for all patterns (handles global-like behavior)
69
138
  for m in defn.pattern.finditer(text):
70
139
  matches.append(
71
140
  PatternMatch(