stackone-defender 0.1.2__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. stackone_defender-0.6.2/.release-please-manifest.json +1 -0
  2. stackone_defender-0.6.2/CHANGELOG.md +117 -0
  3. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/PKG-INFO +26 -4
  4. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/README.md +23 -3
  5. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/pyproject.toml +5 -1
  6. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/__init__.py +14 -0
  7. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/onnx_classifier.py +19 -1
  8. stackone_defender-0.6.2/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
  9. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/config.py +46 -4
  10. stackone_defender-0.6.2/src/stackone_defender/core/prompt_defense.py +313 -0
  11. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/core/tool_result_sanitizer.py +60 -20
  12. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
  13. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
  14. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
  15. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
  16. stackone_defender-0.6.2/src/stackone_defender/sfe/__init__.py +19 -0
  17. stackone_defender-0.6.2/src/stackone_defender/sfe/model.ftz +0 -0
  18. stackone_defender-0.6.2/src/stackone_defender/sfe/preprocess.py +232 -0
  19. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/types.py +23 -4
  20. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_integration.py +40 -31
  21. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_onnx_classifier.py +24 -0
  22. stackone_defender-0.6.2/tests/test_sfe.py +43 -0
  23. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_tier2_classifier.py +19 -0
  24. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/uv.lock +30 -2
  25. stackone_defender-0.1.2/.release-please-manifest.json +0 -1
  26. stackone_defender-0.1.2/CHANGELOG.md +0 -43
  27. stackone_defender-0.1.2/src/stackone_defender/classifiers/tier2_classifier.py +0 -173
  28. stackone_defender-0.1.2/src/stackone_defender/core/prompt_defense.py +0 -202
  29. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
  30. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
  31. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
  32. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.github/workflows/ci.yaml +0 -0
  33. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.github/workflows/release.yaml +0 -0
  34. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.gitignore +0 -0
  35. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.python-version +0 -0
  36. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.release-please-config.json +0 -0
  37. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/config.json +0 -0
  38. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/model_quantized.onnx +0 -0
  39. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/tokenizer.json +0 -0
  40. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/tokenizer_config.json +0 -0
  41. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/__init__.py +0 -0
  42. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
  43. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/patterns.py +0 -0
  44. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/core/__init__.py +0 -0
  45. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/__init__.py +0 -0
  46. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
  47. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/normalizer.py +0 -0
  48. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
  49. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
  50. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/sanitizer.py +0 -0
  51. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/__init__.py +0 -0
  52. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/boundary.py +0 -0
  53. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/field_detection.py +0 -0
  54. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/structure.py +0 -0
  55. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/__init__.py +0 -0
  56. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_pattern_detector.py +0 -0
  57. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_sanitizers.py +0 -0
  58. {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
1
+ {".":"0.6.2"}
@@ -0,0 +1,117 @@
1
+ # Changelog
2
+
3
+ ## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
4
+
5
+
6
+ ### ⚠ BREAKING CHANGES
7
+
8
+ * Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
9
+
10
+ ### Features
11
+
12
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
13
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
14
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
15
+ * align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
16
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
17
+ * **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
18
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
19
+ * remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
20
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
21
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
22
+ * upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
23
+ * upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
24
+
25
+
26
+ ### Bug Fixes
27
+
28
+ * address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
29
+ * **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
30
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
31
+ * default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
32
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
33
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
34
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
35
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
36
+ * **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
37
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
38
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
39
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
40
+
41
+
42
+ ### Dependencies
43
+
44
+ * **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
45
+
46
+
47
+ ### Documentation
48
+
49
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
50
+ * update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
51
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
52
+
53
+
54
+ ### Miscellaneous Chores
55
+
56
+ * prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
57
+
58
+ ## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
59
+
60
+ ### Features
61
+
62
+ * align Python package behavior with `@stackone/defender` 0.6.1
63
+ * add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
64
+ * add packed-chunk Tier 2 batching and density-adjusted scoring
65
+ * add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
66
+ * add cumulative-risk fractional thresholds to reduce list-response false positives
67
+
68
+ ### Bug Fixes
69
+
70
+ * use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
71
+
72
+ ### Breaking Changes
73
+
74
+ * Python package version jumps from `0.1.2` to `0.6.1` so that its release train matches the TypeScript package (`@stackone/defender`).
75
+ * `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
76
+
77
+ ## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
78
+
79
+
80
+ ### Bug Fixes
81
+
82
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
83
+
84
+
85
+ ### Documentation
86
+
87
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
88
+
89
+ ## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
90
+
91
+
92
+ ### Features
93
+
94
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
95
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
96
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
97
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
98
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
99
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
100
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
101
+
102
+
103
+ ### Bug Fixes
104
+
105
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
106
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
107
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
108
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
109
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
110
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
111
+
112
+
113
+ ### Documentation
114
+
115
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
116
+
117
+ ## Changelog
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stackone-defender
3
- Version: 0.1.2
3
+ Version: 0.6.2
4
4
  Summary: Indirect prompt injection defense for AI agents using tool calls
5
5
  Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
6
6
  Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
@@ -20,6 +20,8 @@ Provides-Extra: onnx
20
20
  Requires-Dist: numpy>=1.24.0; extra == 'onnx'
21
21
  Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
22
22
  Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
23
+ Provides-Extra: sfe
24
+ Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
23
25
  Description-Content-Type: text/markdown
24
26
 
25
27
  <div align="center">
@@ -74,6 +76,15 @@ pip install stackone-defender[onnx]
74
76
 
75
77
  The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
76
78
 
79
+ **SFE preprocessor (optional)** — install the `sfe` extra:
80
+
81
+ ```bash
82
+ pip install stackone-defender[sfe]
83
+ # or: uv add "stackone-defender[sfe]"
84
+ ```
85
+
86
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
87
+
77
88
  ## Quick start
78
89
 
79
90
  ```python
@@ -113,11 +124,17 @@ else:
113
124
 
114
125
  ### Tier 2 — ML classification (ONNX)
115
126
 
116
- Sentence-level MiniLM classifier (int8 ONNX ~22 MB, bundled):
127
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
117
128
 
118
- - Split text into sentences, score each (0.0 = benign, 1.0 = injection-like), take the max
129
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
119
130
  - Catches paraphrased or novel injections missed by regex
120
- - Roughly ~10 ms per batch after warmup (CPU)
131
+ - Uses chunked batch inference to bound memory on large payloads
132
+
133
+ ### Optional SFE preprocessor
134
+
135
+ - `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
136
+ - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
137
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
121
138
 
122
139
  **Benchmarks** (F1 @ threshold 0.5):
123
140
 
@@ -150,6 +167,7 @@ defense = create_prompt_defense(
150
167
  block_high_risk=False,
151
168
  default_risk_level="medium",
152
169
  tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
170
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
153
171
  config={
154
172
  "tier2": {
155
173
  "high_risk_threshold": 0.8,
@@ -164,6 +182,8 @@ defense = create_prompt_defense(
164
182
  Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
165
183
 
166
184
  ```python
185
+ from dataclasses import dataclass, field
186
+
167
187
  @dataclass
168
188
  class DefenseResult:
169
189
  allowed: bool
@@ -175,6 +195,8 @@ class DefenseResult:
175
195
  tier2_score: float | None = None
176
196
  tier2_skip_reason: str | None = None
177
197
  max_sentence: str | None = None
198
+ fields_dropped: list[str] = field(default_factory=list)
199
+ truncated_at_depth: bool | None = None
178
200
  latency_ms: float = 0.0
179
201
  ```
180
202
 
@@ -50,6 +50,15 @@ pip install stackone-defender[onnx]
50
50
 
51
51
  The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
52
52
 
53
+ **SFE preprocessor (optional)** — install the `sfe` extra:
54
+
55
+ ```bash
56
+ pip install stackone-defender[sfe]
57
+ # or: uv add "stackone-defender[sfe]"
58
+ ```
59
+
60
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
61
+
53
62
  ## Quick start
54
63
 
55
64
  ```python
@@ -89,11 +98,17 @@ else:
89
98
 
90
99
  ### Tier 2 — ML classification (ONNX)
91
100
 
92
- Sentence-level MiniLM classifier (int8 ONNX ~22 MB, bundled):
101
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
93
102
 
94
- - Split text into sentences, score each (0.0 = benign, 1.0 = injection-like), take the max
103
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
95
104
  - Catches paraphrased or novel injections missed by regex
96
- - Roughly ~10 ms per batch after warmup (CPU)
105
+ - Uses chunked batch inference to bound memory on large payloads
106
+
107
+ ### Optional SFE preprocessor
108
+
109
+ - `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
110
+ - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
111
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
97
112
 
98
113
  **Benchmarks** (F1 @ threshold 0.5):
99
114
 
@@ -126,6 +141,7 @@ defense = create_prompt_defense(
126
141
  block_high_risk=False,
127
142
  default_risk_level="medium",
128
143
  tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
144
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
129
145
  config={
130
146
  "tier2": {
131
147
  "high_risk_threshold": 0.8,
@@ -140,6 +156,8 @@ defense = create_prompt_defense(
140
156
  Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
141
157
 
142
158
  ```python
159
+ from dataclasses import dataclass, field
160
+
143
161
  @dataclass
144
162
  class DefenseResult:
145
163
  allowed: bool
@@ -151,6 +169,8 @@ class DefenseResult:
151
169
  tier2_score: float | None = None
152
170
  tier2_skip_reason: str | None = None
153
171
  max_sentence: str | None = None
172
+ fields_dropped: list[str] = field(default_factory=list)
173
+ truncated_at_depth: bool | None = None
154
174
  latency_ms: float = 0.0
155
175
  ```
156
176
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "stackone-defender"
3
- version = "0.1.2"
3
+ version = "0.6.2"
4
4
  description = "Indirect prompt injection defense for AI agents using tool calls"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -25,6 +25,9 @@ Repository = "https://github.com/StackOneHQ/stackone-defender"
25
25
 
26
26
  [project.optional-dependencies]
27
27
  onnx = ["onnxruntime>=1.16.0", "tokenizers>=0.15.0", "numpy>=1.24.0"]
28
+ # fasttext-ng provides the `fasttext` module (maintained bindings; supports 3.13).
29
+ # Pulls in numpy>=2.3; SFE still fails open when the import or model load fails.
30
+ sfe = ["fasttext-ng>=0.9.3"]
28
31
 
29
32
  [dependency-groups]
30
33
  dev = [
@@ -32,6 +35,7 @@ dev = [
32
35
  "onnxruntime>=1.16.0",
33
36
  "tokenizers>=0.15.0",
34
37
  "numpy>=1.24.0",
38
+ "fasttext-ng>=0.9.3",
35
39
  ]
36
40
 
37
41
  [build-system]
@@ -12,12 +12,26 @@ Usage:
12
12
  """
13
13
 
14
14
  from .core.prompt_defense import PromptDefense, create_prompt_defense
15
+ from .sfe.preprocess import (
16
+ DropDecision,
17
+ SfePredictor,
18
+ SfePreprocessResult,
19
+ get_default_predictor,
20
+ get_default_sfe_model_path,
21
+ sfe_preprocess,
22
+ )
15
23
  from .types import DefenseResult, RiskLevel, Tier1Result
16
24
 
17
25
  __all__ = [
18
26
  "DefenseResult",
27
+ "DropDecision",
19
28
  "PromptDefense",
20
29
  "RiskLevel",
30
+ "SfePredictor",
31
+ "SfePreprocessResult",
21
32
  "Tier1Result",
22
33
  "create_prompt_defense",
34
+ "get_default_predictor",
35
+ "get_default_sfe_model_path",
36
+ "sfe_preprocess",
23
37
  ]
@@ -37,6 +37,8 @@ def _sigmoid(x: float) -> float:
37
37
  class OnnxClassifier:
38
38
  """ONNX Classifier for fine-tuned MiniLM models."""
39
39
 
40
+ _MAX_BATCH_CHUNK = 32
41
+
40
42
  def __init__(self, model_path: str | None = None):
41
43
  self._model_path = model_path or _default_model_path()
42
44
  self._session = None
@@ -105,10 +107,17 @@ class OnnxClassifier:
105
107
  return _sigmoid(logit)
106
108
 
107
109
  def classify_batch(self, texts: list[str]) -> list[float]:
108
- """Classify multiple texts in batch."""
110
+ """Classify multiple texts in batch, bounded by chunk size."""
109
111
  if not texts:
110
112
  return []
111
113
  self._ensure_loaded()
114
+ all_scores: list[float] = []
115
+ for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
116
+ chunk = texts[offset: offset + self._MAX_BATCH_CHUNK]
117
+ all_scores.extend(self._classify_batch_chunk(chunk))
118
+ return all_scores
119
+
120
+ def _classify_batch_chunk(self, texts: list[str]) -> list[float]:
112
121
  import numpy as np
113
122
 
114
123
  encodings = self._tokenizer.encode_batch(texts)
@@ -119,6 +128,15 @@ class OnnxClassifier:
119
128
  logits = results[0]
120
129
  return [_sigmoid(float(logits[i][0])) for i in range(len(texts))]
121
130
 
131
+ def count_tokens(self, text: str) -> int:
132
+ self._ensure_loaded()
133
+ encoding = self._tokenizer.encode(text)
134
+ # Padding is enabled at a fixed length; count only real (attended) tokens.
135
+ return int(sum(encoding.attention_mask))
136
+
137
+ def get_max_length(self) -> int:
138
+ return self._max_length
139
+
122
140
  def warmup(self) -> None:
123
141
  self.load_model()
124
142
 
@@ -0,0 +1,291 @@
1
+ """Tier 2 Classifier: ML-based prompt injection detection (ONNX only)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ import time
7
+ from typing import Any
8
+
9
+ from ..types import RiskLevel, Tier2Result
10
+ from .onnx_classifier import OnnxClassifier
11
+
12
# Baseline settings for Tier2Classifier; a caller-supplied config dict is
# merged on top of a copy of these values in Tier2Classifier.__init__.
DEFAULT_TIER2_CLASSIFIER_CONFIG = {
    # Scores >= this value map to the "high" risk level.
    "high_risk_threshold": 0.8,
    # Scores >= this value (and below the high threshold) map to "medium";
    # also the default cut-off used by Tier2Classifier.is_injection.
    "medium_risk_threshold": 0.5,
    # Texts/sentences with fewer characters than this are skipped.
    "min_text_length": 10,
    # Texts/sentences longer than this many characters are truncated
    # before being passed to the ONNX classifier.
    "max_text_length": 10000,
}
18
+
19
+
20
class Tier2Classifier:
    """Tier 2 Classifier using ONNX inference.

    Wraps an ``OnnxClassifier`` with length gating, truncation to
    ``max_text_length``, sentence/chunk preparation, and mapping of scores
    to risk levels.

    ``classify``, ``classify_by_sentence`` and ``classify_by_chunks`` report
    inference failures as a *skipped* result instead of raising.
    ``classify_chunks_batch`` lets backend errors propagate to the caller.
    """

    # Tokens subtracted from the model window and from per-sentence token
    # counts when budgeting chunk content.
    # NOTE(review): presumably the two special tokens the tokenizer adds
    # around every input -- confirm against OnnxClassifier's tokenizer setup.
    _RESERVED_TOKENS = 2

    def __init__(self, config: dict | None = None):
        """Create a classifier.

        Args:
            config: Optional overrides merged over a copy of
                ``DEFAULT_TIER2_CLASSIFIER_CONFIG``. An ``"onnx_model_path"``
                entry, when present, is forwarded to ``OnnxClassifier``
                (``None`` otherwise).
        """
        cfg = dict(DEFAULT_TIER2_CLASSIFIER_CONFIG)
        if config:
            cfg.update(config)
        self._high_risk_threshold: float = cfg["high_risk_threshold"]
        self._medium_risk_threshold: float = cfg["medium_risk_threshold"]
        self._min_text_length: int = cfg["min_text_length"]
        self._max_text_length: int = cfg["max_text_length"]
        self._onnx = OnnxClassifier(cfg.get("onnx_model_path"))

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _skipped(reason: str, start: float) -> dict[str, Any]:
        """Build the dict returned by the dict-based methods when skipping."""
        return {
            "score": 0,
            "confidence": 0,
            "skipped": True,
            "skip_reason": reason,
            "latency_ms": _ms(start),
        }

    @staticmethod
    def _safe_score(raw: Any) -> float:
        """Return ``raw`` if it is a real number, else ``0.0``.

        ``raw == raw`` is false only for NaN, so NaN and non-numeric values
        are both replaced by ``0.0``.
        """
        return raw if isinstance(raw, (int, float)) and raw == raw else 0.0

    @staticmethod
    def _rank_scores(labels: list[str], scores: list[Any]) -> tuple[list[dict[str, Any]], float, str]:
        """Pair labels with sanitized scores and find the highest one.

        Returns ``(entries, max_score, max_label)`` where ``entries`` is a
        list of ``{"sentence": label, "score": score}`` dicts. ``max_score``
        starts at ``0.0`` and ``max_label`` at ``""``; they change only when
        a score is strictly greater, so the first of equal maxima wins.
        """
        entries: list[dict[str, Any]] = []
        max_score = 0.0
        max_label = ""
        for label, raw in zip(labels, scores):
            score = Tier2Classifier._safe_score(raw)
            entries.append({"sentence": label, "score": score})
            if score > max_score:
                max_score = score
                max_label = label
        return entries, max_score, max_label

    def _bound(self, text: str) -> str:
        """Truncate ``text`` to at most ``max_text_length`` characters."""
        return text[: self._max_text_length]

    def _classifiable_sentences(self, text: str) -> list[str]:
        """Split ``text`` into sentences, dropping those below ``min_text_length``."""
        return [s for s in _split_into_sentences(text) if len(s) >= self._min_text_length]

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def is_ready(self) -> bool:
        """Return whether the underlying ONNX classifier reports it is loaded."""
        return self._onnx.is_loaded()

    def warmup(self) -> None:
        """Delegate to the underlying ONNX classifier's ``warmup``."""
        self._onnx.warmup()

    # ------------------------------------------------------------------
    # Classification
    # ------------------------------------------------------------------

    def classify(self, text: str) -> Tier2Result:
        """Score a single text.

        Returns a skipped ``Tier2Result`` (score 0) when the text is shorter
        than ``min_text_length`` or when inference raises. Otherwise the text
        is truncated to ``max_text_length`` and scored; ``confidence`` is the
        score's distance from 0.5 rescaled to the range 0..1.
        """
        start = time.perf_counter()
        if len(text) < self._min_text_length:
            return Tier2Result(
                score=0,
                confidence=0,
                skipped=True,
                skip_reason=f"Text too short ({len(text)} < {self._min_text_length})",
                latency_ms=_ms(start),
            )

        analysis_text = self._bound(text)

        try:
            score = self._onnx.classify(analysis_text)
            confidence = abs(score - 0.5) * 2
            return Tier2Result(score=score, confidence=confidence, skipped=False, latency_ms=_ms(start))
        except Exception as e:
            return Tier2Result(
                score=0,
                confidence=0,
                skipped=True,
                skip_reason=f"Classification error: {e}",
                latency_ms=_ms(start),
            )

    def classify_batch(self, texts: list[str]) -> list[Tier2Result]:
        """Run ``classify`` on each text independently, preserving order."""
        return [self.classify(t) for t in texts]

    def classify_by_sentence(self, text: str) -> dict[str, Any]:
        """Classify text by sentence and return max score.

        Sentences shorter than ``min_text_length`` are ignored; the rest are
        truncated to ``max_text_length`` for scoring, but the untruncated
        sentence is what is reported back.

        Returns a dict with ``score``, ``confidence``, ``skipped`` and
        ``latency_ms``. On success it also has ``max_sentence`` and
        ``sentence_scores``; when skipped it has ``skip_reason`` instead.
        """
        start = time.perf_counter()
        sentences = _split_into_sentences(text)
        if not sentences:
            return self._skipped("No sentences found", start)

        kept = [s for s in sentences if len(s) >= self._min_text_length]
        if not kept:
            return self._skipped("No classifiable sentences", start)

        try:
            scores = list(self._onnx.classify_batch([self._bound(s) for s in kept]))
        except Exception as e:
            return self._skipped(f"Classification error: {e}", start)

        # A backend that returns the wrong number of scores is reported as a
        # classification error, in line with the other failure paths, rather
        # than surfacing as a ValueError from a strict zip.
        if len(scores) != len(kept):
            return self._skipped(
                f"Classification error: expected {len(kept)} scores, got {len(scores)}",
                start,
            )

        sentence_scores, max_score, max_sentence = self._rank_scores(kept, scores)
        return {
            "score": max_score,
            "confidence": abs(max_score - 0.5) * 2,
            "skipped": False,
            "latency_ms": _ms(start),
            "max_sentence": max_sentence,
            "sentence_scores": sentence_scores,
        }

    def classify_by_chunks(self, text: str) -> dict[str, Any]:
        """Classify text as one input if it fits the model, else as packed chunks.

        The text is truncated to ``max_text_length``. If its token count is
        within the model's maximum length it is scored in a single call;
        otherwise it is split into sentences, packed into chunks and scored
        in a batch, and the highest chunk score is returned.

        Returns the same dict shape as ``classify_by_sentence``; here
        ``max_sentence`` and the ``sentence_scores`` entries hold chunks.
        Warmup, token-count and classification failures yield a skipped result.
        """
        start = time.perf_counter()
        if len(text) < self._min_text_length:
            return self._skipped("Text below minTextLength", start)

        model_max_len = self._onnx.get_max_length()
        bounded = self._bound(text)

        try:
            self._onnx.warmup()
        except Exception as e:
            return self._skipped(f"Warmup error: {e}", start)

        try:
            total_tokens = self._onnx.count_tokens(bounded)
        except Exception as e:
            return self._skipped(f"Token count error: {e}", start)

        if total_tokens <= model_max_len:
            # Whole text fits in one model input: no splitting needed.
            try:
                score = self._onnx.classify(bounded)
            except Exception as e:
                return self._skipped(f"Classification error: {e}", start)
            safe_score = self._safe_score(score)
            return {
                "score": safe_score,
                "confidence": abs(safe_score - 0.5) * 2,
                "skipped": False,
                "max_sentence": bounded,
                "sentence_scores": [{"sentence": bounded, "score": safe_score}],
                "latency_ms": _ms(start),
            }

        sentences = self._classifiable_sentences(bounded)
        if not sentences:
            return self._skipped("No classifiable sentences", start)

        try:
            chunks = self._pack_sentences(sentences, model_max_len - self._RESERVED_TOKENS)
            scores = list(self._onnx.classify_batch(chunks))
        except Exception as e:
            return self._skipped(f"Classification error: {e}", start)

        # Every returned score is reported; a score with no matching chunk
        # is labelled with an empty string.
        labels = [chunks[i] if i < len(chunks) else "" for i in range(len(scores))]
        chunk_scores, max_score, max_chunk = self._rank_scores(labels, scores)
        return {
            "score": max_score,
            "confidence": abs(max_score - 0.5) * 2,
            "skipped": False,
            "max_sentence": max_chunk,
            "sentence_scores": chunk_scores,
            "latency_ms": _ms(start),
        }

    def prepare_chunks(self, text: str) -> dict[str, Any]:
        """Compute the chunks ``text`` would be scored as, without scoring them.

        Returns ``{"chunks": [...], "skipped": False}`` on success, or
        ``{"chunks": [], "skipped": True, "skip_reason": ...}`` when the text
        is too short, warmup or token counting fails, or no sentence is long
        enough. Errors raised while packing sentences are not caught here.
        """
        if len(text) < self._min_text_length:
            return {"chunks": [], "skipped": True, "skip_reason": "Text below minTextLength"}

        model_max_len = self._onnx.get_max_length()
        bounded = self._bound(text)
        try:
            self._onnx.warmup()
        except Exception as e:
            return {"chunks": [], "skipped": True, "skip_reason": f"Warmup error: {e}"}

        # Cheap pre-check on character count that avoids tokenizing short texts.
        # NOTE(review): relies on a text never producing more tokens than it
        # has characters -- confirm for the bundled tokenizer.
        if len(bounded) + self._RESERVED_TOKENS <= model_max_len:
            return {"chunks": [bounded], "skipped": False}

        try:
            total_tokens = self._onnx.count_tokens(bounded)
        except Exception as e:
            return {"chunks": [], "skipped": True, "skip_reason": f"Token count error: {e}"}
        if total_tokens <= model_max_len:
            return {"chunks": [bounded], "skipped": False}

        sentences = self._classifiable_sentences(bounded)
        if not sentences:
            return {"chunks": [], "skipped": True, "skip_reason": "No classifiable sentences"}
        return {
            "chunks": self._pack_sentences(sentences, model_max_len - self._RESERVED_TOKENS),
            "skipped": False,
        }

    def classify_chunks_batch(self, chunks: list[str]) -> list[float]:
        """Score prepared chunks in one batch call.

        Returns ``[]`` for empty input without touching the model. Warmup and
        inference errors propagate to the caller.
        """
        if not chunks:
            return []
        self._onnx.warmup()
        return self._onnx.classify_batch(chunks)

    def _pack_sentences(self, sentences: list[str], max_content_tokens: int) -> list[str]:
        """Greedily join consecutive sentences into chunks within a token budget.

        Each sentence's cost is its token count minus ``_RESERVED_TOKENS``
        (never below zero). Sentences are joined with a single space while
        the running cost stays within ``max_content_tokens``. A sentence that
        alone exceeds the budget is emitted as its own chunk, after flushing
        whatever was accumulated before it. Sentence order is preserved.
        """
        chunks: list[str] = []
        current: list[str] = []
        current_tokens = 0

        for sentence in sentences:
            cost = max(0, self._onnx.count_tokens(sentence) - self._RESERVED_TOKENS)

            if cost > max_content_tokens:
                # Oversized sentence: close the open chunk, emit it alone.
                if current:
                    chunks.append(" ".join(current))
                    current = []
                    current_tokens = 0
                chunks.append(sentence)
                continue

            if current_tokens + cost > max_content_tokens:
                # Budget exceeded: close the open chunk and start a new one.
                chunks.append(" ".join(current))
                current = [sentence]
                current_tokens = cost
            else:
                current.append(sentence)
                current_tokens += cost

        if current:
            chunks.append(" ".join(current))

        return chunks

    # ------------------------------------------------------------------
    # Thresholds and configuration
    # ------------------------------------------------------------------

    def is_injection(self, text: str, threshold: float | None = None) -> bool:
        """Return True when ``classify(text)`` scores at or above the threshold.

        ``threshold`` defaults to the medium-risk threshold. A skipped
        classification always yields ``False``.
        """
        result = self.classify(text)
        if result.skipped:
            return False
        return result.score >= (threshold if threshold is not None else self._medium_risk_threshold)

    def get_config(self) -> dict:
        """Return the effective thresholds and length limits as a new dict."""
        return {
            "high_risk_threshold": self._high_risk_threshold,
            "medium_risk_threshold": self._medium_risk_threshold,
            "min_text_length": self._min_text_length,
            "max_text_length": self._max_text_length,
        }

    def get_risk_level(self, score: float) -> RiskLevel:
        """Map a score to ``"high"``, ``"medium"`` or ``"low"`` using the thresholds."""
        if score >= self._high_risk_threshold:
            return "high"
        if score >= self._medium_risk_threshold:
            return "medium"
        return "low"
266
+
267
+
268
def create_tier2_classifier(config: dict | None = None) -> Tier2Classifier:
    """Build a ``Tier2Classifier``, passing ``config`` through to its constructor."""
    classifier = Tier2Classifier(config)
    return classifier
270
+
271
+
272
def _ms(start: float) -> float:
    """Return the milliseconds elapsed since ``start``, a ``time.perf_counter()`` reading."""
    elapsed_seconds = time.perf_counter() - start
    return elapsed_seconds * 1000
274
+
275
+
276
def _split_into_sentences(text: str) -> list[str]:
    """Split text into sentences for granular analysis.

    Boundaries are: whitespace after ``.``, ``!`` or ``?``; blank lines; a
    newline followed by an uppercase letter, digit, ``#``, ``-`` or ``*``;
    and a newline (with optional preceding whitespace) after a colon.
    Pieces are stripped and empty ones dropped. A piece longer than 200
    characters that still contains newlines is further split line by line.
    """
    boundary = r"(?<=[.!?])\s+|\n\n+|\n(?=[A-Z0-9#\-*])|(?<=:)\s*\n"
    result: list[str] = []
    for piece in (raw.strip() for raw in re.split(boundary, text)):
        if not piece:
            continue
        if len(piece) <= 200 or "\n" not in piece:
            result.append(piece)
            continue
        # Long multi-line piece: fall back to one entry per non-empty line.
        result.extend(line for line in map(str.strip, piece.split("\n")) if line)
    return result