stackone-defender 0.1.2__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. stackone_defender-0.6.3/.release-please-manifest.json +1 -0
  2. stackone_defender-0.6.3/CHANGELOG.md +139 -0
  3. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/PKG-INFO +31 -7
  4. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/README.md +28 -6
  5. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/pyproject.toml +5 -1
  6. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/__init__.py +17 -0
  7. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/onnx_classifier.py +19 -1
  8. stackone_defender-0.6.3/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
  9. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/config.py +46 -4
  10. stackone_defender-0.6.3/src/stackone_defender/core/prompt_defense.py +315 -0
  11. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/core/tool_result_sanitizer.py +67 -22
  12. stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
  13. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
  14. stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
  15. stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
  16. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/sanitizer.py +13 -7
  17. stackone_defender-0.6.3/src/stackone_defender/sfe/__init__.py +19 -0
  18. stackone_defender-0.6.3/src/stackone_defender/sfe/model.ftz +0 -0
  19. stackone_defender-0.6.3/src/stackone_defender/sfe/preprocess.py +232 -0
  20. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/types.py +31 -5
  21. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_integration.py +50 -32
  22. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_onnx_classifier.py +24 -0
  23. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_sanitizers.py +24 -4
  24. stackone_defender-0.6.3/tests/test_sfe.py +44 -0
  25. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_tier2_classifier.py +19 -0
  26. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/uv.lock +30 -2
  27. stackone_defender-0.1.2/.release-please-manifest.json +0 -1
  28. stackone_defender-0.1.2/CHANGELOG.md +0 -43
  29. stackone_defender-0.1.2/src/stackone_defender/classifiers/tier2_classifier.py +0 -173
  30. stackone_defender-0.1.2/src/stackone_defender/core/prompt_defense.py +0 -202
  31. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
  32. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
  33. stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
  34. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.github/workflows/ci.yaml +0 -0
  35. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.github/workflows/release.yaml +0 -0
  36. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.gitignore +0 -0
  37. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.python-version +0 -0
  38. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.release-please-config.json +0 -0
  39. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/config.json +0 -0
  40. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/model_quantized.onnx +0 -0
  41. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/tokenizer.json +0 -0
  42. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/tokenizer_config.json +0 -0
  43. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/__init__.py +0 -0
  44. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
  45. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/patterns.py +0 -0
  46. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/core/__init__.py +0 -0
  47. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/__init__.py +0 -0
  48. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
  49. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/normalizer.py +0 -0
  50. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
  51. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
  52. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/__init__.py +0 -0
  53. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/boundary.py +0 -0
  54. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/field_detection.py +0 -0
  55. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/structure.py +0 -0
  56. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/__init__.py +0 -0
  57. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_pattern_detector.py +0 -0
  58. {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_utils.py +0 -0
@@ -0,0 +1 @@
1
+ {".":"0.6.3"}
@@ -0,0 +1,139 @@
1
+ # Changelog
2
+
3
+ ## [0.6.3](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.2...stackone-defender-v0.6.3) (2026-05-26)
4
+
5
+
6
+ ### ⚠ BREAKING CHANGES
7
+
8
+ * When `tier2_fields` is unset, Tier 2 scans all strings (no fallback to the Tier 1 `risky_field_names`).
9
+
10
+ ### Features
11
+
12
+ * align Python package with @stackone/defender 0.6.3 ([a91a904](https://github.com/StackOneHQ/stackone-defender/commit/a91a904de2a08a29479afb2cff31e8488468ebaf))
13
+
14
+
15
+ ### Bug Fixes
16
+
17
+ * **ENG-269:** Python parity with @stackone/defender 0.6.3 ([7c312f1](https://github.com/StackOneHQ/stackone-defender/commit/7c312f1d1c858b2f25b49043d783ce7294638b82))
18
+
19
+
20
+ ### Miscellaneous Chores
21
+
22
+ * prepare release 0.6.3 ([8ef9888](https://github.com/StackOneHQ/stackone-defender/commit/8ef9888752713ed5df76c4eed3e117605a8fb9e6))
23
+ * retrigger release workflow after gh actions outage ([72f586b](https://github.com/StackOneHQ/stackone-defender/commit/72f586bcb974b1aab08e7525253d9d8a9c8bc59d))
24
+
25
+ ## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
26
+
27
+
28
+ ### ⚠ BREAKING CHANGES
29
+
30
+ * Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
31
+
32
+ ### Features
33
+
34
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
35
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
36
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
37
+ * align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
38
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
39
+ * **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
40
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
41
+ * remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
42
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
43
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
44
+ * upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
45
+ * upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
46
+
47
+
48
+ ### Bug Fixes
49
+
50
+ * address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
51
+ * **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
52
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
53
+ * default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
54
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
55
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
56
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
57
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
58
+ * **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
59
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
60
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
61
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
62
+
63
+
64
+ ### Dependencies
65
+
66
+ * **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
67
+
68
+
69
+ ### Documentation
70
+
71
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
72
+ * update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
73
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
74
+
75
+
76
+ ### Miscellaneous Chores
77
+
78
+ * prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
79
+
80
+ ## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
81
+
82
+ ### Features
83
+
84
+ * align Python package behavior with `@stackone/defender` 0.6.1
85
+ * add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
86
+ * add packed-chunk Tier 2 batching and density-adjusted scoring
87
+ * add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
88
+ * add cumulative-risk fractional thresholds to reduce list-response false positives
89
+
90
+ ### Bug Fixes
91
+
92
+ * use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
93
+
94
+ ### Breaking Changes
95
+
96
+ * The Python package version jumps from `0.1.2` to `0.6.1` to align the release train with the TypeScript package (`@stackone/defender`).
97
+ * `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
98
+
99
+ ## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
100
+
101
+
102
+ ### Bug Fixes
103
+
104
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
105
+
106
+
107
+ ### Documentation
108
+
109
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
110
+
111
+ ## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
112
+
113
+
114
+ ### Features
115
+
116
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
117
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
118
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
119
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
120
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
121
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
122
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
123
+
124
+
125
+ ### Bug Fixes
126
+
127
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
128
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
129
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
130
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
131
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
132
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
133
+
134
+
135
+ ### Documentation
136
+
137
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
138
+
139
+ ## Changelog
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stackone-defender
3
- Version: 0.1.2
3
+ Version: 0.6.3
4
4
  Summary: Indirect prompt injection defense for AI agents using tool calls
5
5
  Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
6
6
  Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
@@ -20,6 +20,8 @@ Provides-Extra: onnx
20
20
  Requires-Dist: numpy>=1.24.0; extra == 'onnx'
21
21
  Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
22
22
  Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
23
+ Provides-Extra: sfe
24
+ Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
23
25
  Description-Content-Type: text/markdown
24
26
 
25
27
  <div align="center">
@@ -74,6 +76,15 @@ pip install stackone-defender[onnx]
74
76
 
75
77
  The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
76
78
 
79
+ **SFE preprocessor (optional)** — add extras:
80
+
81
+ ```bash
82
+ pip install stackone-defender[sfe]
83
+ # or: uv add "stackone-defender[sfe]"
84
+ ```
85
+
86
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may provide prebuilt wheels for only some platforms; on the others, pip/uv builds from source, which needs a C++ toolchain.
87
+
77
88
  ## Quick start
78
89
 
79
90
  ```python
@@ -109,15 +120,22 @@ else:
109
120
  - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
110
121
  - **Pattern removal** — phrases like “ignore previous instructions”
111
122
  - **Encoding detection** — suspicious Base64/URL-shaped payloads
112
- - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
123
+ - **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
113
124
 
114
125
  ### Tier 2 — ML classification (ONNX)
115
126
 
116
- Sentence-level MiniLM classifier (int8 ONNX ~22 MB, bundled):
127
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
117
128
 
118
- - Split text into sentences, score each (0.0 = benign, 1.0 = injection-like), take the max
129
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
119
130
  - Catches paraphrased or novel injections missed by regex
120
- - Roughly ~10 ms per batch after warmup (CPU)
131
+ - Uses chunked batch inference to bound memory on large payloads
132
+
133
+ ### Optional SFE preprocessor
134
+
135
+ - `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
136
+ - **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
137
+ - **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
138
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
121
139
 
122
140
  **Benchmarks** (F1 @ threshold 0.5):
123
141
 
@@ -149,7 +167,9 @@ defense = create_prompt_defense(
149
167
  enable_tier2=True,
150
168
  block_high_risk=False,
151
169
  default_risk_level="medium",
152
- tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
170
+ annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
171
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
172
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
153
173
  config={
154
174
  "tier2": {
155
175
  "high_risk_threshold": 0.8,
@@ -161,9 +181,11 @@ defense = create_prompt_defense(
161
181
 
162
182
  ### `defense.defend_tool_result(value, tool_name)`
163
183
 
164
- Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
184
+ Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
165
185
 
166
186
  ```python
187
+ from dataclasses import dataclass, field
188
+
167
189
  @dataclass
168
190
  class DefenseResult:
169
191
  allowed: bool
@@ -175,6 +197,8 @@ class DefenseResult:
175
197
  tier2_score: float | None = None
176
198
  tier2_skip_reason: str | None = None
177
199
  max_sentence: str | None = None
200
+ fields_dropped: list[str] = field(default_factory=list)
201
+ truncated_at_depth: bool | None = None
178
202
  latency_ms: float = 0.0
179
203
  ```
180
204
 
@@ -50,6 +50,15 @@ pip install stackone-defender[onnx]
50
50
 
51
51
  The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
52
52
 
53
+ **SFE preprocessor (optional)** — add extras:
54
+
55
+ ```bash
56
+ pip install stackone-defender[sfe]
57
+ # or: uv add "stackone-defender[sfe]"
58
+ ```
59
+
60
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may provide prebuilt wheels for only some platforms; on the others, pip/uv builds from source, which needs a C++ toolchain.
61
+
53
62
  ## Quick start
54
63
 
55
64
  ```python
@@ -85,15 +94,22 @@ else:
85
94
  - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
86
95
  - **Pattern removal** — phrases like “ignore previous instructions”
87
96
  - **Encoding detection** — suspicious Base64/URL-shaped payloads
88
- - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
97
+ - **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
89
98
 
90
99
  ### Tier 2 — ML classification (ONNX)
91
100
 
92
- Sentence-level MiniLM classifier (int8 ONNX ~22 MB, bundled):
101
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
93
102
 
94
- - Split text into sentences, score each (0.0 = benign, 1.0 = injection-like), take the max
103
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
95
104
  - Catches paraphrased or novel injections missed by regex
96
- - Roughly ~10 ms per batch after warmup (CPU)
105
+ - Uses chunked batch inference to bound memory on large payloads
106
+
107
+ ### Optional SFE preprocessor
108
+
109
+ - `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
110
+ - **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
111
+ - **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
112
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
97
113
 
98
114
  **Benchmarks** (F1 @ threshold 0.5):
99
115
 
@@ -125,7 +141,9 @@ defense = create_prompt_defense(
125
141
  enable_tier2=True,
126
142
  block_high_risk=False,
127
143
  default_risk_level="medium",
128
- tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
144
+ annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
145
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
146
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
129
147
  config={
130
148
  "tier2": {
131
149
  "high_risk_threshold": 0.8,
@@ -137,9 +155,11 @@ defense = create_prompt_defense(
137
155
 
138
156
  ### `defense.defend_tool_result(value, tool_name)`
139
157
 
140
- Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
158
+ Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
141
159
 
142
160
  ```python
161
+ from dataclasses import dataclass, field
162
+
143
163
  @dataclass
144
164
  class DefenseResult:
145
165
  allowed: bool
@@ -151,6 +171,8 @@ class DefenseResult:
151
171
  tier2_score: float | None = None
152
172
  tier2_skip_reason: str | None = None
153
173
  max_sentence: str | None = None
174
+ fields_dropped: list[str] = field(default_factory=list)
175
+ truncated_at_depth: bool | None = None
154
176
  latency_ms: float = 0.0
155
177
  ```
156
178
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "stackone-defender"
3
- version = "0.1.2"
3
+ version = "0.6.3"
4
4
  description = "Indirect prompt injection defense for AI agents using tool calls"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -25,6 +25,9 @@ Repository = "https://github.com/StackOneHQ/stackone-defender"
25
25
 
26
26
  [project.optional-dependencies]
27
27
  onnx = ["onnxruntime>=1.16.0", "tokenizers>=0.15.0", "numpy>=1.24.0"]
28
+ # fasttext-ng provides the `fasttext` module (maintained bindings; supports 3.13).
29
+ # Pulls numpy>=2.3; SFE still fails open when import/load fails.
30
+ sfe = ["fasttext-ng>=0.9.3"]
28
31
 
29
32
  [dependency-groups]
30
33
  dev = [
@@ -32,6 +35,7 @@ dev = [
32
35
  "onnxruntime>=1.16.0",
33
36
  "tokenizers>=0.15.0",
34
37
  "numpy>=1.24.0",
38
+ "fasttext-ng>=0.9.3",
35
39
  ]
36
40
 
37
41
  [build-system]
@@ -12,12 +12,29 @@ Usage:
12
12
  """
13
13
 
14
14
  from .core.prompt_defense import PromptDefense, create_prompt_defense
15
+ from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
16
+ from .sfe.preprocess import (
17
+ DropDecision,
18
+ SfePredictor,
19
+ SfePreprocessResult,
20
+ get_default_predictor,
21
+ get_default_sfe_model_path,
22
+ sfe_preprocess,
23
+ )
15
24
  from .types import DefenseResult, RiskLevel, Tier1Result
16
25
 
17
26
  __all__ = [
18
27
  "DefenseResult",
28
+ "DropDecision",
19
29
  "PromptDefense",
20
30
  "RiskLevel",
31
+ "SfePredictor",
32
+ "SfePreprocessResult",
21
33
  "Tier1Result",
34
+ "contains_boundary_patterns",
22
35
  "create_prompt_defense",
36
+ "generate_boundary_instructions",
37
+ "get_default_predictor",
38
+ "get_default_sfe_model_path",
39
+ "sfe_preprocess",
23
40
  ]
@@ -37,6 +37,8 @@ def _sigmoid(x: float) -> float:
37
37
  class OnnxClassifier:
38
38
  """ONNX Classifier for fine-tuned MiniLM models."""
39
39
 
40
+ _MAX_BATCH_CHUNK = 32
41
+
40
42
  def __init__(self, model_path: str | None = None):
41
43
  self._model_path = model_path or _default_model_path()
42
44
  self._session = None
@@ -105,10 +107,17 @@ class OnnxClassifier:
105
107
  return _sigmoid(logit)
106
108
 
107
109
  def classify_batch(self, texts: list[str]) -> list[float]:
108
- """Classify multiple texts in batch."""
110
+ """Classify multiple texts in batch, bounded by chunk size."""
109
111
  if not texts:
110
112
  return []
111
113
  self._ensure_loaded()
114
+ all_scores: list[float] = []
115
+ for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
116
+ chunk = texts[offset: offset + self._MAX_BATCH_CHUNK]
117
+ all_scores.extend(self._classify_batch_chunk(chunk))
118
+ return all_scores
119
+
120
+ def _classify_batch_chunk(self, texts: list[str]) -> list[float]:
112
121
  import numpy as np
113
122
 
114
123
  encodings = self._tokenizer.encode_batch(texts)
@@ -119,6 +128,15 @@ class OnnxClassifier:
119
128
  logits = results[0]
120
129
  return [_sigmoid(float(logits[i][0])) for i in range(len(texts))]
121
130
 
131
+ def count_tokens(self, text: str) -> int:
132
+ self._ensure_loaded()
133
+ encoding = self._tokenizer.encode(text)
134
+ # Padding is enabled at a fixed length; count only real (attended) tokens.
135
+ return int(sum(encoding.attention_mask))
136
+
137
+ def get_max_length(self) -> int:
138
+ return self._max_length
139
+
122
140
  def warmup(self) -> None:
123
141
  self.load_model()
124
142