stackone-defender 0.1.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stackone_defender-0.6.3/.release-please-manifest.json +1 -0
- stackone_defender-0.6.3/CHANGELOG.md +139 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/PKG-INFO +31 -7
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/README.md +28 -6
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/pyproject.toml +5 -1
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/__init__.py +17 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/onnx_classifier.py +19 -1
- stackone_defender-0.6.3/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/config.py +46 -4
- stackone_defender-0.6.3/src/stackone_defender/core/prompt_defense.py +315 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/core/tool_result_sanitizer.py +67 -22
- stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
- stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
- stackone_defender-0.6.3/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/sanitizer.py +13 -7
- stackone_defender-0.6.3/src/stackone_defender/sfe/__init__.py +19 -0
- stackone_defender-0.6.3/src/stackone_defender/sfe/model.ftz +0 -0
- stackone_defender-0.6.3/src/stackone_defender/sfe/preprocess.py +232 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/types.py +31 -5
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_integration.py +50 -32
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_onnx_classifier.py +24 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_sanitizers.py +24 -4
- stackone_defender-0.6.3/tests/test_sfe.py +44 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_tier2_classifier.py +19 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/uv.lock +30 -2
- stackone_defender-0.1.2/.release-please-manifest.json +0 -1
- stackone_defender-0.1.2/CHANGELOG.md +0 -43
- stackone_defender-0.1.2/src/stackone_defender/classifiers/tier2_classifier.py +0 -173
- stackone_defender-0.1.2/src/stackone_defender/core/prompt_defense.py +0 -202
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.github/workflows/ci.yaml +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.github/workflows/release.yaml +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.gitignore +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.python-version +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/.release-please-config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/model_quantized.onnx +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/tokenizer.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/models/minilm-full-aug/tokenizer_config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/classifiers/patterns.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/core/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/normalizer.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/boundary.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/field_detection.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/src/stackone_defender/utils/structure.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_pattern_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.3}/tests/test_utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{".":"0.6.3"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.6.3](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.2...stackone-defender-v0.6.3) (2026-05-26)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### ⚠ BREAKING CHANGES
|
|
7
|
+
|
|
8
|
+
* When `tier2_fields` is unset, Tier 2 scans all strings (no fallback to Tier 1 risky_field_names).
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* align Python package with @stackone/defender 0.6.3 ([a91a904](https://github.com/StackOneHQ/stackone-defender/commit/a91a904de2a08a29479afb2cff31e8488468ebaf))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
### Bug Fixes
|
|
16
|
+
|
|
17
|
+
* **ENG-269:** Python parity with @stackone/defender 0.6.3 ([7c312f1](https://github.com/StackOneHQ/stackone-defender/commit/7c312f1d1c858b2f25b49043d783ce7294638b82))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
### Miscellaneous Chores
|
|
21
|
+
|
|
22
|
+
* prepare release 0.6.3 ([8ef9888](https://github.com/StackOneHQ/stackone-defender/commit/8ef9888752713ed5df76c4eed3e117605a8fb9e6))
|
|
23
|
+
* retrigger release workflow after gh actions outage ([72f586b](https://github.com/StackOneHQ/stackone-defender/commit/72f586bcb974b1aab08e7525253d9d8a9c8bc59d))
|
|
24
|
+
|
|
25
|
+
## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
### ⚠ BREAKING CHANGES
|
|
29
|
+
|
|
30
|
+
* Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
|
|
31
|
+
|
|
32
|
+
### Features
|
|
33
|
+
|
|
34
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
35
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
36
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
37
|
+
* align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
|
|
38
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
39
|
+
* **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
|
|
40
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
41
|
+
* remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
|
|
42
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
43
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
44
|
+
* upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
|
|
45
|
+
* upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
### Bug Fixes
|
|
49
|
+
|
|
50
|
+
* address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
|
|
51
|
+
* **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
|
|
52
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
53
|
+
* default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
|
|
54
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
55
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
56
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
57
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
58
|
+
* **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
|
|
59
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
60
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
|
|
61
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
### Dependencies
|
|
65
|
+
|
|
66
|
+
* **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
### Documentation
|
|
70
|
+
|
|
71
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
72
|
+
* update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
|
|
73
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
### Miscellaneous Chores
|
|
77
|
+
|
|
78
|
+
* prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
|
|
79
|
+
|
|
80
|
+
## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
|
|
81
|
+
|
|
82
|
+
### Features
|
|
83
|
+
|
|
84
|
+
* align Python package behavior with `@stackone/defender` 0.6.1
|
|
85
|
+
* add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
|
|
86
|
+
* add packed-chunk Tier 2 batching and density-adjusted scoring
|
|
87
|
+
* add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
|
|
88
|
+
* add cumulative-risk fractional thresholds to reduce list-response false positives
|
|
89
|
+
|
|
90
|
+
### Bug Fixes
|
|
91
|
+
|
|
92
|
+
* use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
|
|
93
|
+
|
|
94
|
+
### Breaking Changes
|
|
95
|
+
|
|
96
|
+
* Python package version jumps from `0.1.2` to `0.6.1` to align release train with TypeScript parity.
|
|
97
|
+
* `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
|
|
98
|
+
|
|
99
|
+
## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
### Bug Fixes
|
|
103
|
+
|
|
104
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
### Documentation
|
|
108
|
+
|
|
109
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
110
|
+
|
|
111
|
+
## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
### Features
|
|
115
|
+
|
|
116
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
117
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
118
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
119
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
120
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
121
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
122
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
### Bug Fixes
|
|
126
|
+
|
|
127
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
128
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
129
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
130
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
131
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
132
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
### Documentation
|
|
136
|
+
|
|
137
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
138
|
+
|
|
139
|
+
## Changelog
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stackone-defender
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Indirect prompt injection defense for AI agents using tool calls
|
|
5
5
|
Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
|
|
6
6
|
Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
|
|
@@ -20,6 +20,8 @@ Provides-Extra: onnx
|
|
|
20
20
|
Requires-Dist: numpy>=1.24.0; extra == 'onnx'
|
|
21
21
|
Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
|
|
22
22
|
Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
|
|
23
|
+
Provides-Extra: sfe
|
|
24
|
+
Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
|
|
23
25
|
Description-Content-Type: text/markdown
|
|
24
26
|
|
|
25
27
|
<div align="center">
|
|
@@ -74,6 +76,15 @@ pip install stackone-defender[onnx]
|
|
|
74
76
|
|
|
75
77
|
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
76
78
|
|
|
79
|
+
**SFE preprocessor (optional)** — add extras:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install stackone-defender[sfe]
|
|
83
|
+
# or: uv add "stackone-defender[sfe]"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
|
|
87
|
+
|
|
77
88
|
## Quick start
|
|
78
89
|
|
|
79
90
|
```python
|
|
@@ -109,15 +120,22 @@ else:
|
|
|
109
120
|
- **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
|
|
110
121
|
- **Pattern removal** — phrases like “ignore previous instructions”
|
|
111
122
|
- **Encoding detection** — suspicious Base64/URL-shaped payloads
|
|
112
|
-
- **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers
|
|
123
|
+
- **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
|
|
113
124
|
|
|
114
125
|
### Tier 2 — ML classification (ONNX)
|
|
115
126
|
|
|
116
|
-
|
|
127
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
117
128
|
|
|
118
|
-
- Split text into sentences,
|
|
129
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
119
130
|
- Catches paraphrased or novel injections missed by regex
|
|
120
|
-
-
|
|
131
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
132
|
+
|
|
133
|
+
### Optional SFE preprocessor
|
|
134
|
+
|
|
135
|
+
- `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
|
|
136
|
+
- **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
|
|
137
|
+
- **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
|
|
138
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
121
139
|
|
|
122
140
|
**Benchmarks** (F1 @ threshold 0.5):
|
|
123
141
|
|
|
@@ -149,7 +167,9 @@ defense = create_prompt_defense(
|
|
|
149
167
|
enable_tier2=True,
|
|
150
168
|
block_high_risk=False,
|
|
151
169
|
default_risk_level="medium",
|
|
152
|
-
|
|
170
|
+
annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
|
|
171
|
+
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
|
|
172
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
153
173
|
config={
|
|
154
174
|
"tier2": {
|
|
155
175
|
"high_risk_threshold": 0.8,
|
|
@@ -161,9 +181,11 @@ defense = create_prompt_defense(
|
|
|
161
181
|
|
|
162
182
|
### `defense.defend_tool_result(value, tool_name)`
|
|
163
183
|
|
|
164
|
-
Runs Tier 1 sanitization on risky fields, then Tier 2 on
|
|
184
|
+
Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
|
|
165
185
|
|
|
166
186
|
```python
|
|
187
|
+
from dataclasses import dataclass, field
|
|
188
|
+
|
|
167
189
|
@dataclass
|
|
168
190
|
class DefenseResult:
|
|
169
191
|
allowed: bool
|
|
@@ -175,6 +197,8 @@ class DefenseResult:
|
|
|
175
197
|
tier2_score: float | None = None
|
|
176
198
|
tier2_skip_reason: str | None = None
|
|
177
199
|
max_sentence: str | None = None
|
|
200
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
201
|
+
truncated_at_depth: bool | None = None
|
|
178
202
|
latency_ms: float = 0.0
|
|
179
203
|
```
|
|
180
204
|
|
|
@@ -50,6 +50,15 @@ pip install stackone-defender[onnx]
|
|
|
50
50
|
|
|
51
51
|
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
52
52
|
|
|
53
|
+
**SFE preprocessor (optional)** — add extras:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install stackone-defender[sfe]
|
|
57
|
+
# or: uv add "stackone-defender[sfe]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
|
|
61
|
+
|
|
53
62
|
## Quick start
|
|
54
63
|
|
|
55
64
|
```python
|
|
@@ -85,15 +94,22 @@ else:
|
|
|
85
94
|
- **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
|
|
86
95
|
- **Pattern removal** — phrases like “ignore previous instructions”
|
|
87
96
|
- **Encoding detection** — suspicious Base64/URL-shaped payloads
|
|
88
|
-
- **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers
|
|
97
|
+
- **Boundary annotation (opt-in)** — `[UD-{id}]…[/UD-{id}]` wrappers when `annotate_boundary=True` (npm: `annotateBoundary`). Use `generate_boundary_instructions` from the package root in prompts when you enable wrapping.
|
|
89
98
|
|
|
90
99
|
### Tier 2 — ML classification (ONNX)
|
|
91
100
|
|
|
92
|
-
|
|
101
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
93
102
|
|
|
94
|
-
- Split text into sentences,
|
|
103
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
95
104
|
- Catches paraphrased or novel injections missed by regex
|
|
96
|
-
-
|
|
105
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
106
|
+
|
|
107
|
+
### Optional SFE preprocessor
|
|
108
|
+
|
|
109
|
+
- `use_sfe=True` runs a field-level FastText pass to build a **classifier-only** view of the payload
|
|
110
|
+
- **Tier 1** always sanitizes the **original** tool value; **`sanitized`** in `DefenseResult` is unchanged by SFE drops
|
|
111
|
+
- **Tier 2** extracts strings from the SFE-filtered tree; `fields_dropped` lists paths omitted from that extraction (not removed from `sanitized`)
|
|
112
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
97
113
|
|
|
98
114
|
**Benchmarks** (F1 @ threshold 0.5):
|
|
99
115
|
|
|
@@ -125,7 +141,9 @@ defense = create_prompt_defense(
|
|
|
125
141
|
enable_tier2=True,
|
|
126
142
|
block_high_risk=False,
|
|
127
143
|
default_risk_level="medium",
|
|
128
|
-
|
|
144
|
+
annotate_boundary=False, # True: wrap risky strings with [UD-…] tags (npm: annotateBoundary)
|
|
145
|
+
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys (default: all strings)
|
|
146
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
129
147
|
config={
|
|
130
148
|
"tier2": {
|
|
131
149
|
"high_risk_threshold": 0.8,
|
|
@@ -137,9 +155,11 @@ defense = create_prompt_defense(
|
|
|
137
155
|
|
|
138
156
|
### `defense.defend_tool_result(value, tool_name)`
|
|
139
157
|
|
|
140
|
-
Runs Tier 1 sanitization on risky fields, then Tier 2 on
|
|
158
|
+
Runs Tier 1 sanitization on risky fields of the **original** payload, then Tier 2 on strings from the SFE-filtered view when SFE is on (otherwise the full value). Optional `tier2_fields` restricts Tier 2 extraction to specific keys; omit it to classify **all** strings (matches `@stackone/defender` 0.6.3). **Synchronous** — no `await`.
|
|
141
159
|
|
|
142
160
|
```python
|
|
161
|
+
from dataclasses import dataclass, field
|
|
162
|
+
|
|
143
163
|
@dataclass
|
|
144
164
|
class DefenseResult:
|
|
145
165
|
allowed: bool
|
|
@@ -151,6 +171,8 @@ class DefenseResult:
|
|
|
151
171
|
tier2_score: float | None = None
|
|
152
172
|
tier2_skip_reason: str | None = None
|
|
153
173
|
max_sentence: str | None = None
|
|
174
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
175
|
+
truncated_at_depth: bool | None = None
|
|
154
176
|
latency_ms: float = 0.0
|
|
155
177
|
```
|
|
156
178
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "stackone-defender"
|
|
3
|
-
version = "0.1.2"
|
|
3
|
+
version = "0.6.3"
|
|
4
4
|
description = "Indirect prompt injection defense for AI agents using tool calls"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|
|
@@ -25,6 +25,9 @@ Repository = "https://github.com/StackOneHQ/stackone-defender"
|
|
|
25
25
|
|
|
26
26
|
[project.optional-dependencies]
|
|
27
27
|
onnx = ["onnxruntime>=1.16.0", "tokenizers>=0.15.0", "numpy>=1.24.0"]
|
|
28
|
+
# fasttext-ng provides the `fasttext` module (maintained bindings; supports 3.13).
|
|
29
|
+
# Pulls numpy>=2.3; SFE still fail-opens when import/load fails.
|
|
30
|
+
sfe = ["fasttext-ng>=0.9.3"]
|
|
28
31
|
|
|
29
32
|
[dependency-groups]
|
|
30
33
|
dev = [
|
|
@@ -32,6 +35,7 @@ dev = [
|
|
|
32
35
|
"onnxruntime>=1.16.0",
|
|
33
36
|
"tokenizers>=0.15.0",
|
|
34
37
|
"numpy>=1.24.0",
|
|
38
|
+
"fasttext-ng>=0.9.3",
|
|
35
39
|
]
|
|
36
40
|
|
|
37
41
|
[build-system]
|
|
@@ -12,12 +12,29 @@ Usage:
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
from .core.prompt_defense import PromptDefense, create_prompt_defense
|
|
15
|
+
from .utils.boundary import contains_boundary_patterns, generate_boundary_instructions
|
|
16
|
+
from .sfe.preprocess import (
|
|
17
|
+
DropDecision,
|
|
18
|
+
SfePredictor,
|
|
19
|
+
SfePreprocessResult,
|
|
20
|
+
get_default_predictor,
|
|
21
|
+
get_default_sfe_model_path,
|
|
22
|
+
sfe_preprocess,
|
|
23
|
+
)
|
|
15
24
|
from .types import DefenseResult, RiskLevel, Tier1Result
|
|
16
25
|
|
|
17
26
|
__all__ = [
|
|
18
27
|
"DefenseResult",
|
|
28
|
+
"DropDecision",
|
|
19
29
|
"PromptDefense",
|
|
20
30
|
"RiskLevel",
|
|
31
|
+
"SfePredictor",
|
|
32
|
+
"SfePreprocessResult",
|
|
21
33
|
"Tier1Result",
|
|
34
|
+
"contains_boundary_patterns",
|
|
22
35
|
"create_prompt_defense",
|
|
36
|
+
"generate_boundary_instructions",
|
|
37
|
+
"get_default_predictor",
|
|
38
|
+
"get_default_sfe_model_path",
|
|
39
|
+
"sfe_preprocess",
|
|
23
40
|
]
|
|
@@ -37,6 +37,8 @@ def _sigmoid(x: float) -> float:
|
|
|
37
37
|
class OnnxClassifier:
|
|
38
38
|
"""ONNX Classifier for fine-tuned MiniLM models."""
|
|
39
39
|
|
|
40
|
+
_MAX_BATCH_CHUNK = 32
|
|
41
|
+
|
|
40
42
|
def __init__(self, model_path: str | None = None):
|
|
41
43
|
self._model_path = model_path or _default_model_path()
|
|
42
44
|
self._session = None
|
|
@@ -105,10 +107,17 @@ class OnnxClassifier:
|
|
|
105
107
|
return _sigmoid(logit)
|
|
106
108
|
|
|
107
109
|
def classify_batch(self, texts: list[str]) -> list[float]:
|
|
108
|
-
"""Classify multiple texts in batch."""
|
|
110
|
+
"""Classify multiple texts in batch, bounded by chunk size."""
|
|
109
111
|
if not texts:
|
|
110
112
|
return []
|
|
111
113
|
self._ensure_loaded()
|
|
114
|
+
all_scores: list[float] = []
|
|
115
|
+
for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
|
|
116
|
+
chunk = texts[offset: offset + self._MAX_BATCH_CHUNK]
|
|
117
|
+
all_scores.extend(self._classify_batch_chunk(chunk))
|
|
118
|
+
return all_scores
|
|
119
|
+
|
|
120
|
+
def _classify_batch_chunk(self, texts: list[str]) -> list[float]:
|
|
112
121
|
import numpy as np
|
|
113
122
|
|
|
114
123
|
encodings = self._tokenizer.encode_batch(texts)
|
|
@@ -119,6 +128,15 @@ class OnnxClassifier:
|
|
|
119
128
|
logits = results[0]
|
|
120
129
|
return [_sigmoid(float(logits[i][0])) for i in range(len(texts))]
|
|
121
130
|
|
|
131
|
+
def count_tokens(self, text: str) -> int:
|
|
132
|
+
self._ensure_loaded()
|
|
133
|
+
encoding = self._tokenizer.encode(text)
|
|
134
|
+
# Padding is enabled at a fixed length; count only real (attended) tokens.
|
|
135
|
+
return int(sum(encoding.attention_mask))
|
|
136
|
+
|
|
137
|
+
def get_max_length(self) -> int:
|
|
138
|
+
return self._max_length
|
|
139
|
+
|
|
122
140
|
def warmup(self) -> None:
|
|
123
141
|
self.load_model()
|
|
124
142
|
|