stackone-defender 0.1.1__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stackone_defender-0.6.2/.release-please-manifest.json +1 -0
- stackone_defender-0.6.2/CHANGELOG.md +117 -0
- stackone_defender-0.6.2/PKG-INFO +269 -0
- stackone_defender-0.6.2/README.md +243 -0
- stackone_defender-0.6.2/models/minilm-full-aug/config.json +30 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/models/minilm-full-aug/model_quantized.onnx +0 -0
- stackone_defender-0.6.2/models/minilm-full-aug/tokenizer.json +30686 -0
- stackone_defender-0.6.2/models/minilm-full-aug/tokenizer_config.json +23 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/pyproject.toml +5 -1
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/__init__.py +15 -2
- stackone_defender-0.6.2/src/stackone_defender/classifiers/onnx_classifier.py +148 -0
- stackone_defender-0.6.2/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/config.py +51 -56
- stackone_defender-0.6.2/src/stackone_defender/core/prompt_defense.py +313 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/core/tool_result_sanitizer.py +85 -51
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/__init__.py +19 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/model.ftz +0 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/preprocess.py +232 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/types.py +25 -14
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/__init__.py +1 -3
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/field_detection.py +1 -24
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_integration.py +109 -53
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_onnx_classifier.py +34 -0
- stackone_defender-0.6.2/tests/test_sfe.py +43 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_tier2_classifier.py +19 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_utils.py +1 -21
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/uv.lock +27 -156
- stackone_defender-0.1.1/.release-please-manifest.json +0 -1
- stackone_defender-0.1.1/CHANGELOG.md +0 -31
- stackone_defender-0.1.1/PKG-INFO +0 -229
- stackone_defender-0.1.1/README.md +0 -205
- stackone_defender-0.1.1/models/minilm-full-aug/config.json +0 -28
- stackone_defender-0.1.1/models/minilm-full-aug/tokenizer.json +0 -30678
- stackone_defender-0.1.1/models/minilm-full-aug/tokenizer_config.json +0 -16
- stackone_defender-0.1.1/src/stackone_defender/classifiers/onnx_classifier.py +0 -95
- stackone_defender-0.1.1/src/stackone_defender/classifiers/tier2_classifier.py +0 -164
- stackone_defender-0.1.1/src/stackone_defender/core/prompt_defense.py +0 -197
- stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
- stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
- stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.github/workflows/ci.yaml +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.github/workflows/release.yaml +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.gitignore +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.python-version +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.release-please-config.json +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/__init__.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/patterns.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/core/__init__.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/__init__.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/normalizer.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/sanitizer.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/boundary.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/structure.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/__init__.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_pattern_detector.py +0 -0
- {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_sanitizers.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{".":"0.6.2"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### ⚠ BREAKING CHANGES
|
|
7
|
+
|
|
8
|
+
* Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
13
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
14
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
15
|
+
* align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
|
|
16
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
17
|
+
* **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
|
|
18
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
19
|
+
* remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
|
|
20
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
21
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
22
|
+
* upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
|
|
23
|
+
* upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
### Bug Fixes
|
|
27
|
+
|
|
28
|
+
* address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
|
|
29
|
+
* **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
|
|
30
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
31
|
+
* default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
|
|
32
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
33
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
34
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
35
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
36
|
+
* **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
|
|
37
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
38
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
|
|
39
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
### Dependencies
|
|
43
|
+
|
|
44
|
+
* **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
### Documentation
|
|
48
|
+
|
|
49
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
50
|
+
* update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
|
|
51
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### Miscellaneous Chores
|
|
55
|
+
|
|
56
|
+
* prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
|
|
57
|
+
|
|
58
|
+
## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
|
|
59
|
+
|
|
60
|
+
### Features
|
|
61
|
+
|
|
62
|
+
* align Python package behavior with `@stackone/defender` 0.6.1
|
|
63
|
+
* add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
|
|
64
|
+
* add packed-chunk Tier 2 batching and density-adjusted scoring
|
|
65
|
+
* add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
|
|
66
|
+
* add cumulative-risk fractional thresholds to reduce list-response false positives
|
|
67
|
+
|
|
68
|
+
### Bug Fixes
|
|
69
|
+
|
|
70
|
+
* use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
|
|
71
|
+
|
|
72
|
+
### Breaking Changes
|
|
73
|
+
|
|
74
|
+
* Python package version jumps from `0.1.2` to `0.6.1` to align release train with TypeScript parity.
|
|
75
|
+
* `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
|
|
76
|
+
|
|
77
|
+
## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
### Bug Fixes
|
|
81
|
+
|
|
82
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
### Documentation
|
|
86
|
+
|
|
87
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
88
|
+
|
|
89
|
+
## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
### Features
|
|
93
|
+
|
|
94
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
95
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
96
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
97
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
98
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
99
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
100
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
### Bug Fixes
|
|
104
|
+
|
|
105
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
106
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
107
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
108
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
109
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
110
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
### Documentation
|
|
114
|
+
|
|
115
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
116
|
+
|
|
117
|
+
## Changelog
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stackone-defender
|
|
3
|
+
Version: 0.6.2
|
|
4
|
+
Summary: Indirect prompt injection defense for AI agents using tool calls
|
|
5
|
+
Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
|
|
6
|
+
Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
|
|
7
|
+
Author-email: StackOne <support@stackone.com>
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Security
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Provides-Extra: onnx
|
|
20
|
+
Requires-Dist: numpy>=1.24.0; extra == 'onnx'
|
|
21
|
+
Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
|
|
22
|
+
Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
|
|
23
|
+
Provides-Extra: sfe
|
|
24
|
+
Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
<div align="center">
|
|
28
|
+
|
|
29
|
+
<picture>
|
|
30
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-dark.svg" />
|
|
31
|
+
<img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-light.svg" alt="Defender by StackOne — Indirect prompt injection protection for MCP tool calls" width="800" />
|
|
32
|
+
</picture>
|
|
33
|
+
|
|
34
|
+
<p>
|
|
35
|
+
<a href="https://pypi.org/project/stackone-defender/"><img src="https://img.shields.io/pypi/v/stackone-defender?style=flat-square&color=047B43&label=pypi" alt="PyPI version" /></a>
|
|
36
|
+
<a href="https://github.com/StackOneHQ/stackone-defender/releases"><img src="https://img.shields.io/github/v/release/StackOneHQ/stackone-defender?style=flat-square&color=047B43&label=release" alt="latest GitHub release" /></a>
|
|
37
|
+
<a href="https://github.com/StackOneHQ/stackone-defender/stargazers"><img src="https://img.shields.io/github/stars/StackOneHQ/stackone-defender?style=flat-square&color=047B43" alt="GitHub stars" /></a>
|
|
38
|
+
<a href="./LICENSE"><img src="https://img.shields.io/pypi/l/stackone-defender?style=flat-square&color=047B43" alt="License" /></a>
|
|
39
|
+
<img src="https://img.shields.io/badge/Python-3.11+-047B43?style=flat-square" alt="Python 3.11+" />
|
|
40
|
+
</p>
|
|
41
|
+
<p>
|
|
42
|
+
<img src="https://img.shields.io/badge/model-22MB-047B43?style=flat-square" alt="Model size: 22MB" />
|
|
43
|
+
<img src="https://img.shields.io/badge/latency-~10ms-047B43?style=flat-square" alt="Latency: ~10ms" />
|
|
44
|
+
<img src="https://img.shields.io/badge/CPU--only-no%20GPU%20needed-047B43?style=flat-square" alt="CPU only" />
|
|
45
|
+
<img src="https://img.shields.io/badge/F1%20Score-90.8%25-047B43?style=flat-square" alt="F1 Score: 90.8%" />
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
</div>
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
Indirect prompt injection defense for AI agents using tool calls (MCP, CLI, or direct APIs). Detects and neutralizes attacks hidden in tool results (emails, documents, PRs, etc.) before they reach your LLM.
|
|
53
|
+
|
|
54
|
+
**Python package:** [`stackone-defender`](https://pypi.org/project/stackone-defender/) — aligned with [`@stackone/defender`](https://www.npmjs.com/package/@stackone/defender) on npm.
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
**pip**
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install stackone-defender
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**uv**
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
uv add stackone-defender
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Tier 2 (ONNX)** — add extras:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install "stackone-defender[onnx]"
|
|
74
|
+
# or: uv add "stackone-defender[onnx]"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
78
|
+
|
|
79
|
+
**SFE preprocessor (optional)** — add extras:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install "stackone-defender[sfe]"
|
|
83
|
+
# or: uv add "stackone-defender[sfe]"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
|
|
87
|
+
|
|
88
|
+
## Quick start
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from stackone_defender import create_prompt_defense
|
|
92
|
+
|
|
93
|
+
# Tier 1 + Tier 2 are on by default. block_high_risk=True enables allow/block.
|
|
94
|
+
defense = create_prompt_defense(block_high_risk=True)
|
|
95
|
+
|
|
96
|
+
# Optional: preload ONNX to avoid first-call latency (requires [onnx] extra)
|
|
97
|
+
defense.warmup_tier2()
|
|
98
|
+
|
|
99
|
+
result = defense.defend_tool_result(tool_output, "gmail_get_message")
|
|
100
|
+
|
|
101
|
+
if not result.allowed:
|
|
102
|
+
print(f"Blocked: risk={result.risk_level}, score={result.tier2_score}")
|
|
103
|
+
print(f"Detections: {', '.join(result.detections)}")
|
|
104
|
+
else:
|
|
105
|
+
send_to_llm(result.sanitized)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## How it works
|
|
109
|
+
|
|
110
|
+
<picture>
|
|
111
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-dark.svg" />
|
|
112
|
+
<img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-light.svg" alt="Defender flow: poisoned tool output is sanitized and evaluated; high-risk content can be blocked before the LLM" width="900" />
|
|
113
|
+
</picture>
|
|
114
|
+
|
|
115
|
+
`defend_tool_result()` runs two tiers:
|
|
116
|
+
|
|
117
|
+
### Tier 1 — Pattern detection (sync, ~1 ms)
|
|
118
|
+
|
|
119
|
+
- **Unicode normalization** — homoglyph resistance (e.g. Cyrillic `а` → ASCII `a`)
|
|
120
|
+
- **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
|
|
121
|
+
- **Pattern removal** — phrases like “ignore previous instructions”
|
|
122
|
+
- **Encoding detection** — suspicious Base64/URL-shaped payloads
|
|
123
|
+
- **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
|
|
124
|
+
|
|
125
|
+
### Tier 2 — ML classification (ONNX)
|
|
126
|
+
|
|
127
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
128
|
+
|
|
129
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
130
|
+
- Catches paraphrased or novel injections missed by regex
|
|
131
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
132
|
+
|
|
133
|
+
### Optional SFE preprocessor
|
|
134
|
+
|
|
135
|
+
- `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
|
|
136
|
+
- Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
|
|
137
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
138
|
+
|
|
139
|
+
**Benchmarks** (F1 @ threshold 0.5):
|
|
140
|
+
|
|
141
|
+
| Benchmark | F1 | Samples |
|
|
142
|
+
|-----------|-----|--------|
|
|
143
|
+
| Qualifire (in-distribution) | 0.8686 | ~1.5k |
|
|
144
|
+
| xxz224 (out-of-distribution) | 0.8834 | ~22.5k |
|
|
145
|
+
| jayavibhav (adversarial) | 0.9717 | ~1k |
|
|
146
|
+
| **Average** | **0.9079** | ~25k |
|
|
147
|
+
|
|
148
|
+
### `allowed` vs `risk_level`
|
|
149
|
+
|
|
150
|
+
- Use **`allowed`** for gating when `block_high_risk=True`: `False` means do not pass `sanitized` to the model as-is.
|
|
151
|
+
- **`risk_level`** is diagnostic: it starts at `default_risk_level` (default `"medium"`) and is **escalated** by Tier 1 / Tier 2 signals — not reduced. Use it for logging, not as the sole block signal unless you implement your own policy.
|
|
152
|
+
|
|
153
|
+
| Level | Typical trigger |
|
|
154
|
+
|-------|------------------|
|
|
155
|
+
| `low` | No strong signals (only reachable when `default_risk_level="low"`, since the level is never reduced) |
|
|
156
|
+
| `medium` | Lighter pattern / sanitization signals |
|
|
157
|
+
| `high` / `critical` | Strong injection patterns, encoding signals, or high Tier 2 score |
|
|
158
|
+
|
|
159
|
+
## API
|
|
160
|
+
|
|
161
|
+
### `create_prompt_defense(**kwargs)`
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
defense = create_prompt_defense(
|
|
165
|
+
enable_tier1=True,
|
|
166
|
+
enable_tier2=True,
|
|
167
|
+
block_high_risk=False,
|
|
168
|
+
default_risk_level="medium",
|
|
169
|
+
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
|
|
170
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
171
|
+
config={
|
|
172
|
+
"tier2": {
|
|
173
|
+
"high_risk_threshold": 0.8,
|
|
174
|
+
"tier2_fields": None, # or list[str]; constructor tier2_fields wins if set
|
|
175
|
+
},
|
|
176
|
+
},
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### `defense.defend_tool_result(value, tool_name)`
|
|
181
|
+
|
|
182
|
+
Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from dataclasses import dataclass, field
|
|
186
|
+
|
|
187
|
+
@dataclass
|
|
188
|
+
class DefenseResult:
|
|
189
|
+
allowed: bool
|
|
190
|
+
risk_level: RiskLevel
|
|
191
|
+
sanitized: Any
|
|
192
|
+
detections: list[str]
|
|
193
|
+
fields_sanitized: list[str]
|
|
194
|
+
patterns_by_field: dict[str, list[str]]
|
|
195
|
+
tier2_score: float | None = None
|
|
196
|
+
tier2_skip_reason: str | None = None
|
|
197
|
+
max_sentence: str | None = None
|
|
198
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
199
|
+
truncated_at_depth: bool | None = None
|
|
200
|
+
latency_ms: float = 0.0
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### `defense.defend_tool_results(items)`
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
results = defense.defend_tool_results([
|
|
207
|
+
{"value": email_data, "tool_name": "gmail_get_message"},
|
|
208
|
+
{"value": doc_data, "tool_name": "documents_get"},
|
|
209
|
+
{"value": pr_data, "tool_name": "github_get_pull_request"},
|
|
210
|
+
])
|
|
211
|
+
for r in results:
|
|
212
|
+
if not r.allowed:
|
|
213
|
+
print("Blocked:", ", ".join(r.fields_sanitized))
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### `defense.analyze(text)`
|
|
217
|
+
|
|
218
|
+
Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
|
|
219
|
+
|
|
220
|
+
### Tier 2 warmup
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
defense = create_prompt_defense()
|
|
224
|
+
defense.warmup_tier2() # no-op if enable_tier2=False or ONNX extra missing
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Integration example
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
from stackone_defender import create_prompt_defense
|
|
231
|
+
|
|
232
|
+
defense = create_prompt_defense(block_high_risk=True)
|
|
233
|
+
defense.warmup_tier2()
|
|
234
|
+
|
|
235
|
+
def run_tool_and_defend(raw_result: dict, tool_name: str):
|
|
236
|
+
outcome = defense.defend_tool_result(raw_result, tool_name)
|
|
237
|
+
if not outcome.allowed:
|
|
238
|
+
return {"error": "Content blocked by safety filter", "risk_level": outcome.risk_level}
|
|
239
|
+
return outcome.sanitized
|
|
240
|
+
|
|
241
|
+
# Example agent loop
|
|
242
|
+
sanitized = run_tool_and_defend(gmail_api.get_message(msg_id), "gmail_get_message")
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Risky field detection
|
|
246
|
+
|
|
247
|
+
Only **string** values under configured “risky” keys are scanned and sanitized. [`RiskyFieldConfig`](https://github.com/StackOneHQ/stackone-defender/blob/main/src/stackone_defender/types.py) provides global names/patterns plus **`tool_overrides`** (wildcard tool names → field list), same idea as the npm package.
|
|
248
|
+
|
|
249
|
+
| Tool pattern | Scanned fields |
|
|
250
|
+
|--------------|----------------|
|
|
251
|
+
| `gmail_*`, `email_*` | subject, body, snippet, content |
|
|
252
|
+
| `documents_*` | name, description, content, title |
|
|
253
|
+
| `github_*` | name, title, body, description, message |
|
|
254
|
+
| `hris_*` | name, notes, bio, description |
|
|
255
|
+
| `ats_*` | name, notes, description, summary |
|
|
256
|
+
| `crm_*` | name, description, notes, content |
|
|
257
|
+
|
|
258
|
+
Otherwise the default list applies: `name`, `description`, `content`, `title`, `notes`, `summary`, `bio`, `body`, `text`, `message`, `comment`, `subject`, plus suffix patterns like `*_body`, `*_description`, etc. Structural keys such as `id`, `url`, `created_at` are not treated as risky by default.
|
|
259
|
+
|
|
260
|
+
## Development
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
uv sync --group dev
|
|
264
|
+
uv run pytest
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## License
|
|
268
|
+
|
|
269
|
+
Apache-2.0 — see [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
<picture>
|
|
4
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-dark.svg" />
|
|
5
|
+
<img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-light.svg" alt="Defender by StackOne — Indirect prompt injection protection for MCP tool calls" width="800" />
|
|
6
|
+
</picture>
|
|
7
|
+
|
|
8
|
+
<p>
|
|
9
|
+
<a href="https://pypi.org/project/stackone-defender/"><img src="https://img.shields.io/pypi/v/stackone-defender?style=flat-square&color=047B43&label=pypi" alt="PyPI version" /></a>
|
|
10
|
+
<a href="https://github.com/StackOneHQ/stackone-defender/releases"><img src="https://img.shields.io/github/v/release/StackOneHQ/stackone-defender?style=flat-square&color=047B43&label=release" alt="latest GitHub release" /></a>
|
|
11
|
+
<a href="https://github.com/StackOneHQ/stackone-defender/stargazers"><img src="https://img.shields.io/github/stars/StackOneHQ/stackone-defender?style=flat-square&color=047B43" alt="GitHub stars" /></a>
|
|
12
|
+
<a href="./LICENSE"><img src="https://img.shields.io/pypi/l/stackone-defender?style=flat-square&color=047B43" alt="License" /></a>
|
|
13
|
+
<img src="https://img.shields.io/badge/Python-3.11+-047B43?style=flat-square" alt="Python 3.11+" />
|
|
14
|
+
</p>
|
|
15
|
+
<p>
|
|
16
|
+
<img src="https://img.shields.io/badge/model-22MB-047B43?style=flat-square" alt="Model size: 22MB" />
|
|
17
|
+
<img src="https://img.shields.io/badge/latency-~10ms-047B43?style=flat-square" alt="Latency: ~10ms" />
|
|
18
|
+
<img src="https://img.shields.io/badge/CPU--only-no%20GPU%20needed-047B43?style=flat-square" alt="CPU only" />
|
|
19
|
+
<img src="https://img.shields.io/badge/F1%20Score-90.8%25-047B43?style=flat-square" alt="F1 Score: 90.8%" />
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
Indirect prompt injection defense for AI agents using tool calls (MCP, CLI, or direct APIs). Detects and neutralizes attacks hidden in tool results (emails, documents, PRs, etc.) before they reach your LLM.
|
|
27
|
+
|
|
28
|
+
**Python package:** [`stackone-defender`](https://pypi.org/project/stackone-defender/) — aligned with [`@stackone/defender`](https://www.npmjs.com/package/@stackone/defender) on npm.
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
**pip**
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install stackone-defender
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**uv**
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv add stackone-defender
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Tier 2 (ONNX)** — add extras:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install "stackone-defender[onnx]"
|
|
48
|
+
# or: uv add "stackone-defender[onnx]"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
52
|
+
|
|
53
|
+
**SFE preprocessor (optional)** — add extras:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install "stackone-defender[sfe]"
|
|
57
|
+
# or: uv add "stackone-defender[sfe]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship prebuilt wheels for only some platforms; on others, pip/uv builds from source, which requires a C++ toolchain.
|
|
61
|
+
|
|
62
|
+
## Quick start
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from stackone_defender import create_prompt_defense
|
|
66
|
+
|
|
67
|
+
# Tier 1 + Tier 2 are on by default. block_high_risk=True enables allow/block.
|
|
68
|
+
defense = create_prompt_defense(block_high_risk=True)
|
|
69
|
+
|
|
70
|
+
# Optional: preload ONNX to avoid first-call latency (requires [onnx] extra)
|
|
71
|
+
defense.warmup_tier2()
|
|
72
|
+
|
|
73
|
+
result = defense.defend_tool_result(tool_output, "gmail_get_message")
|
|
74
|
+
|
|
75
|
+
if not result.allowed:
|
|
76
|
+
print(f"Blocked: risk={result.risk_level}, score={result.tier2_score}")
|
|
77
|
+
print(f"Detections: {', '.join(result.detections)}")
|
|
78
|
+
else:
|
|
79
|
+
send_to_llm(result.sanitized)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## How it works
|
|
83
|
+
|
|
84
|
+
<picture>
|
|
85
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-dark.svg" />
|
|
86
|
+
<img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-light.svg" alt="Defender flow: poisoned tool output is sanitized and evaluated; high-risk content can be blocked before the LLM" width="900" />
|
|
87
|
+
</picture>
|
|
88
|
+
|
|
89
|
+
`defend_tool_result()` runs two tiers:
|
|
90
|
+
|
|
91
|
+
### Tier 1 — Pattern detection (sync, ~1 ms)
|
|
92
|
+
|
|
93
|
+
- **Unicode normalization** — homoglyph resistance (e.g. Cyrillic `а` → ASCII `a`)
|
|
94
|
+
- **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
|
|
95
|
+
- **Pattern removal** — phrases like “ignore previous instructions”
|
|
96
|
+
- **Encoding detection** — suspicious Base64/URL-shaped payloads
|
|
97
|
+
- **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
|
|
98
|
+
|
|
99
|
+
### Tier 2 — ML classification (ONNX)
|
|
100
|
+
|
|
101
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
102
|
+
|
|
103
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
104
|
+
- Catches paraphrased or novel injections missed by regex
|
|
105
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
106
|
+
|
|
107
|
+
### Optional SFE preprocessor
|
|
108
|
+
|
|
109
|
+
- `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
|
|
110
|
+
- Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
|
|
111
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
112
|
+
|
|
113
|
+
**Tier 2 classifier benchmarks** (F1 @ threshold 0.5):
|
|
114
|
+
|
|
115
|
+
| Benchmark | F1 | Samples |
|
|
116
|
+
|-----------|-----|--------|
|
|
117
|
+
| Qualifire (in-distribution) | 0.8686 | ~1.5k |
|
|
118
|
+
| xxz224 (out-of-distribution) | 0.8834 | ~22.5k |
|
|
119
|
+
| jayavibhav (adversarial) | 0.9717 | ~1k |
|
|
120
|
+
| **Average** | **0.9079** | ~25k |
|
|
121
|
+
|
|
122
|
+
### `allowed` vs `risk_level`
|
|
123
|
+
|
|
124
|
+
- Use **`allowed`** for gating when `block_high_risk=True`: `False` means do not pass `sanitized` to the model as-is.
|
|
125
|
+
- **`risk_level`** is diagnostic: it starts at `default_risk_level` (default `"medium"`) and can only be **escalated** by Tier 1 / Tier 2 signals, never reduced. Use it for logging, not as the sole block signal unless you implement your own policy.
|
|
126
|
+
|
|
127
|
+
| Level | Typical trigger |
|
|
128
|
+
|-------|------------------|
|
|
129
|
+
| `low` | No strong signals |
|
|
130
|
+
| `medium` | Lighter pattern / sanitization signals |
|
|
131
|
+
| `high` / `critical` | Strong injection patterns, encoding signals, or high Tier 2 score |
|
|
132
|
+
|
|
133
|
+
## API
|
|
134
|
+
|
|
135
|
+
### `create_prompt_defense(**kwargs)`
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
defense = create_prompt_defense(
|
|
139
|
+
enable_tier1=True,
|
|
140
|
+
enable_tier2=True,
|
|
141
|
+
block_high_risk=False,
|
|
142
|
+
default_risk_level="medium",
|
|
143
|
+
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
|
|
144
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
145
|
+
config={
|
|
146
|
+
"tier2": {
|
|
147
|
+
"high_risk_threshold": 0.8,
|
|
148
|
+
"tier2_fields": None, # or list[str]; constructor tier2_fields wins if set
|
|
149
|
+
},
|
|
150
|
+
},
|
|
151
|
+
)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### `defense.defend_tool_result(value, tool_name)`
|
|
155
|
+
|
|
156
|
+
Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from dataclasses import dataclass, field
|
|
160
|
+
|
|
161
|
+
@dataclass
|
|
162
|
+
class DefenseResult:
|
|
163
|
+
allowed: bool
|
|
164
|
+
risk_level: RiskLevel
|
|
165
|
+
sanitized: Any
|
|
166
|
+
detections: list[str]
|
|
167
|
+
fields_sanitized: list[str]
|
|
168
|
+
patterns_by_field: dict[str, list[str]]
|
|
169
|
+
tier2_score: float | None = None
|
|
170
|
+
tier2_skip_reason: str | None = None
|
|
171
|
+
max_sentence: str | None = None
|
|
172
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
173
|
+
truncated_at_depth: bool | None = None
|
|
174
|
+
latency_ms: float = 0.0
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### `defense.defend_tool_results(items)`
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
results = defense.defend_tool_results([
|
|
181
|
+
{"value": email_data, "tool_name": "gmail_get_message"},
|
|
182
|
+
{"value": doc_data, "tool_name": "documents_get"},
|
|
183
|
+
{"value": pr_data, "tool_name": "github_get_pull_request"},
|
|
184
|
+
])
|
|
185
|
+
for r in results:
|
|
186
|
+
if not r.allowed:
|
|
187
|
+
print("Blocked:", ", ".join(r.fields_sanitized))
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### `defense.analyze(text)`
|
|
191
|
+
|
|
192
|
+
Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
|
|
193
|
+
|
|
194
|
+
### Tier 2 warmup
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
defense = create_prompt_defense()
|
|
198
|
+
defense.warmup_tier2() # no-op if enable_tier2=False or ONNX extra missing
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Integration example
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from stackone_defender import create_prompt_defense
|
|
205
|
+
|
|
206
|
+
defense = create_prompt_defense(block_high_risk=True)
|
|
207
|
+
defense.warmup_tier2()
|
|
208
|
+
|
|
209
|
+
def run_tool_and_defend(raw_result: dict, tool_name: str):
|
|
210
|
+
outcome = defense.defend_tool_result(raw_result, tool_name)
|
|
211
|
+
if not outcome.allowed:
|
|
212
|
+
return {"error": "Content blocked by safety filter", "risk_level": outcome.risk_level}
|
|
213
|
+
return outcome.sanitized
|
|
214
|
+
|
|
215
|
+
# Example agent loop
|
|
216
|
+
sanitized = run_tool_and_defend(gmail_api.get_message(msg_id), "gmail_get_message")
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Risky field detection
|
|
220
|
+
|
|
221
|
+
Only **string** values under configured “risky” keys are scanned and sanitized. [`RiskyFieldConfig`](https://github.com/StackOneHQ/stackone-defender/blob/main/src/stackone_defender/types.py) provides global names/patterns plus **`tool_overrides`** (wildcard tool names → field list), same idea as the npm package.
|
|
222
|
+
|
|
223
|
+
| Tool pattern | Scanned fields |
|
|
224
|
+
|--------------|----------------|
|
|
225
|
+
| `gmail_*`, `email_*` | subject, body, snippet, content |
|
|
226
|
+
| `documents_*` | name, description, content, title |
|
|
227
|
+
| `github_*` | name, title, body, description, message |
|
|
228
|
+
| `hris_*` | name, notes, bio, description |
|
|
229
|
+
| `ats_*` | name, notes, description, summary |
|
|
230
|
+
| `crm_*` | name, description, notes, content |
|
|
231
|
+
|
|
232
|
+
Otherwise the default list applies: `name`, `description`, `content`, `title`, `notes`, `summary`, `bio`, `body`, `text`, `message`, `comment`, `subject`, plus suffix patterns like `*_body`, `*_description`, etc. Structural keys such as `id`, `url`, `created_at` are not treated as risky by default.
|
|
233
|
+
|
|
234
|
+
## Development
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
uv sync --group dev
|
|
238
|
+
uv run pytest
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## License
|
|
242
|
+
|
|
243
|
+
Apache-2.0 — see [LICENSE](./LICENSE).
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"add_cross_attention": false,
|
|
3
|
+
"architectures": [
|
|
4
|
+
"BertModel"
|
|
5
|
+
],
|
|
6
|
+
"attention_probs_dropout_prob": 0.1,
|
|
7
|
+
"bos_token_id": null,
|
|
8
|
+
"classifier_dropout": null,
|
|
9
|
+
"dtype": "float32",
|
|
10
|
+
"eos_token_id": null,
|
|
11
|
+
"gradient_checkpointing": false,
|
|
12
|
+
"hidden_act": "gelu",
|
|
13
|
+
"hidden_dropout_prob": 0.1,
|
|
14
|
+
"hidden_size": 384,
|
|
15
|
+
"initializer_range": 0.02,
|
|
16
|
+
"intermediate_size": 1536,
|
|
17
|
+
"is_decoder": false,
|
|
18
|
+
"layer_norm_eps": 1e-12,
|
|
19
|
+
"max_position_embeddings": 512,
|
|
20
|
+
"model_type": "bert",
|
|
21
|
+
"num_attention_heads": 12,
|
|
22
|
+
"num_hidden_layers": 6,
|
|
23
|
+
"pad_token_id": 0,
|
|
24
|
+
"position_embedding_type": "absolute",
|
|
25
|
+
"tie_word_embeddings": true,
|
|
26
|
+
"transformers_version": "5.3.0",
|
|
27
|
+
"type_vocab_size": 2,
|
|
28
|
+
"use_cache": true,
|
|
29
|
+
"vocab_size": 30522
|
|
30
|
+
}
|