stackone-defender 0.1.1__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. stackone_defender-0.6.2/.release-please-manifest.json +1 -0
  2. stackone_defender-0.6.2/CHANGELOG.md +117 -0
  3. stackone_defender-0.6.2/PKG-INFO +269 -0
  4. stackone_defender-0.6.2/README.md +243 -0
  5. stackone_defender-0.6.2/models/minilm-full-aug/config.json +30 -0
  6. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/models/minilm-full-aug/model_quantized.onnx +0 -0
  7. stackone_defender-0.6.2/models/minilm-full-aug/tokenizer.json +30686 -0
  8. stackone_defender-0.6.2/models/minilm-full-aug/tokenizer_config.json +23 -0
  9. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/pyproject.toml +5 -1
  10. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/__init__.py +15 -2
  11. stackone_defender-0.6.2/src/stackone_defender/classifiers/onnx_classifier.py +148 -0
  12. stackone_defender-0.6.2/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
  13. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/config.py +51 -56
  14. stackone_defender-0.6.2/src/stackone_defender/core/prompt_defense.py +313 -0
  15. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/core/tool_result_sanitizer.py +85 -51
  16. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
  17. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
  18. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
  19. stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
  20. stackone_defender-0.6.2/src/stackone_defender/sfe/__init__.py +19 -0
  21. stackone_defender-0.6.2/src/stackone_defender/sfe/model.ftz +0 -0
  22. stackone_defender-0.6.2/src/stackone_defender/sfe/preprocess.py +232 -0
  23. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/types.py +25 -14
  24. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/__init__.py +1 -3
  25. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/field_detection.py +1 -24
  26. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_integration.py +109 -53
  27. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_onnx_classifier.py +34 -0
  28. stackone_defender-0.6.2/tests/test_sfe.py +43 -0
  29. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_tier2_classifier.py +19 -0
  30. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_utils.py +1 -21
  31. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/uv.lock +27 -156
  32. stackone_defender-0.1.1/.release-please-manifest.json +0 -1
  33. stackone_defender-0.1.1/CHANGELOG.md +0 -31
  34. stackone_defender-0.1.1/PKG-INFO +0 -229
  35. stackone_defender-0.1.1/README.md +0 -205
  36. stackone_defender-0.1.1/models/minilm-full-aug/config.json +0 -28
  37. stackone_defender-0.1.1/models/minilm-full-aug/tokenizer.json +0 -30678
  38. stackone_defender-0.1.1/models/minilm-full-aug/tokenizer_config.json +0 -16
  39. stackone_defender-0.1.1/src/stackone_defender/classifiers/onnx_classifier.py +0 -95
  40. stackone_defender-0.1.1/src/stackone_defender/classifiers/tier2_classifier.py +0 -164
  41. stackone_defender-0.1.1/src/stackone_defender/core/prompt_defense.py +0 -197
  42. stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
  43. stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
  44. stackone_defender-0.1.1/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
  45. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.github/workflows/ci.yaml +0 -0
  46. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.github/workflows/release.yaml +0 -0
  47. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.gitignore +0 -0
  48. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.python-version +0 -0
  49. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/.release-please-config.json +0 -0
  50. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/__init__.py +0 -0
  51. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
  52. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/patterns.py +0 -0
  53. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/core/__init__.py +0 -0
  54. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/__init__.py +0 -0
  55. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
  56. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/normalizer.py +0 -0
  57. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
  58. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
  59. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/sanitizer.py +0 -0
  60. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/boundary.py +0 -0
  61. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/src/stackone_defender/utils/structure.py +0 -0
  62. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/__init__.py +0 -0
  63. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_pattern_detector.py +0 -0
  64. {stackone_defender-0.1.1 → stackone_defender-0.6.2}/tests/test_sanitizers.py +0 -0
@@ -0,0 +1 @@
1
+ {".":"0.6.2"}
@@ -0,0 +1,117 @@
1
+ # Changelog
2
+
3
+ ## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
4
+
5
+
6
+ ### ⚠ BREAKING CHANGES
7
+
8
+ * Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
9
+
10
+ ### Features
11
+
12
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
13
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
14
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
15
+ * align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
16
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
17
+ * **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
18
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
19
+ * remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
20
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
21
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
22
+ * upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
23
+ * upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
24
+
25
+
26
+ ### Bug Fixes
27
+
28
+ * address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
29
+ * **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
30
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
31
+ * default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
32
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
33
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
34
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
35
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
36
+ * **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
37
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
38
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
39
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
40
+
41
+
42
+ ### Dependencies
43
+
44
+ * **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
45
+
46
+
47
+ ### Documentation
48
+
49
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
50
+ * update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
51
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
52
+
53
+
54
+ ### Miscellaneous Chores
55
+
56
+ * prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
57
+
58
+ ## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
59
+
60
+ ### Features
61
+
62
+ * align Python package behavior with `@stackone/defender` 0.6.1
63
+ * add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
64
+ * add packed-chunk Tier 2 batching and density-adjusted scoring
65
+ * add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
66
+ * add cumulative-risk fractional thresholds to reduce list-response false positives
67
+
68
+ ### Bug Fixes
69
+
70
+ * use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
71
+
72
+ ### Breaking Changes
73
+
74
+ * Python package version jumps from `0.1.2` to `0.6.1` to align release train with TypeScript parity.
75
+ * `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
76
+
77
+ ## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
78
+
79
+
80
+ ### Bug Fixes
81
+
82
+ * upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
83
+
84
+
85
+ ### Documentation
86
+
87
+ * update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
88
+
89
+ ## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
90
+
91
+
92
+ ### Features
93
+
94
+ * add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
95
+ * add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
96
+ * add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
97
+ * **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
98
+ * port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
99
+ * **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
100
+ * sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
101
+
102
+
103
+ ### Bug Fixes
104
+
105
+ * **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
106
+ * **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
107
+ * **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
108
+ * enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
109
+ * sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
110
+ * use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
111
+
112
+
113
+ ### Documentation
114
+
115
+ * add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
116
+
117
+ ## Changelog
@@ -0,0 +1,269 @@
1
+ Metadata-Version: 2.4
2
+ Name: stackone-defender
3
+ Version: 0.6.2
4
+ Summary: Indirect prompt injection defense for AI agents using tool calls
5
+ Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
6
+ Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
7
+ Author-email: StackOne <support@stackone.com>
8
+ License: Apache-2.0
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Security
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Requires-Python: >=3.11
19
+ Provides-Extra: onnx
20
+ Requires-Dist: numpy>=1.24.0; extra == 'onnx'
21
+ Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
22
+ Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
23
+ Provides-Extra: sfe
24
+ Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
25
+ Description-Content-Type: text/markdown
26
+
27
+ <div align="center">
28
+
29
+ <picture>
30
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-dark.svg" />
31
+ <img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-light.svg" alt="Defender by StackOne — Indirect prompt injection protection for MCP tool calls" width="800" />
32
+ </picture>
33
+
34
+ <p>
35
+ <a href="https://pypi.org/project/stackone-defender/"><img src="https://img.shields.io/pypi/v/stackone-defender?style=flat-square&color=047B43&label=pypi" alt="PyPI version" /></a>
36
+ <a href="https://github.com/StackOneHQ/stackone-defender/releases"><img src="https://img.shields.io/github/v/release/StackOneHQ/stackone-defender?style=flat-square&color=047B43&label=release" alt="latest GitHub release" /></a>
37
+ <a href="https://github.com/StackOneHQ/stackone-defender/stargazers"><img src="https://img.shields.io/github/stars/StackOneHQ/stackone-defender?style=flat-square&color=047B43" alt="GitHub stars" /></a>
38
+ <a href="./LICENSE"><img src="https://img.shields.io/pypi/l/stackone-defender?style=flat-square&color=047B43" alt="License" /></a>
39
+ <img src="https://img.shields.io/badge/Python-3.11+-047B43?style=flat-square" alt="Python 3.11+" />
40
+ </p>
41
+ <p>
42
+ <img src="https://img.shields.io/badge/model-22MB-047B43?style=flat-square" alt="Model size: 22MB" />
43
+ <img src="https://img.shields.io/badge/latency-~10ms-047B43?style=flat-square" alt="Latency: ~10ms" />
44
+ <img src="https://img.shields.io/badge/CPU--only-no%20GPU%20needed-047B43?style=flat-square" alt="CPU only" />
45
+ <img src="https://img.shields.io/badge/F1%20Score-90.8%25-047B43?style=flat-square" alt="F1 Score: 90.8%" />
46
+ </p>
47
+
48
+ </div>
49
+
50
+ ---
51
+
52
+ Indirect prompt injection defense for AI agents using tool calls (MCP, CLI, or direct APIs). Detects and neutralizes attacks hidden in tool results (emails, documents, PRs, etc.) before they reach your LLM.
53
+
54
+ **Python package:** [`stackone-defender`](https://pypi.org/project/stackone-defender/) — aligned with [`@stackone/defender`](https://www.npmjs.com/package/@stackone/defender) on npm.
55
+
56
+ ## Installation
57
+
58
+ **pip**
59
+
60
+ ```bash
61
+ pip install stackone-defender
62
+ ```
63
+
64
+ **uv**
65
+
66
+ ```bash
67
+ uv add stackone-defender
68
+ ```
69
+
70
+ **Tier 2 (ONNX)** — add extras:
71
+
72
+ ```bash
73
+ pip install "stackone-defender[onnx]"
74
+ # or: uv add "stackone-defender[onnx]"
75
+ ```
76
+
77
+ The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
78
+
79
+ **SFE preprocessor (optional)** — add extras:
80
+
81
+ ```bash
82
+ pip install "stackone-defender[sfe]"
83
+ # or: uv add "stackone-defender[sfe]"
84
+ ```
85
+
86
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
87
+
88
+ ## Quick start
89
+
90
+ ```python
91
+ from stackone_defender import create_prompt_defense
92
+
93
+ # Tier 1 + Tier 2 are on by default. block_high_risk=True enables allow/block.
94
+ defense = create_prompt_defense(block_high_risk=True)
95
+
96
+ # Optional: preload ONNX to avoid first-call latency (requires [onnx] extra)
97
+ defense.warmup_tier2()
98
+
99
+ result = defense.defend_tool_result(tool_output, "gmail_get_message")
100
+
101
+ if not result.allowed:
102
+     print(f"Blocked: risk={result.risk_level}, score={result.tier2_score}")
103
+     print(f"Detections: {', '.join(result.detections)}")
104
+ else:
105
+     send_to_llm(result.sanitized)
106
+ ```
107
+
108
+ ## How it works
109
+
110
+ <picture>
111
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-dark.svg" />
112
+ <img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-light.svg" alt="Defender flow: poisoned tool output is sanitized and evaluated; high-risk content can be blocked before the LLM" width="900" />
113
+ </picture>
114
+
115
+ `defend_tool_result()` runs two tiers:
116
+
117
+ ### Tier 1 — Pattern detection (sync, ~1 ms)
118
+
119
+ - **Unicode normalization** — homoglyph resistance (e.g. Cyrillic `а` → ASCII `a`)
120
+ - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
121
+ - **Pattern removal** — phrases like “ignore previous instructions”
122
+ - **Encoding detection** — suspicious Base64/URL-shaped payloads
123
+ - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
124
+
125
+ ### Tier 2 — ML classification (ONNX)
126
+
127
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
128
+
129
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
130
+ - Catches paraphrased or novel injections missed by regex
131
+ - Uses chunked batch inference to bound memory on large payloads
132
+
133
+ ### Optional SFE preprocessor
134
+
135
+ - `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
136
+ - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
137
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
138
+
139
+ **Benchmarks** (F1 @ threshold 0.5):
140
+
141
+ | Benchmark | F1 | Samples |
142
+ |-----------|-----|--------|
143
+ | Qualifire (in-distribution) | 0.8686 | ~1.5k |
144
+ | xxz224 (out-of-distribution) | 0.8834 | ~22.5k |
145
+ | jayavibhav (adversarial) | 0.9717 | ~1k |
146
+ | **Average** | **0.9079** | ~25k |
147
+
148
+ ### `allowed` vs `risk_level`
149
+
150
+ - Use **`allowed`** for gating when `block_high_risk=True`: `False` means do not pass `sanitized` to the model as-is.
151
+ - **`risk_level`** is diagnostic: it starts at `default_risk_level` (default `"medium"`) and is **escalated** by Tier 1 / Tier 2 signals — not reduced. Use it for logging, not as the sole block signal unless you implement your own policy.
152
+
153
+ | Level | Typical trigger |
154
+ |-------|------------------|
155
+ | `low` | No strong signals |
156
+ | `medium` | Lighter pattern / sanitization signals |
157
+ | `high` / `critical` | Strong injection patterns, encoding signals, or high Tier 2 score |
158
+
159
+ ## API
160
+
161
+ ### `create_prompt_defense(**kwargs)`
162
+
163
+ ```python
164
+ defense = create_prompt_defense(
165
+ enable_tier1=True,
166
+ enable_tier2=True,
167
+ block_high_risk=False,
168
+ default_risk_level="medium",
169
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
170
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
171
+ config={
172
+ "tier2": {
173
+ "high_risk_threshold": 0.8,
174
+ "tier2_fields": None, # or list[str]; constructor tier2_fields wins if set
175
+ },
176
+ },
177
+ )
178
+ ```
179
+
180
+ ### `defense.defend_tool_result(value, tool_name)`
181
+
182
+ Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
183
+
184
+ ```python
185
+ from dataclasses import dataclass, field
186
+
187
+ @dataclass
188
+ class DefenseResult:
189
+     allowed: bool
190
+     risk_level: RiskLevel
191
+     sanitized: Any
192
+     detections: list[str]
193
+     fields_sanitized: list[str]
194
+     patterns_by_field: dict[str, list[str]]
195
+     tier2_score: float | None = None
196
+     tier2_skip_reason: str | None = None
197
+     max_sentence: str | None = None
198
+     fields_dropped: list[str] = field(default_factory=list)
199
+     truncated_at_depth: bool | None = None
200
+     latency_ms: float = 0.0
201
+ ```
202
+
203
+ ### `defense.defend_tool_results(items)`
204
+
205
+ ```python
206
+ results = defense.defend_tool_results([
207
+ {"value": email_data, "tool_name": "gmail_get_message"},
208
+ {"value": doc_data, "tool_name": "documents_get"},
209
+ {"value": pr_data, "tool_name": "github_get_pull_request"},
210
+ ])
211
+ for r in results:
212
+     if not r.allowed:
213
+         print("Blocked:", ", ".join(r.fields_sanitized))
214
+ ```
215
+
216
+ ### `defense.analyze(text)`
217
+
218
+ Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
219
+
220
+ ### Tier 2 warmup
221
+
222
+ ```python
223
+ defense = create_prompt_defense()
224
+ defense.warmup_tier2() # no-op if enable_tier2=False or ONNX extra missing
225
+ ```
226
+
227
+ ## Integration example
228
+
229
+ ```python
230
+ from stackone_defender import create_prompt_defense
231
+
232
+ defense = create_prompt_defense(block_high_risk=True)
233
+ defense.warmup_tier2()
234
+
235
+ def run_tool_and_defend(raw_result: dict, tool_name: str):
236
+     outcome = defense.defend_tool_result(raw_result, tool_name)
237
+     if not outcome.allowed:
238
+         return {"error": "Content blocked by safety filter", "risk_level": outcome.risk_level}
239
+     return outcome.sanitized
240
+
241
+ # Example agent loop
242
+ sanitized = run_tool_and_defend(gmail_api.get_message(msg_id), "gmail_get_message")
243
+ ```
244
+
245
+ ## Risky field detection
246
+
247
+ Only **string** values under configured “risky” keys are scanned and sanitized. [`RiskyFieldConfig`](https://github.com/StackOneHQ/stackone-defender/blob/main/src/stackone_defender/types.py) provides global names/patterns plus **`tool_overrides`** (wildcard tool names → field list), same idea as the npm package.
248
+
249
+ | Tool pattern | Scanned fields |
250
+ |--------------|----------------|
251
+ | `gmail_*`, `email_*` | subject, body, snippet, content |
252
+ | `documents_*` | name, description, content, title |
253
+ | `github_*` | name, title, body, description, message |
254
+ | `hris_*` | name, notes, bio, description |
255
+ | `ats_*` | name, notes, description, summary |
256
+ | `crm_*` | name, description, notes, content |
257
+
258
+ Otherwise the default list applies: `name`, `description`, `content`, `title`, `notes`, `summary`, `bio`, `body`, `text`, `message`, `comment`, `subject`, plus suffix patterns like `*_body`, `*_description`, etc. Structural keys such as `id`, `url`, `created_at` are not treated as risky by default.
259
+
260
+ ## Development
261
+
262
+ ```bash
263
+ uv sync --group dev
264
+ uv run pytest
265
+ ```
266
+
267
+ ## License
268
+
269
+ Apache-2.0 — see [LICENSE](./LICENSE).
@@ -0,0 +1,243 @@
1
+ <div align="center">
2
+
3
+ <picture>
4
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-dark.svg" />
5
+ <img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/banner-light.svg" alt="Defender by StackOne — Indirect prompt injection protection for MCP tool calls" width="800" />
6
+ </picture>
7
+
8
+ <p>
9
+ <a href="https://pypi.org/project/stackone-defender/"><img src="https://img.shields.io/pypi/v/stackone-defender?style=flat-square&color=047B43&label=pypi" alt="PyPI version" /></a>
10
+ <a href="https://github.com/StackOneHQ/stackone-defender/releases"><img src="https://img.shields.io/github/v/release/StackOneHQ/stackone-defender?style=flat-square&color=047B43&label=release" alt="latest GitHub release" /></a>
11
+ <a href="https://github.com/StackOneHQ/stackone-defender/stargazers"><img src="https://img.shields.io/github/stars/StackOneHQ/stackone-defender?style=flat-square&color=047B43" alt="GitHub stars" /></a>
12
+ <a href="./LICENSE"><img src="https://img.shields.io/pypi/l/stackone-defender?style=flat-square&color=047B43" alt="License" /></a>
13
+ <img src="https://img.shields.io/badge/Python-3.11+-047B43?style=flat-square" alt="Python 3.11+" />
14
+ </p>
15
+ <p>
16
+ <img src="https://img.shields.io/badge/model-22MB-047B43?style=flat-square" alt="Model size: 22MB" />
17
+ <img src="https://img.shields.io/badge/latency-~10ms-047B43?style=flat-square" alt="Latency: ~10ms" />
18
+ <img src="https://img.shields.io/badge/CPU--only-no%20GPU%20needed-047B43?style=flat-square" alt="CPU only" />
19
+ <img src="https://img.shields.io/badge/F1%20Score-90.8%25-047B43?style=flat-square" alt="F1 Score: 90.8%" />
20
+ </p>
21
+
22
+ </div>
23
+
24
+ ---
25
+
26
+ Indirect prompt injection defense for AI agents using tool calls (MCP, CLI, or direct APIs). Detects and neutralizes attacks hidden in tool results (emails, documents, PRs, etc.) before they reach your LLM.
27
+
28
+ **Python package:** [`stackone-defender`](https://pypi.org/project/stackone-defender/) — aligned with [`@stackone/defender`](https://www.npmjs.com/package/@stackone/defender) on npm.
29
+
30
+ ## Installation
31
+
32
+ **pip**
33
+
34
+ ```bash
35
+ pip install stackone-defender
36
+ ```
37
+
38
+ **uv**
39
+
40
+ ```bash
41
+ uv add stackone-defender
42
+ ```
43
+
44
+ **Tier 2 (ONNX)** — add extras:
45
+
46
+ ```bash
47
+ pip install "stackone-defender[onnx]"
48
+ # or: uv add "stackone-defender[onnx]"
49
+ ```
50
+
51
+ The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
52
+
53
+ **SFE preprocessor (optional)** — add extras:
54
+
55
+ ```bash
56
+ pip install "stackone-defender[sfe]"
57
+ # or: uv add "stackone-defender[sfe]"
58
+ ```
59
+
60
+ The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
61
+
62
+ ## Quick start
63
+
64
+ ```python
65
+ from stackone_defender import create_prompt_defense
66
+
67
+ # Tier 1 + Tier 2 are on by default. block_high_risk=True enables allow/block.
68
+ defense = create_prompt_defense(block_high_risk=True)
69
+
70
+ # Optional: preload ONNX to avoid first-call latency (requires [onnx] extra)
71
+ defense.warmup_tier2()
72
+
73
+ result = defense.defend_tool_result(tool_output, "gmail_get_message")
74
+
75
+ if not result.allowed:
76
+ print(f"Blocked: risk={result.risk_level}, score={result.tier2_score}")
77
+ print(f"Detections: {', '.join(result.detections)}")
78
+ else:
79
+ send_to_llm(result.sanitized)
80
+ ```
81
+
82
+ ## How it works
83
+
84
+ <picture>
85
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-dark.svg" />
86
+ <img src="https://raw.githubusercontent.com/StackOneHQ/defender/main/assets/demo-light.svg" alt="Defender flow: poisoned tool output is sanitized and evaluated; high-risk content can be blocked before the LLM" width="900" />
87
+ </picture>
88
+
89
+ `defend_tool_result()` runs two tiers:
90
+
91
+ ### Tier 1 — Pattern detection (sync, ~1 ms)
92
+
93
+ - **Unicode normalization** — homoglyph resistance (e.g. Cyrillic `а` → ASCII `a`)
94
+ - **Role stripping** — `SYSTEM:`, `ASSISTANT:`, `<system>`, `[INST]`, etc.
95
+ - **Pattern removal** — phrases like “ignore previous instructions”
96
+ - **Encoding detection** — suspicious Base64/URL-shaped payloads
97
+ - **Boundary annotation** — `[UD-{id}]…[/UD-{id}]` wrappers around untrusted spans
98
+
99
+ ### Tier 2 — ML classification (ONNX)
100
+
101
+ Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
102
+
103
+ - Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
104
+ - Catches paraphrased or novel injections missed by regex
105
+ - Uses chunked batch inference to bound memory on large payloads
106
+
107
+ ### Optional SFE preprocessor
108
+
109
+ - `use_sfe=True` enables the semantic field extractor (SFE), a field-level FastText pass that runs before Tier 1/Tier 2
110
+ - Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
111
+ - Fails open if the runtime/model is unavailable: payload continues unfiltered
112
+
113
+ **Tier 2 benchmarks** (F1 @ threshold 0.5):
114
+
115
+ | Benchmark | F1 | Samples |
116
+ |-----------|-----|--------|
117
+ | Qualifire (in-distribution) | 0.8686 | ~1.5k |
118
+ | xxz224 (out-of-distribution) | 0.8834 | ~22.5k |
119
+ | jayavibhav (adversarial) | 0.9717 | ~1k |
120
+ | **Average** | **0.9079** | ~25k |
121
+
122
+ ### `allowed` vs `risk_level`
123
+
124
+ - Use **`allowed`** for gating when `block_high_risk=True`: `False` means do not pass `sanitized` to the model as-is.
125
+ - **`risk_level`** is diagnostic: it starts at `default_risk_level` (default `"medium"`) and is **escalated** by Tier 1 / Tier 2 signals — not reduced. Use it for logging, not as the sole block signal unless you implement your own policy.
126
+
127
+ | Level | Typical trigger |
128
+ |-------|------------------|
129
+ | `low` | No strong signals (only seen when `default_risk_level="low"`, since levels are never reduced) |
130
+ | `medium` | Lighter pattern / sanitization signals |
131
+ | `high` / `critical` | Strong injection patterns, encoding signals, or high Tier 2 score |
132
+
133
+ ## API
134
+
135
+ ### `create_prompt_defense(**kwargs)`
136
+
137
+ ```python
138
+ defense = create_prompt_defense(
139
+ enable_tier1=True,
140
+ enable_tier2=True,
141
+ block_high_risk=False,
142
+ default_risk_level="medium",
143
+ tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
144
+ use_sfe=True, # optional: enable semantic field extractor preprocessing
145
+ config={
146
+ "tier2": {
147
+ "high_risk_threshold": 0.8,
148
+ "tier2_fields": None, # or list[str]; constructor tier2_fields wins if set
149
+ },
150
+ },
151
+ )
152
+ ```
153
+
154
+ ### `defense.defend_tool_result(value, tool_name)`
155
+
156
+ Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`. Returns a `DefenseResult`:
157
+
158
+ ```python
159
+ from dataclasses import dataclass, field
160
+
161
+ @dataclass
162
+ class DefenseResult:
163
+ allowed: bool
164
+ risk_level: RiskLevel
165
+ sanitized: Any
166
+ detections: list[str]
167
+ fields_sanitized: list[str]
168
+ patterns_by_field: dict[str, list[str]]
169
+ tier2_score: float | None = None
170
+ tier2_skip_reason: str | None = None
171
+ max_sentence: str | None = None
172
+ fields_dropped: list[str] = field(default_factory=list)
173
+ truncated_at_depth: bool | None = None
174
+ latency_ms: float = 0.0
175
+ ```
176
+
177
+ ### `defense.defend_tool_results(items)`
178
+
179
+ ```python
180
+ results = defense.defend_tool_results([
181
+ {"value": email_data, "tool_name": "gmail_get_message"},
182
+ {"value": doc_data, "tool_name": "documents_get"},
183
+ {"value": pr_data, "tool_name": "github_get_pull_request"},
184
+ ])
185
+ for r in results:
186
+ if not r.allowed:
187
+ print("Blocked:", ", ".join(r.fields_sanitized))
188
+ ```
189
+
190
+ ### `defense.analyze(text)`
191
+
192
+ Tier 1 only — useful for debugging pattern hits without full tool-result traversal.
193
+
194
+ ### Tier 2 warmup
195
+
196
+ ```python
197
+ defense = create_prompt_defense()
198
+ defense.warmup_tier2() # no-op if enable_tier2=False or ONNX extra missing
199
+ ```
200
+
201
+ ## Integration example
202
+
203
+ ```python
204
+ from stackone_defender import create_prompt_defense
205
+
206
+ defense = create_prompt_defense(block_high_risk=True)
207
+ defense.warmup_tier2()
208
+
209
+ def run_tool_and_defend(raw_result: dict, tool_name: str):
210
+ outcome = defense.defend_tool_result(raw_result, tool_name)
211
+ if not outcome.allowed:
212
+ return {"error": "Content blocked by safety filter", "risk_level": outcome.risk_level}
213
+ return outcome.sanitized
214
+
215
+ # Example agent loop
216
+ sanitized = run_tool_and_defend(gmail_api.get_message(msg_id), "gmail_get_message")
217
+ ```
218
+
219
+ ## Risky field detection
220
+
221
+ Only **string** values under configured “risky” keys are scanned and sanitized. [`RiskyFieldConfig`](https://github.com/StackOneHQ/stackone-defender/blob/main/src/stackone_defender/types.py) provides global names/patterns plus **`tool_overrides`** (wildcard tool names → field list), same idea as the npm package.
222
+
223
+ | Tool pattern | Scanned fields |
224
+ |--------------|----------------|
225
+ | `gmail_*`, `email_*` | subject, body, snippet, content |
226
+ | `documents_*` | name, description, content, title |
227
+ | `github_*` | name, title, body, description, message |
228
+ | `hris_*` | name, notes, bio, description |
229
+ | `ats_*` | name, notes, description, summary |
230
+ | `crm_*` | name, description, notes, content |
231
+
232
+ Otherwise the default list applies: `name`, `description`, `content`, `title`, `notes`, `summary`, `bio`, `body`, `text`, `message`, `comment`, `subject`, plus suffix patterns such as `*_body` and `*_description`. Structural keys such as `id`, `url`, `created_at` are not treated as risky by default.
233
+
234
+ ## Development
235
+
236
+ ```bash
237
+ uv sync --group dev
238
+ uv run pytest
239
+ ```
240
+
241
+ ## License
242
+
243
+ Apache-2.0 — see [LICENSE](./LICENSE).
@@ -0,0 +1,30 @@
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": null,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": null,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 384,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 1536,
17
+ "is_decoder": false,
18
+ "layer_norm_eps": 1e-12,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 6,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.3.0",
27
+ "type_vocab_size": 2,
28
+ "use_cache": true,
29
+ "vocab_size": 30522
30
+ }