stackone-defender 0.1.2__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stackone_defender-0.6.2/.release-please-manifest.json +1 -0
- stackone_defender-0.6.2/CHANGELOG.md +117 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/PKG-INFO +26 -4
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/README.md +23 -3
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/pyproject.toml +5 -1
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/__init__.py +14 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/onnx_classifier.py +19 -1
- stackone_defender-0.6.2/src/stackone_defender/classifiers/tier2_classifier.py +291 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/config.py +46 -4
- stackone_defender-0.6.2/src/stackone_defender/core/prompt_defense.py +313 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/core/tool_result_sanitizer.py +60 -20
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/config.json +30 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/models/minilm-full-aug/model_quantized.onnx +0 -0
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +30686 -0
- stackone_defender-0.6.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +23 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/__init__.py +19 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/model.ftz +0 -0
- stackone_defender-0.6.2/src/stackone_defender/sfe/preprocess.py +232 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/types.py +23 -4
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_integration.py +40 -31
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_onnx_classifier.py +24 -0
- stackone_defender-0.6.2/tests/test_sfe.py +43 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_tier2_classifier.py +19 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/uv.lock +30 -2
- stackone_defender-0.1.2/.release-please-manifest.json +0 -1
- stackone_defender-0.1.2/CHANGELOG.md +0 -43
- stackone_defender-0.1.2/src/stackone_defender/classifiers/tier2_classifier.py +0 -173
- stackone_defender-0.1.2/src/stackone_defender/core/prompt_defense.py +0 -202
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/config.json +0 -28
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer.json +0 -30678
- stackone_defender-0.1.2/src/stackone_defender/models/minilm-full-aug/tokenizer_config.json +0 -16
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.github/workflows/ci.yaml +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.github/workflows/release.yaml +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.gitignore +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.python-version +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/.release-please-config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/model_quantized.onnx +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/tokenizer.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/models/minilm-full-aug/tokenizer_config.json +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/pattern_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/classifiers/patterns.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/core/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/encoding_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/normalizer.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/pattern_remover.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/role_stripper.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/sanitizers/sanitizer.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/boundary.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/field_detection.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/src/stackone_defender/utils/structure.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/__init__.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_pattern_detector.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_sanitizers.py +0 -0
- {stackone_defender-0.1.2 → stackone_defender-0.6.2}/tests/test_utils.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{".":"0.6.2"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.6.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.6.1...stackone-defender-v0.6.2) (2026-04-22)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### ⚠ BREAKING CHANGES
|
|
7
|
+
|
|
8
|
+
* Drop ToolSanitizationRule, config/sanitizer tool_rules, use_default_tool_rules, and get_tool_rule/should_skip_field. Matches @stackone/defender post ENG-12594.
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
|
|
12
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
13
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
14
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
15
|
+
* align Python defender with Node (Tier 2 scoping, ONNX cache) ([482bfdd](https://github.com/StackOneHQ/stackone-defender/commit/482bfdda59b4617a75bc261621984cc321d28989))
|
|
16
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
17
|
+
* **ENG-12699:** TypeScript parity and synced ONNX bundle ([0449800](https://github.com/StackOneHQ/stackone-defender/commit/0449800fc2375c89ef231f5671f9a74bd84d3388))
|
|
18
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
19
|
+
* remove tool rules; batch Tier2 ONNX; lock ONNX load ([26c95c2](https://github.com/StackOneHQ/stackone-defender/commit/26c95c257175c892ae4be82ab7c17a099c1b6c6e))
|
|
20
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
21
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
22
|
+
* upgrade ML classifier to jbv2 model (AgentShield 73.7 → 79.8) ([bcd27f8](https://github.com/StackOneHQ/stackone-defender/commit/bcd27f8abf954700276249f9b03de34f733c67c4))
|
|
23
|
+
* upgrade ML classifier to jbv5 (AgentShield 79.8 → 81.1) ([781dd10](https://github.com/StackOneHQ/stackone-defender/commit/781dd1007e7a0db03d58619a23b69f1b5d73e85d))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
### Bug Fixes
|
|
27
|
+
|
|
28
|
+
* address Copilot/cubic review (Tier2 scope, tokens, SFE, thresholds) ([bf173ac](https://github.com/StackOneHQ/stackone-defender/commit/bf173ac42f6aaa7513ea2a1fc19083806a5c5ee1))
|
|
29
|
+
* **ci:** avoid fasttext-wheel on Python 3.13 ([a6cda76](https://github.com/StackOneHQ/stackone-defender/commit/a6cda76894e3cd240c4f104e701e3202babb2682))
|
|
30
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
31
|
+
* default enable_tier2 to True to match TypeScript SDK behaviour ([d66773b](https://github.com/StackOneHQ/stackone-defender/commit/d66773bee026517d09dd56b9311dd3c281c6f675))
|
|
32
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
33
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
34
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
35
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
36
|
+
* **tier2:** apply max_text_length truncation in classify_by_sentence ([a67d2c6](https://github.com/StackOneHQ/stackone-defender/commit/a67d2c6524fb1d6b4f9331f547f28221867038de))
|
|
37
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
38
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([ccb1204](https://github.com/StackOneHQ/stackone-defender/commit/ccb1204d5e3d9763bb916d71bb49b75039ceb197))
|
|
39
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
### Dependencies
|
|
43
|
+
|
|
44
|
+
* **sfe:** switch optional FastText bindings to fasttext-ng ([bc9cc28](https://github.com/StackOneHQ/stackone-defender/commit/bc9cc283bc2da9f10472415d4aa94a0df083ec3d))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
### Documentation
|
|
48
|
+
|
|
49
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
50
|
+
* update README — enable_tier2 defaults to True ([af0d059](https://github.com/StackOneHQ/stackone-defender/commit/af0d05957e39a83b7e6e18b1f78b95219b14a4f5))
|
|
51
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### Miscellaneous Chores
|
|
55
|
+
|
|
56
|
+
* prepare patch release 0.6.2 ([7b3c105](https://github.com/StackOneHQ/stackone-defender/commit/7b3c105b2ce23f88f284d72e41c1917aefdc4537))
|
|
57
|
+
|
|
58
|
+
## [0.6.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.2...stackone-defender-v0.6.1) (2026-04-21)
|
|
59
|
+
|
|
60
|
+
### Features
|
|
61
|
+
|
|
62
|
+
* align Python package behavior with `@stackone/defender` 0.6.1
|
|
63
|
+
* add SFE preprocessing support (`use_sfe`) with fail-open optional runtime loading
|
|
64
|
+
* add packed-chunk Tier 2 batching and density-adjusted scoring
|
|
65
|
+
* add dangerous-key traversal hardening (`__proto__`, `constructor`, `prototype`)
|
|
66
|
+
* add cumulative-risk fractional thresholds to reduce list-response false positives
|
|
67
|
+
|
|
68
|
+
### Bug Fixes
|
|
69
|
+
|
|
70
|
+
* use `fasttext-ng` instead of `fasttext-wheel` for the `[sfe]` extra and dev tests so Python 3.13 CI can install maintained FastText bindings (NumPy 2.3+).
|
|
71
|
+
|
|
72
|
+
### Breaking Changes
|
|
73
|
+
|
|
74
|
+
* Python package version jumps from `0.1.2` to `0.6.1` to align release train with TypeScript parity.
|
|
75
|
+
* `DefenseResult` now includes `fields_dropped` and `truncated_at_depth`.
|
|
76
|
+
|
|
77
|
+
## [0.1.2](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.1...stackone-defender-v0.1.2) (2026-04-08)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
### Bug Fixes
|
|
81
|
+
|
|
82
|
+
* upgrade ML classifier to jbv2 (AgentShield 73.7 → 79.8) ([b452b39](https://github.com/StackOneHQ/stackone-defender/commit/b452b39c718329355f50c418bd50c37da2ed8698))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
### Documentation
|
|
86
|
+
|
|
87
|
+
* update README to reflect changes in package name and Python version ([d2fc2ca](https://github.com/StackOneHQ/stackone-defender/commit/d2fc2ca1900e2f6410df2ec075c5a8a1c3ac241b))
|
|
88
|
+
|
|
89
|
+
## [0.1.1](https://github.com/StackOneHQ/stackone-defender/compare/stackone-defender-v0.1.0...stackone-defender-v0.1.1) (2026-04-08)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
### Features
|
|
93
|
+
|
|
94
|
+
* add missing functions for full TS API parity ([aec0c5b](https://github.com/StackOneHQ/stackone-defender/commit/aec0c5b8d31715df7e4ec2e4d306b55d595bb1c3))
|
|
95
|
+
* add PyPI publishing setup with Release Please CI ([2e28373](https://github.com/StackOneHQ/stackone-defender/commit/2e28373a27315dbb5e7deb23621977fe7fa2f7bc))
|
|
96
|
+
* add tier2_fields filter and export ToolSanitizationRule ([cb7fd93](https://github.com/StackOneHQ/stackone-defender/commit/cb7fd93fb88a30f40edc171ef3fcdc5d6ce2534d))
|
|
97
|
+
* **ENG-12402:** add PyPI publishing setup with Release Please CI ([f979748](https://github.com/StackOneHQ/stackone-defender/commit/f979748a8a3b2084ea241c352866adcfcd0145ea))
|
|
98
|
+
* port stackone-defender from TypeScript to Python ([e3ff70d](https://github.com/StackOneHQ/stackone-defender/commit/e3ff70dd6a0bc94578dc4dbfde87c5d75f00b7b8))
|
|
99
|
+
* **sanitizer:** remove dead use_tier2_classification from ToolResultSanitizer ([4646179](https://github.com/StackOneHQ/stackone-defender/commit/46461798fcf5acc6ac6e23bc65177c35d9353d9c))
|
|
100
|
+
* sync Python package with TypeScript parity ([e1836dd](https://github.com/StackOneHQ/stackone-defender/commit/e1836dd967ad23997983ef1607118d1a25807e1c))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
### Bug Fixes
|
|
104
|
+
|
|
105
|
+
* **classifier:** surface classification errors in classify_by_sentence skip_reason ([bd94639](https://github.com/StackOneHQ/stackone-defender/commit/bd9463978dac5572f999d8ec3ed1adbaf0bb97f2))
|
|
106
|
+
* **defender:** fix _extract_strings filtering, None checks, and cache ONNX load failure ([bf4ce99](https://github.com/StackOneHQ/stackone-defender/commit/bf4ce993287db9e067b661100b5bd92cc21aef6b))
|
|
107
|
+
* **defender:** sync hasThreats blocking logic and tool rules precedence from JS package ([a217c3e](https://github.com/StackOneHQ/stackone-defender/commit/a217c3ef27aa0e4d92f21571bf0559ff9906f660))
|
|
108
|
+
* enable tier2 by default to match TypeScript package ([f1fe990](https://github.com/StackOneHQ/stackone-defender/commit/f1fe990e1a81c32cb271f6ca85cc063f3da49223))
|
|
109
|
+
* sync Python with TypeScript parity ([cec0813](https://github.com/StackOneHQ/stackone-defender/commit/cec0813ff8cc98f4502d5916d285a28877983d98))
|
|
110
|
+
* use uv instead of pip in README installation instructions ([519759f](https://github.com/StackOneHQ/stackone-defender/commit/519759f09c6fc1eb6bf97f53ad0cbd25c78e2893))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
### Documentation
|
|
114
|
+
|
|
115
|
+
* add README adapted from TypeScript package ([a03c757](https://github.com/StackOneHQ/stackone-defender/commit/a03c757a1760b797d9a3ef444950e2839ca1c52d))
|
|
116
|
+
|
|
117
|
+
## Changelog
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stackone-defender
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: Indirect prompt injection defense for AI agents using tool calls
|
|
5
5
|
Project-URL: Homepage, https://github.com/StackOneHQ/stackone-defender
|
|
6
6
|
Project-URL: Repository, https://github.com/StackOneHQ/stackone-defender
|
|
@@ -20,6 +20,8 @@ Provides-Extra: onnx
|
|
|
20
20
|
Requires-Dist: numpy>=1.24.0; extra == 'onnx'
|
|
21
21
|
Requires-Dist: onnxruntime>=1.16.0; extra == 'onnx'
|
|
22
22
|
Requires-Dist: tokenizers>=0.15.0; extra == 'onnx'
|
|
23
|
+
Provides-Extra: sfe
|
|
24
|
+
Requires-Dist: fasttext-ng>=0.9.3; extra == 'sfe'
|
|
23
25
|
Description-Content-Type: text/markdown
|
|
24
26
|
|
|
25
27
|
<div align="center">
|
|
@@ -74,6 +76,15 @@ pip install stackone-defender[onnx]
|
|
|
74
76
|
|
|
75
77
|
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
76
78
|
|
|
79
|
+
**SFE preprocessor (optional)** — add extras:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install stackone-defender[sfe]
|
|
83
|
+
# or: uv add "stackone-defender[sfe]"
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
|
|
87
|
+
|
|
77
88
|
## Quick start
|
|
78
89
|
|
|
79
90
|
```python
|
|
@@ -113,11 +124,17 @@ else:
|
|
|
113
124
|
|
|
114
125
|
### Tier 2 — ML classification (ONNX)
|
|
115
126
|
|
|
116
|
-
|
|
127
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
117
128
|
|
|
118
|
-
- Split text into sentences,
|
|
129
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
119
130
|
- Catches paraphrased or novel injections missed by regex
|
|
120
|
-
-
|
|
131
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
132
|
+
|
|
133
|
+
### Optional SFE preprocessor
|
|
134
|
+
|
|
135
|
+
- `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
|
|
136
|
+
- Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
|
|
137
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
121
138
|
|
|
122
139
|
**Benchmarks** (F1 @ threshold 0.5):
|
|
123
140
|
|
|
@@ -150,6 +167,7 @@ defense = create_prompt_defense(
|
|
|
150
167
|
block_high_risk=False,
|
|
151
168
|
default_risk_level="medium",
|
|
152
169
|
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
|
|
170
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
153
171
|
config={
|
|
154
172
|
"tier2": {
|
|
155
173
|
"high_risk_threshold": 0.8,
|
|
@@ -164,6 +182,8 @@ defense = create_prompt_defense(
|
|
|
164
182
|
Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
|
|
165
183
|
|
|
166
184
|
```python
|
|
185
|
+
from dataclasses import dataclass, field
|
|
186
|
+
|
|
167
187
|
@dataclass
|
|
168
188
|
class DefenseResult:
|
|
169
189
|
allowed: bool
|
|
@@ -175,6 +195,8 @@ class DefenseResult:
|
|
|
175
195
|
tier2_score: float | None = None
|
|
176
196
|
tier2_skip_reason: str | None = None
|
|
177
197
|
max_sentence: str | None = None
|
|
198
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
199
|
+
truncated_at_depth: bool | None = None
|
|
178
200
|
latency_ms: float = 0.0
|
|
179
201
|
```
|
|
180
202
|
|
|
@@ -50,6 +50,15 @@ pip install stackone-defender[onnx]
|
|
|
50
50
|
|
|
51
51
|
The ONNX model (~22MB) is bundled in the wheel — no extra downloads at runtime.
|
|
52
52
|
|
|
53
|
+
**SFE preprocessor (optional)** — add extras:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install stackone-defender[sfe]
|
|
57
|
+
# or: uv add "stackone-defender[sfe]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
The `[sfe]` extra installs [`fasttext-ng`](https://pypi.org/project/fasttext-ng/) (provides the `fasttext` module). It requires **NumPy 2.3+**. PyPI may ship a wheel only for some platforms; otherwise pip/uv builds from source (needs a C++ toolchain).
|
|
61
|
+
|
|
53
62
|
## Quick start
|
|
54
63
|
|
|
55
64
|
```python
|
|
@@ -89,11 +98,17 @@ else:
|
|
|
89
98
|
|
|
90
99
|
### Tier 2 — ML classification (ONNX)
|
|
91
100
|
|
|
92
|
-
|
|
101
|
+
Packed-chunk MiniLM classifier (int8 ONNX ~22 MB, bundled):
|
|
93
102
|
|
|
94
|
-
- Split text into sentences,
|
|
103
|
+
- Split text into sentences, pack to model-sized chunks, score chunks in batched ONNX calls
|
|
95
104
|
- Catches paraphrased or novel injections missed by regex
|
|
96
|
-
-
|
|
105
|
+
- Uses chunked batch inference to bound memory on large payloads
|
|
106
|
+
|
|
107
|
+
### Optional SFE preprocessor
|
|
108
|
+
|
|
109
|
+
- `use_sfe=True` enables a field-level FastText pass before Tier 1/Tier 2
|
|
110
|
+
- Drops metadata-like leaves (IDs, enum-like strings) and keeps user-facing content
|
|
111
|
+
- Fails open if the runtime/model is unavailable: payload continues unfiltered
|
|
97
112
|
|
|
98
113
|
**Benchmarks** (F1 @ threshold 0.5):
|
|
99
114
|
|
|
@@ -126,6 +141,7 @@ defense = create_prompt_defense(
|
|
|
126
141
|
block_high_risk=False,
|
|
127
142
|
default_risk_level="medium",
|
|
128
143
|
tier2_fields=["subject", "body", "snippet"], # optional: scope Tier 2 to these JSON keys
|
|
144
|
+
use_sfe=True, # optional: enable semantic field extractor preprocessing
|
|
129
145
|
config={
|
|
130
146
|
"tier2": {
|
|
131
147
|
"high_risk_threshold": 0.8,
|
|
@@ -140,6 +156,8 @@ defense = create_prompt_defense(
|
|
|
140
156
|
Runs Tier 1 sanitization on risky fields, then Tier 2 on extracted text (with optional field scoping). **Synchronous** — no `await`.
|
|
141
157
|
|
|
142
158
|
```python
|
|
159
|
+
from dataclasses import dataclass, field
|
|
160
|
+
|
|
143
161
|
@dataclass
|
|
144
162
|
class DefenseResult:
|
|
145
163
|
allowed: bool
|
|
@@ -151,6 +169,8 @@ class DefenseResult:
|
|
|
151
169
|
tier2_score: float | None = None
|
|
152
170
|
tier2_skip_reason: str | None = None
|
|
153
171
|
max_sentence: str | None = None
|
|
172
|
+
fields_dropped: list[str] = field(default_factory=list)
|
|
173
|
+
truncated_at_depth: bool | None = None
|
|
154
174
|
latency_ms: float = 0.0
|
|
155
175
|
```
|
|
156
176
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "stackone-defender"
|
|
3
|
-
version = "0.1.2"
|
|
3
|
+
version = "0.6.2"
|
|
4
4
|
description = "Indirect prompt injection defense for AI agents using tool calls"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|
|
@@ -25,6 +25,9 @@ Repository = "https://github.com/StackOneHQ/stackone-defender"
|
|
|
25
25
|
|
|
26
26
|
[project.optional-dependencies]
|
|
27
27
|
onnx = ["onnxruntime>=1.16.0", "tokenizers>=0.15.0", "numpy>=1.24.0"]
|
|
28
|
+
# fasttext-ng provides the `fasttext` module (maintained bindings; supports 3.13).
|
|
29
|
+
# Pulls numpy>=2.3; SFE still fail-opens when import/load fails.
|
|
30
|
+
sfe = ["fasttext-ng>=0.9.3"]
|
|
28
31
|
|
|
29
32
|
[dependency-groups]
|
|
30
33
|
dev = [
|
|
@@ -32,6 +35,7 @@ dev = [
|
|
|
32
35
|
"onnxruntime>=1.16.0",
|
|
33
36
|
"tokenizers>=0.15.0",
|
|
34
37
|
"numpy>=1.24.0",
|
|
38
|
+
"fasttext-ng>=0.9.3",
|
|
35
39
|
]
|
|
36
40
|
|
|
37
41
|
[build-system]
|
|
@@ -12,12 +12,26 @@ Usage:
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
from .core.prompt_defense import PromptDefense, create_prompt_defense
|
|
15
|
+
from .sfe.preprocess import (
|
|
16
|
+
DropDecision,
|
|
17
|
+
SfePredictor,
|
|
18
|
+
SfePreprocessResult,
|
|
19
|
+
get_default_predictor,
|
|
20
|
+
get_default_sfe_model_path,
|
|
21
|
+
sfe_preprocess,
|
|
22
|
+
)
|
|
15
23
|
from .types import DefenseResult, RiskLevel, Tier1Result
|
|
16
24
|
|
|
17
25
|
__all__ = [
|
|
18
26
|
"DefenseResult",
|
|
27
|
+
"DropDecision",
|
|
19
28
|
"PromptDefense",
|
|
20
29
|
"RiskLevel",
|
|
30
|
+
"SfePredictor",
|
|
31
|
+
"SfePreprocessResult",
|
|
21
32
|
"Tier1Result",
|
|
22
33
|
"create_prompt_defense",
|
|
34
|
+
"get_default_predictor",
|
|
35
|
+
"get_default_sfe_model_path",
|
|
36
|
+
"sfe_preprocess",
|
|
23
37
|
]
|
|
@@ -37,6 +37,8 @@ def _sigmoid(x: float) -> float:
|
|
|
37
37
|
class OnnxClassifier:
|
|
38
38
|
"""ONNX Classifier for fine-tuned MiniLM models."""
|
|
39
39
|
|
|
40
|
+
_MAX_BATCH_CHUNK = 32
|
|
41
|
+
|
|
40
42
|
def __init__(self, model_path: str | None = None):
|
|
41
43
|
self._model_path = model_path or _default_model_path()
|
|
42
44
|
self._session = None
|
|
@@ -105,10 +107,17 @@ class OnnxClassifier:
|
|
|
105
107
|
return _sigmoid(logit)
|
|
106
108
|
|
|
107
109
|
def classify_batch(self, texts: list[str]) -> list[float]:
|
|
108
|
-
"""Classify multiple texts in batch."""
|
|
110
|
+
"""Classify multiple texts in batch, bounded by chunk size."""
|
|
109
111
|
if not texts:
|
|
110
112
|
return []
|
|
111
113
|
self._ensure_loaded()
|
|
114
|
+
all_scores: list[float] = []
|
|
115
|
+
for offset in range(0, len(texts), self._MAX_BATCH_CHUNK):
|
|
116
|
+
chunk = texts[offset: offset + self._MAX_BATCH_CHUNK]
|
|
117
|
+
all_scores.extend(self._classify_batch_chunk(chunk))
|
|
118
|
+
return all_scores
|
|
119
|
+
|
|
120
|
+
def _classify_batch_chunk(self, texts: list[str]) -> list[float]:
|
|
112
121
|
import numpy as np
|
|
113
122
|
|
|
114
123
|
encodings = self._tokenizer.encode_batch(texts)
|
|
@@ -119,6 +128,15 @@ class OnnxClassifier:
|
|
|
119
128
|
logits = results[0]
|
|
120
129
|
return [_sigmoid(float(logits[i][0])) for i in range(len(texts))]
|
|
121
130
|
|
|
131
|
+
def count_tokens(self, text: str) -> int:
|
|
132
|
+
self._ensure_loaded()
|
|
133
|
+
encoding = self._tokenizer.encode(text)
|
|
134
|
+
# Padding is enabled at a fixed length; count only real (attended) tokens.
|
|
135
|
+
return int(sum(encoding.attention_mask))
|
|
136
|
+
|
|
137
|
+
def get_max_length(self) -> int:
|
|
138
|
+
return self._max_length
|
|
139
|
+
|
|
122
140
|
def warmup(self) -> None:
|
|
123
141
|
self.load_model()
|
|
124
142
|
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""Tier 2 Classifier: ML-based prompt injection detection (ONNX only)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..types import RiskLevel, Tier2Result
|
|
10
|
+
from .onnx_classifier import OnnxClassifier
|
|
11
|
+
|
|
12
|
+
DEFAULT_TIER2_CLASSIFIER_CONFIG = {
|
|
13
|
+
"high_risk_threshold": 0.8,
|
|
14
|
+
"medium_risk_threshold": 0.5,
|
|
15
|
+
"min_text_length": 10,
|
|
16
|
+
"max_text_length": 10000,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Tier2Classifier:
|
|
21
|
+
"""Tier 2 Classifier using ONNX inference."""
|
|
22
|
+
|
|
23
|
+
def __init__(self, config: dict | None = None):
|
|
24
|
+
cfg = dict(DEFAULT_TIER2_CLASSIFIER_CONFIG)
|
|
25
|
+
if config:
|
|
26
|
+
cfg.update(config)
|
|
27
|
+
self._high_risk_threshold: float = cfg["high_risk_threshold"]
|
|
28
|
+
self._medium_risk_threshold: float = cfg["medium_risk_threshold"]
|
|
29
|
+
self._min_text_length: int = cfg["min_text_length"]
|
|
30
|
+
self._max_text_length: int = cfg["max_text_length"]
|
|
31
|
+
self._onnx = OnnxClassifier(cfg.get("onnx_model_path"))
|
|
32
|
+
|
|
33
|
+
def is_ready(self) -> bool:
|
|
34
|
+
return self._onnx.is_loaded()
|
|
35
|
+
|
|
36
|
+
def warmup(self) -> None:
|
|
37
|
+
self._onnx.warmup()
|
|
38
|
+
|
|
39
|
+
def classify(self, text: str) -> Tier2Result:
|
|
40
|
+
start = time.perf_counter()
|
|
41
|
+
if len(text) < self._min_text_length:
|
|
42
|
+
return Tier2Result(
|
|
43
|
+
score=0,
|
|
44
|
+
confidence=0,
|
|
45
|
+
skipped=True,
|
|
46
|
+
skip_reason=f"Text too short ({len(text)} < {self._min_text_length})",
|
|
47
|
+
latency_ms=_ms(start),
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
analysis_text = text[: self._max_text_length] if len(text) > self._max_text_length else text
|
|
51
|
+
|
|
52
|
+
try:
|
|
53
|
+
score = self._onnx.classify(analysis_text)
|
|
54
|
+
confidence = abs(score - 0.5) * 2
|
|
55
|
+
return Tier2Result(score=score, confidence=confidence, skipped=False, latency_ms=_ms(start))
|
|
56
|
+
except Exception as e:
|
|
57
|
+
return Tier2Result(
|
|
58
|
+
score=0,
|
|
59
|
+
confidence=0,
|
|
60
|
+
skipped=True,
|
|
61
|
+
skip_reason=f"Classification error: {e}",
|
|
62
|
+
latency_ms=_ms(start),
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def classify_batch(self, texts: list[str]) -> list[Tier2Result]:
|
|
66
|
+
return [self.classify(t) for t in texts]
|
|
67
|
+
|
|
68
|
+
def classify_by_sentence(self, text: str) -> dict[str, Any]:
|
|
69
|
+
"""Classify text by sentence and return max score."""
|
|
70
|
+
start = time.perf_counter()
|
|
71
|
+
sentences = _split_into_sentences(text)
|
|
72
|
+
if not sentences:
|
|
73
|
+
return {"score": 0, "confidence": 0, "skipped": True, "skip_reason": "No sentences found", "latency_ms": _ms(start)}
|
|
74
|
+
|
|
75
|
+
original_sentences: list[str] = []
|
|
76
|
+
classifiable: list[str] = []
|
|
77
|
+
for sentence in sentences:
|
|
78
|
+
if len(sentence) < self._min_text_length:
|
|
79
|
+
continue
|
|
80
|
+
original_sentences.append(sentence)
|
|
81
|
+
classifiable.append(
|
|
82
|
+
sentence[: self._max_text_length] if len(sentence) > self._max_text_length else sentence
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
if not classifiable:
|
|
86
|
+
return {"score": 0, "confidence": 0, "skipped": True, "skip_reason": "No classifiable sentences", "latency_ms": _ms(start)}
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
scores = self._onnx.classify_batch(classifiable)
|
|
90
|
+
except Exception as e:
|
|
91
|
+
return {
|
|
92
|
+
"score": 0,
|
|
93
|
+
"confidence": 0,
|
|
94
|
+
"skipped": True,
|
|
95
|
+
"skip_reason": f"Classification error: {e}",
|
|
96
|
+
"latency_ms": _ms(start),
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
sentence_scores: list[dict[str, Any]] = []
|
|
100
|
+
max_score = 0.0
|
|
101
|
+
max_sentence = ""
|
|
102
|
+
for sentence, score in zip(original_sentences, scores, strict=True):
|
|
103
|
+
safe_score = score if isinstance(score, (int, float)) and score == score else 0.0
|
|
104
|
+
sentence_scores.append({"sentence": sentence, "score": safe_score})
|
|
105
|
+
if safe_score > max_score:
|
|
106
|
+
max_score = safe_score
|
|
107
|
+
max_sentence = sentence
|
|
108
|
+
|
|
109
|
+
confidence = abs(max_score - 0.5) * 2
|
|
110
|
+
return {
|
|
111
|
+
"score": max_score,
|
|
112
|
+
"confidence": confidence,
|
|
113
|
+
"skipped": False,
|
|
114
|
+
"latency_ms": _ms(start),
|
|
115
|
+
"max_sentence": max_sentence,
|
|
116
|
+
"sentence_scores": sentence_scores,
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
def classify_by_chunks(self, text: str) -> dict[str, Any]:
    """Score ``text`` in one pass when it fits the model, else as sentence-packed chunks.

    The text is first cut to ``self._max_text_length`` characters. If its token
    count is within the model's maximum length it is classified whole; otherwise
    it is split into sentences, sentences shorter than ``self._min_text_length``
    are dropped, the rest are packed into chunks and classified as a batch.

    Returns:
        A dict. On success it holds ``score`` (the highest chunk score),
        ``confidence`` (``abs(score - 0.5) * 2``), ``skipped`` set to ``False``,
        ``max_sentence`` (text of the highest-scoring chunk, ``""`` if no chunk
        scored above 0), ``sentence_scores`` and ``latency_ms``. When nothing
        could be classified it holds ``score`` 0, ``confidence`` 0, ``skipped``
        set to ``True``, a ``skip_reason`` and ``latency_ms``.

    Errors raised by warmup, token counting, packing or inference are reported
    as skipped results instead of propagating. The ``get_max_length`` call is
    not guarded.
    """
    began = time.perf_counter()

    def skipped(reason: str) -> dict[str, Any]:
        # Latency is sampled when the result is built, i.e. at the point of return.
        return {
            "score": 0,
            "confidence": 0,
            "skipped": True,
            "skip_reason": reason,
            "latency_ms": _ms(began),
        }

    def usable(value: Any) -> Any:
        # Numbers pass through untouched; NaN (the only value unequal to itself)
        # and anything non-numeric are replaced by 0.0.
        if isinstance(value, (int, float)) and value == value:
            return value
        return 0.0

    if len(text) < self._min_text_length:
        return skipped("Text below minTextLength")

    token_limit = self._onnx.get_max_length()
    bounded = text
    if len(text) > self._max_text_length:
        bounded = text[: self._max_text_length]

    try:
        self._onnx.warmup()
    except Exception as exc:
        return skipped(f"Warmup error: {exc}")

    try:
        token_count = self._onnx.count_tokens(bounded)
    except Exception as exc:
        return skipped(f"Token count error: {exc}")

    if token_count <= token_limit:
        # Everything fits in a single model input: no chunking needed.
        try:
            raw_whole = self._onnx.classify(bounded)
        except Exception as exc:
            return skipped(f"Classification error: {exc}")
        whole_score = usable(raw_whole)
        return {
            "score": whole_score,
            "confidence": abs(whole_score - 0.5) * 2,
            "skipped": False,
            "max_sentence": bounded,
            "sentence_scores": [{"sentence": bounded, "score": whole_score}],
            "latency_ms": _ms(began),
        }

    # Two token slots are held back from the content budget of every chunk.
    content_budget = token_limit - 2
    sentences = []
    for candidate in _split_into_sentences(bounded):
        if len(candidate) >= self._min_text_length:
            sentences.append(candidate)
    if not sentences:
        return skipped("No classifiable sentences")

    try:
        chunks = self._pack_sentences(sentences, content_budget)
        scores = self._onnx.classify_batch(chunks)
    except Exception as exc:
        return skipped(f"Classification error: {exc}")

    best_score = 0.0
    best_chunk = ""
    per_chunk: list[dict[str, Any]] = []
    chunk_count = len(chunks)
    for position, raw_score in enumerate(scores):
        chunk_score = usable(raw_score)
        # Tolerate a score list longer than the chunk list.
        chunk_text = chunks[position] if position < chunk_count else ""
        per_chunk.append({"sentence": chunk_text, "score": chunk_score})
        if chunk_score > best_score:
            best_score, best_chunk = chunk_score, chunk_text

    return {
        "score": best_score,
        "confidence": abs(best_score - 0.5) * 2,
        "skipped": False,
        "max_sentence": best_chunk,
        "sentence_scores": per_chunk,
        "latency_ms": _ms(began),
    }
|
|
182
|
+
|
|
183
|
+
def prepare_chunks(self, text: str) -> dict[str, Any]:
    """Split ``text`` into model-sized chunks without classifying them.

    The text is cut to ``self._max_text_length`` characters. It is returned as
    a single chunk when it fits the model's maximum length; otherwise it is
    split into sentences (dropping those shorter than ``self._min_text_length``)
    which are packed into chunks.

    Returns:
        ``{"chunks": [...], "skipped": False}`` on success, or
        ``{"chunks": [], "skipped": True, "skip_reason": ...}`` when the text is
        too short, contains no usable sentence, or warmup / token counting
        fails.

    Failures of warmup and of token counting (including the per-sentence counts
    done while packing) are reported as skipped results instead of raised. The
    ``get_max_length`` call is not guarded.
    """
    if len(text) < self._min_text_length:
        return {"chunks": [], "skipped": True, "skip_reason": "Text below minTextLength"}

    model_max_len = self._onnx.get_max_length()
    bounded = text[: self._max_text_length] if len(text) > self._max_text_length else text
    try:
        self._onnx.warmup()
    except Exception as e:
        return {"chunks": [], "skipped": True, "skip_reason": f"Warmup error: {e}"}

    # Cheap shortcut that avoids tokenizing short inputs.
    # NOTE(review): presumably relies on token count never exceeding character
    # count plus two special tokens - confirm against the tokenizer.
    if len(bounded) + 2 <= model_max_len:
        return {"chunks": [bounded], "skipped": False}

    try:
        total_tokens = self._onnx.count_tokens(bounded)
    except Exception as e:
        return {"chunks": [], "skipped": True, "skip_reason": f"Token count error: {e}"}
    if total_tokens <= model_max_len:
        return {"chunks": [bounded], "skipped": False}

    # Two token slots are held back from the content budget of every chunk.
    max_content_tokens = model_max_len - 2
    sentences = [s for s in _split_into_sentences(bounded) if len(s) >= self._min_text_length]
    if not sentences:
        return {"chunks": [], "skipped": True, "skip_reason": "No classifiable sentences"}

    # Packing counts tokens per sentence, so it can fail exactly like the
    # whole-text count above; report it the same way rather than letting the
    # exception escape a method that otherwise signals failure via "skipped".
    try:
        chunks = self._pack_sentences(sentences, max_content_tokens)
    except Exception as e:
        return {"chunks": [], "skipped": True, "skip_reason": f"Token count error: {e}"}
    return {"chunks": chunks, "skipped": False}
|
|
209
|
+
|
|
210
|
+
def classify_chunks_batch(self, chunks: list[str]) -> list[float]:
    """Classify already-prepared chunks in one batch and return their scores.

    An empty ``chunks`` list yields ``[]`` without touching the model.
    Otherwise the model is warmed up first and the batch result is returned
    as-is; exceptions from either call propagate to the caller.
    """
    if chunks:
        model = self._onnx
        model.warmup()
        return model.classify_batch(chunks)
    return []
|
|
215
|
+
|
|
216
|
+
def _pack_sentences(self, sentences: list[str], max_content_tokens: int) -> list[str]:
    """Greedily join consecutive sentences into chunks within a token budget.

    Each sentence's cost is its ``count_tokens`` value minus two, floored at
    zero. Sentences are appended to the chunk under construction until the
    next one would push the running cost past ``max_content_tokens``; the chunk
    is then emitted (sentences joined by a single space) and a new one started.
    A sentence whose own cost exceeds the budget is emitted alone, unmodified,
    after flushing whatever was pending. Input order is preserved.
    """
    packed: list[str] = []
    pending: list[str] = []
    pending_cost = 0

    for piece in sentences:
        cost = max(0, self._onnx.count_tokens(piece) - 2)
        oversized = cost > max_content_tokens

        if not oversized and pending_cost + cost <= max_content_tokens:
            # Still room in the chunk being built.
            pending.append(piece)
            pending_cost += cost
            continue

        if oversized:
            # Cannot be combined with anything: close the open chunk and emit it alone.
            if pending:
                packed.append(" ".join(pending))
            packed.append(piece)
            pending, pending_cost = [], 0
        else:
            # Adding it would overflow: close the open chunk and start a new one with it.
            packed.append(" ".join(pending))
            pending, pending_cost = [piece], cost

    if pending:
        packed.append(" ".join(pending))

    return packed
|
|
245
|
+
|
|
246
|
+
def is_injection(self, text: str, threshold: float | None = None) -> bool:
|
|
247
|
+
result = self.classify(text)
|
|
248
|
+
if result.skipped:
|
|
249
|
+
return False
|
|
250
|
+
return result.score >= (threshold if threshold is not None else self._medium_risk_threshold)
|
|
251
|
+
|
|
252
|
+
def get_config(self) -> dict:
    """Return the classifier's current thresholds and text-length limits as a new dict."""
    exposed = (
        "high_risk_threshold",
        "medium_risk_threshold",
        "min_text_length",
        "max_text_length",
    )
    # Each public key mirrors the private attribute of the same name.
    return {key: getattr(self, f"_{key}") for key in exposed}
|
|
259
|
+
|
|
260
|
+
def get_risk_level(self, score: float) -> RiskLevel:
    """Map a score to ``"high"``, ``"medium"`` or ``"low"``.

    The high-risk threshold is checked first, then the medium-risk one; both
    comparisons are inclusive. Anything below both (including NaN, which fails
    every comparison) is ``"low"``.
    """
    level = "low"
    if score >= self._high_risk_threshold:
        level = "high"
    elif score >= self._medium_risk_threshold:
        level = "medium"
    return level
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def create_tier2_classifier(config: dict | None = None) -> Tier2Classifier:
    """Factory: build a ``Tier2Classifier``, passing ``config`` (possibly ``None``) straight through."""
    classifier = Tier2Classifier(config)
    return classifier
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _ms(start: float) -> float:
    """Return the milliseconds elapsed since ``start``, a ``time.perf_counter()`` reading."""
    elapsed_seconds = time.perf_counter() - start
    return elapsed_seconds * 1000
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _split_into_sentences(text: str) -> list[str]:
    """Break ``text`` into sentence-like pieces for per-piece scoring.

    Boundaries are: whitespace that follows ``.``, ``!`` or ``?``; runs of two
    or more newlines; a newline followed by an uppercase letter, a digit,
    ``#``, ``-`` or ``*``; and a newline (with optional whitespace before it)
    that follows a colon. Pieces are stripped and empty ones dropped. A piece
    longer than 200 characters that still contains newlines is further split
    line by line.
    """
    boundary = r"(?<=[.!?])\s+|\n\n+|\n(?=[A-Z0-9#\-*])|(?<=:)\s*\n"
    pieces: list[str] = []
    for fragment in map(str.strip, re.split(boundary, text)):
        if not fragment:
            continue
        if len(fragment) <= 200 or "\n" not in fragment:
            pieces.append(fragment)
        else:
            # Long multi-line block: fall back to one piece per non-blank line.
            pieces.extend(line for line in map(str.strip, fragment.split("\n")) if line)
    return pieces
|