vektor-scan 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vektor_scan-0.2.0/LICENSE +21 -0
- vektor_scan-0.2.0/PKG-INFO +341 -0
- vektor_scan-0.2.0/README.md +308 -0
- vektor_scan-0.2.0/pyproject.toml +54 -0
- vektor_scan-0.2.0/setup.cfg +4 -0
- vektor_scan-0.2.0/vektor/__init__.py +184 -0
- vektor_scan-0.2.0/vektor/__main__.py +5 -0
- vektor_scan-0.2.0/vektor/attacks/__init__.py +4 -0
- vektor_scan-0.2.0/vektor/attacks/agent_attacks.py +537 -0
- vektor_scan-0.2.0/vektor/attacks/base.py +194 -0
- vektor_scan-0.2.0/vektor/attacks/data_extraction.py +191 -0
- vektor_scan-0.2.0/vektor/attacks/instruction_hijacking.py +312 -0
- vektor_scan-0.2.0/vektor/attacks/memory_attacks.py +1 -0
- vektor_scan-0.2.0/vektor/attacks/prompt_injection.py +313 -0
- vektor_scan-0.2.0/vektor/attacks/rag_attacks.py +634 -0
- vektor_scan-0.2.0/vektor/attacks/registry.py +40 -0
- vektor_scan-0.2.0/vektor/attacks/structured_output_injection.py +250 -0
- vektor_scan-0.2.0/vektor/cli.py +512 -0
- vektor_scan-0.2.0/vektor/config.py +122 -0
- vektor_scan-0.2.0/vektor/core/__init__.py +1 -0
- vektor_scan-0.2.0/vektor/core/diff.py +376 -0
- vektor_scan-0.2.0/vektor/core/engine.py +367 -0
- vektor_scan-0.2.0/vektor/core/plugin.py +163 -0
- vektor_scan-0.2.0/vektor/data/__init__.py +1 -0
- vektor_scan-0.2.0/vektor/demo.py +168 -0
- vektor_scan-0.2.0/vektor/scoring/__init__.py +3 -0
- vektor_scan-0.2.0/vektor/scoring/reporter.py +453 -0
- vektor_scan-0.2.0/vektor/scoring/severity.py +72 -0
- vektor_scan-0.2.0/vektor/targets/__init__.py +4 -0
- vektor_scan-0.2.0/vektor/targets/base.py +105 -0
- vektor_scan-0.2.0/vektor/targets/factory.py +99 -0
- vektor_scan-0.2.0/vektor/targets/gemini.py +139 -0
- vektor_scan-0.2.0/vektor/targets/http_endpoint.py +226 -0
- vektor_scan-0.2.0/vektor/targets/mock.py +102 -0
- vektor_scan-0.2.0/vektor/targets/multi_agent.py +247 -0
- vektor_scan-0.2.0/vektor/targets/openai_compatible.py +235 -0
- vektor_scan-0.2.0/vektor/targets/rag/__init__.py +92 -0
- vektor_scan-0.2.0/vektor/targets/rag/langchain_target.py +370 -0
- vektor_scan-0.2.0/vektor/targets/rag/llamaindex_target.py +289 -0
- vektor_scan-0.2.0/vektor/targets/vulnerable.py +120 -0
- vektor_scan-0.2.0/vektor/utils/__init__.py +3 -0
- vektor_scan-0.2.0/vektor/utils/budget.py +26 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/PKG-INFO +341 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/SOURCES.txt +46 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/dependency_links.txt +1 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/entry_points.txt +2 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/requires.txt +12 -0
- vektor_scan-0.2.0/vektor_scan.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 LLMGuard-Lite Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vektor-scan
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: AI security testing framework — pytest for LLM applications
|
|
5
|
+
Author-email: Swapnil <swapnil.wankhede23@spit.ac.in>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Swapnil565/Vektor-
|
|
8
|
+
Project-URL: Issues, https://github.com/Swapnil565/Vektor-/issues
|
|
9
|
+
Keywords: llm,security,testing,prompt-injection,ai-safety
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Security
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.8
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: openai>=1.12.0
|
|
22
|
+
Requires-Dist: click>=8.1.7
|
|
23
|
+
Requires-Dist: rich>=13.7.0
|
|
24
|
+
Requires-Dist: python-docx>=1.1.0
|
|
25
|
+
Requires-Dist: PyYAML>=6.0.1
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.4.3; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
|
|
30
|
+
Requires-Dist: black>=23.12.0; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.1.9; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Vektor 🛡️
|
|
35
|
+
|
|
36
|
+
**pytest for AI security — scan LLM apps for vulnerabilities in 30 seconds**
|
|
37
|
+
|
|
38
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
39
|
+
[Python 3.8+](https://www.python.org/downloads/)
|
|
40
|
+
|
|
41
|
+
## 🎯 What is Vektor?
|
|
42
|
+
|
|
43
|
+
An automated security testing framework that scans LLM applications for vulnerabilities:
|
|
44
|
+
- ✅ **27 validated attack vectors** across 6 categories
|
|
45
|
+
- ✅ **$0.50 average scan cost** with built-in budget controls
|
|
46
|
+
- ✅ **30-second results** - Docker run, immediate feedback
|
|
47
|
+
- ✅ **CI/CD ready** - Integrate into your deployment pipeline
|
|
48
|
+
- ✅ **Novel research** - First tool to systematically test document-based instruction hijacking
|
|
49
|
+
|
|
50
|
+
## ๐ Quick Start
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Step 1: Install
|
|
54
|
+
pip install vektor-scan
|
|
55
|
+
|
|
56
|
+
# Step 2: Zero-setup demo (no API key)
|
|
57
|
+
vektor demo
|
|
58
|
+
|
|
59
|
+
# Step 3: Real scan, $0 cost, always works
|
|
60
|
+
vektor scan --target vulnerable --output my-first-report.html
|
|
61
|
+
|
|
62
|
+
# Step 4: Open the report
|
|
63
|
+
# Windows: start my-first-report.html
|
|
64
|
+
# Mac: open my-first-report.html
|
|
65
|
+
# Linux: xdg-open my-first-report.html
|
|
66
|
+
|
|
67
|
+
# Step 5 (optional): Scan your own LLM app
|
|
68
|
+
export OPENAI_API_KEY=sk-your-key
|
|
69
|
+
vektor scan --target openai --budget 1.0
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## 🌐 Scan Any AI API — No SDK Needed
|
|
73
|
+
|
|
74
|
+
Point Vektor at any HTTP endpoint:
|
|
75
|
+
```bash
|
|
76
|
+
# Auto-detects OpenAI/Anthropic/custom shapes
|
|
77
|
+
vektor scan --url http://localhost:8000/chat
|
|
78
|
+
|
|
79
|
+
# With auth header
|
|
80
|
+
vektor scan --url https://my-app.com/api \
|
|
81
|
+
--header "Authorization: Bearer YOUR_TOKEN"
|
|
82
|
+
|
|
83
|
+
# Custom request/response field names
|
|
84
|
+
vektor scan --url http://localhost:8000/predict \
|
|
85
|
+
--request-field prompt --response-field answer
|
|
86
|
+
|
|
87
|
+
# Query-parameter mode (e.g. /api/parse?text=PAYLOAD)
|
|
88
|
+
vektor scan --url http://localhost:8000/api/parse \
|
|
89
|
+
--param-field text
|
|
90
|
+
|
|
91
|
+
# Rate-limited API — add delay between requests
|
|
92
|
+
vektor scan --url http://localhost:8000/chat \
|
|
93
|
+
--request-delay 12.0
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## ๐ก Why Vektor?
|
|
97
|
+
|
|
98
|
+
| Feature | Vektor | Garak | Promptfoo | PyRIT |
|
|
99
|
+
| :--- | :---: | :---: | :---: | :---: |
|
|
100
|
+
| **Primary Focus** | **Actionable Security** | Vulnerability Scanning | General Eval / Testing | Red Teaming Framework |
|
|
101
|
+
| **Setup Time** | **< 30s** | ~10 mins | ~5 mins | ~30 mins |
|
|
102
|
+
| **Scan Speed** | **Fast (Targeted)** | Slow (Exhaustive) | Fast | Slow (Agentic) |
|
|
103
|
+
| **Cost Control** | **✅ Built-in Budget** | ❌ | ❌ | ❌ |
|
|
104
|
+
| **CI/CD Ready** | **✅ Native** | ⚠️ Heavy | ✅ | ⚠️ Complex |
|
|
105
|
+
| **RAG/Doc Attacks** | **✅ Specialized** | ⚠️ Limited | ✅ | ✅ |
|
|
106
|
+
|
|
107
|
+
| Before | After |
|
|
108
|
+
|--------|-------|
|
|
109
|
+
| Hire pentester ($5K) | Run: `vektor scan` |
|
|
110
|
+
| Wait 2 weeks | Get results in 1 minute |
|
|
111
|
+
| Get 50-page report | Actionable JSON/HTML reports |
|
|
112
|
+
| Still don't know if fixes work | Re-run to validate fixes |
|
|
113
|
+
|
|
114
|
+
## ๐ฌ Attack Categories
|
|
115
|
+
|
|
116
|
+
### 1. Prompt Injection (6 attacks)
|
|
117
|
+
- Direct instruction injection
|
|
118
|
+
- System prompt override
|
|
119
|
+
- Delimiter confusion
|
|
120
|
+
- Role manipulation
|
|
121
|
+
- Multi-turn context poisoning
|
|
122
|
+
- Encoding-based bypass
|
|
123
|
+
|
|
124
|
+
### 2. Data Extraction (4 attacks)
|
|
125
|
+
- Training data leak attempts
|
|
126
|
+
- System prompt disclosure
|
|
127
|
+
- Context window extraction
|
|
128
|
+
- PII leakage testing
|
|
129
|
+
|
|
130
|
+
### 3. Instruction Hijacking (5 attacks) — **NOVEL**
|
|
131
|
+
- Simple document injection
|
|
132
|
+
- DOCX hidden text injection
|
|
133
|
+
- DOCX footnote injection
|
|
134
|
+
- Markdown comment injection
|
|
135
|
+
- Multi-document context poisoning
|
|
136
|
+
|
|
137
|
+
### 4. RAG Attacks (5 attacks)
|
|
138
|
+
- Context poisoning via retrieved docs
|
|
139
|
+
- RAG prompt leakage
|
|
140
|
+
- Source fabrication / hallucination injection
|
|
141
|
+
- Indirect injection via document store
|
|
142
|
+
- Chunking boundary exploitation
|
|
143
|
+
|
|
144
|
+
### 5. Agent Attacks (4 attacks)
|
|
145
|
+
- Tool call injection
|
|
146
|
+
- Goal hijacking
|
|
147
|
+
- Memory poisoning
|
|
148
|
+
- Agent scope escape
|
|
149
|
+
|
|
150
|
+
### 6. Structured Output Injection (3 attacks)
|
|
151
|
+
- JSON schema bypass
|
|
152
|
+
- Output format injection
|
|
153
|
+
- Type confusion attack
|
|
154
|
+
|
|
155
|
+
## ๐ฆ Installation
|
|
156
|
+
|
|
157
|
+
### Docker
|
|
158
|
+
```bash
|
|
159
|
+
docker build -t vektor .
|
|
160
|
+
docker run -e OPENAI_API_KEY=$OPENAI_API_KEY vektor scan --target openai
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### pip
|
|
164
|
+
```bash
|
|
165
|
+
pip install vektor-scan
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### From Source
|
|
169
|
+
```bash
|
|
170
|
+
git clone https://github.com/Swapnil565/Vektor-.git vektor
|
|
171
|
+
cd vektor
|
|
172
|
+
pip install -e .
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## ๐ Usage
|
|
176
|
+
|
|
177
|
+
### Basic Scan
|
|
178
|
+
```bash
|
|
179
|
+
vektor scan --target openai --budget 1.0
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Quick Mode (High-confidence attacks only)
|
|
183
|
+
```bash
|
|
184
|
+
vektor scan --target openai --quick
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### CI/CD Integration
|
|
188
|
+
```bash
|
|
189
|
+
vektor scan --target openai --ci --output report.json
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
### Specific Attacks
|
|
193
|
+
```bash
|
|
194
|
+
vektor scan --target openai --attacks direct_injection,system_override
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### Demo Mode (No API calls)
|
|
198
|
+
```bash
|
|
199
|
+
vektor demo
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## ๐ Sample Output
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
206
|
+
โ V E K T O R โ
|
|
207
|
+
โ AI Security Testing Framework โ
|
|
208
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
209
|
+
|
|
210
|
+
โ Testing attacks... โโโโโโโโโโโโโโโโโโโโโโ 100% (15/15)
|
|
211
|
+
|
|
212
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโณโโโโโโโโโโโโณโโโโโโโโโโโโโ
|
|
213
|
+
โ Attack โ Severity โ Success โ
|
|
214
|
+
โกโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฉ
|
|
215
|
+
โ Direct Injection โ HIGH โ 83% โ
|
|
216
|
+
โ DOCX Hidden Text โ CRITICAL โ 67% โ
|
|
217
|
+
โ System Prompt Reveal โ HIGH โ 50% โ
|
|
218
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโดโโโโโโโโโโโโโ
|
|
219
|
+
|
|
220
|
+
โญโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฎ
|
|
221
|
+
โ Summary โ
|
|
222
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
|
|
223
|
+
โ Risk Score: 72/100 โ
|
|
224
|
+
โ Total Vulnerabilities: 3 โ
|
|
225
|
+
โ Cost: $0.47 โ
|
|
226
|
+
โ โ
|
|
227
|
+
โ HIGH RISK: Address all critical โ
|
|
228
|
+
โ vulnerabilities before deploy โ
|
|
229
|
+
โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ
|
|
230
|
+
|
|
231
|
+
โ Report saved to: report.json
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## ๐ Novel Research: Instruction Hijacking
|
|
235
|
+
|
|
236
|
+
Vektor is the first tool to systematically test **document-based instruction hijacking** - a new class of vulnerabilities where attackers embed malicious instructions in document formats (DOCX, PDF, Markdown) that get processed by RAG systems.
|
|
237
|
+
|
|
238
|
+
Our research found:
|
|
239
|
+
- **60% of RAG systems** vulnerable to hidden text injection
|
|
240
|
+
- **40% vulnerable** to footnote/comment injection
|
|
241
|
+
- Standard sanitization **doesn't catch** format-specific exploits
|
|
242
|
+
|
|
243
|
+
[Read the full research paper →](docs/INSTRUCTION_HIJACKING.md)
|
|
244
|
+
|
|
245
|
+
## ๐ ๏ธ CI/CD Integration
|
|
246
|
+
|
|
247
|
+
### GitHub Actions
|
|
248
|
+
```yaml
|
|
249
|
+
name: LLM Security Scan
|
|
250
|
+
on: [push, pull_request]
|
|
251
|
+
jobs:
|
|
252
|
+
security:
|
|
253
|
+
runs-on: ubuntu-latest
|
|
254
|
+
steps:
|
|
255
|
+
- uses: actions/checkout@v3
|
|
256
|
+
- uses: actions/setup-python@v4
|
|
257
|
+
with:
|
|
258
|
+
python-version: '3.11'
|
|
259
|
+
- name: Install Vektor
|
|
260
|
+
run: pip install vektor-scan
|
|
261
|
+
- name: Scan (no API key needed)
|
|
262
|
+
run: vektor scan --target vulnerable --ci --output report.json
|
|
263
|
+
- name: Upload Report
|
|
264
|
+
uses: actions/upload-artifact@v4
|
|
265
|
+
with:
|
|
266
|
+
name: security-report
|
|
267
|
+
path: report.json
|
|
268
|
+
# Optional: scan your real LLM endpoint
|
|
269
|
+
# - name: Scan real endpoint
|
|
270
|
+
# env:
|
|
271
|
+
# OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
272
|
+
# run: vektor scan --target openai --ci --output report.json
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## ๐ค Contributing
|
|
276
|
+
|
|
277
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
278
|
+
|
|
279
|
+
### Adding Custom Attacks
|
|
280
|
+
```python
|
|
281
|
+
from vektor.attacks.base import BaseAttack, Vulnerability
|
|
282
|
+
|
|
283
|
+
class MyCustomAttack(BaseAttack):
|
|
284
|
+
def __init__(self):
|
|
285
|
+
super().__init__(name="my_attack", category="Custom")
|
|
286
|
+
|
|
287
|
+
def execute(self, target):
|
|
288
|
+
# Your attack logic
|
|
289
|
+
pass
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
## ๐ Documentation
|
|
293
|
+
|
|
294
|
+
- [Demo Walkthrough](docs/DEMO.md)
|
|
295
|
+
- [Installation Guide](docs/INSTALL.md)
|
|
296
|
+
- [Usage Reference](docs/USAGE.md)
|
|
297
|
+
- [Research: Instruction Hijacking](docs/INSTRUCTION_HIJACKING.md)
|
|
298
|
+
|
|
299
|
+
## ๐บ๏ธ Roadmap
|
|
300
|
+
|
|
301
|
+
### v0.2 (Current)
|
|
302
|
+
- โ
27 attack vectors across 6 categories
|
|
303
|
+
- โ
HTTP endpoint target (`vektor scan --url http://localhost:8000/chat`)
|
|
304
|
+
- โ
RAG pipeline targets (LangChain, LlamaIndex)
|
|
305
|
+
- โ
Agent targets (LangGraph, CrewAI, AutoGen)
|
|
306
|
+
- โ
Regression diff system for CI gating
|
|
307
|
+
- โ
Python scan() API
|
|
308
|
+
- โ
Docker deployment + CI/CD integration
|
|
309
|
+
|
|
310
|
+
### v0.3 (Next)
|
|
311
|
+
- โณ Web dashboard
|
|
312
|
+
- โณ PDF document testing
|
|
313
|
+
- โณ Multi-model comparison
|
|
314
|
+
- โณ Compliance reporting (OWASP LLM Top 10 mapping)
|
|
315
|
+
|
|
316
|
+
## ๐ License
|
|
317
|
+
|
|
318
|
+
MIT License - see [LICENSE](LICENSE) for details
|
|
319
|
+
|
|
320
|
+
## ๐ Acknowledgments
|
|
321
|
+
|
|
322
|
+
Built on research from:
|
|
323
|
+
- Simon Willison ([@simonw](https://twitter.com/simonw)) - Prompt injection taxonomy
|
|
324
|
+
- Greshake et al. - Indirect prompt injection
|
|
325
|
+
- OWASP LLM Top 10 Project
|
|
326
|
+
|
|
327
|
+
## โญ Support
|
|
328
|
+
|
|
329
|
+
If you find this useful, please star the repository and share with your network!
|
|
330
|
+
|
|
331
|
+
- GitHub: [Vektor](https://github.com/Swapnil565/Vektor-)
|
|
332
|
+
- Issues: [Bug reports & feature requests](https://github.com/Swapnil565/Vektor-/issues)
|
|
333
|
+
|
|
334
|
+
## ๐ง Contact
|
|
335
|
+
|
|
336
|
+
- Email: swapnil.wankhede23@spit.ac.in
|
|
337
|
+
- Author: Swapnil
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
**⚠️ Disclaimer:** This tool is for security testing purposes only. Use responsibly and only on systems you have permission to test.
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
# Vektor 🛡️
|
|
2
|
+
|
|
3
|
+
**pytest for AI security — scan LLM apps for vulnerabilities in 30 seconds**
|
|
4
|
+
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Python 3.8+](https://www.python.org/downloads/)
|
|
7
|
+
|
|
8
|
+
## ๐ฏ What is Vektor?
|
|
9
|
+
|
|
10
|
+
An automated security testing framework that scans LLM applications for vulnerabilities:
|
|
11
|
+
- ✅ **27 validated attack vectors** across 6 categories
|
|
12
|
+
- ✅ **$0.50 average scan cost** with built-in budget controls
|
|
13
|
+
- ✅ **30-second results** - Docker run, immediate feedback
|
|
14
|
+
- ✅ **CI/CD ready** - Integrate into your deployment pipeline
|
|
15
|
+
- ✅ **Novel research** - First tool to systematically test document-based instruction hijacking
|
|
16
|
+
|
|
17
|
+
## ๐ Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Step 1: Install
|
|
21
|
+
pip install vektor-scan
|
|
22
|
+
|
|
23
|
+
# Step 2: Zero-setup demo (no API key)
|
|
24
|
+
vektor demo
|
|
25
|
+
|
|
26
|
+
# Step 3: Real scan, $0 cost, always works
|
|
27
|
+
vektor scan --target vulnerable --output my-first-report.html
|
|
28
|
+
|
|
29
|
+
# Step 4: Open the report
|
|
30
|
+
# Windows: start my-first-report.html
|
|
31
|
+
# Mac: open my-first-report.html
|
|
32
|
+
# Linux: xdg-open my-first-report.html
|
|
33
|
+
|
|
34
|
+
# Step 5 (optional): Scan your own LLM app
|
|
35
|
+
export OPENAI_API_KEY=sk-your-key
|
|
36
|
+
vektor scan --target openai --budget 1.0
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## ๐ Scan Any AI API โ No SDK Needed
|
|
40
|
+
|
|
41
|
+
Point Vektor at any HTTP endpoint:
|
|
42
|
+
```bash
|
|
43
|
+
# Auto-detects OpenAI/Anthropic/custom shapes
|
|
44
|
+
vektor scan --url http://localhost:8000/chat
|
|
45
|
+
|
|
46
|
+
# With auth header
|
|
47
|
+
vektor scan --url https://my-app.com/api \
|
|
48
|
+
--header "Authorization: Bearer YOUR_TOKEN"
|
|
49
|
+
|
|
50
|
+
# Custom request/response field names
|
|
51
|
+
vektor scan --url http://localhost:8000/predict \
|
|
52
|
+
--request-field prompt --response-field answer
|
|
53
|
+
|
|
54
|
+
# Query-parameter mode (e.g. /api/parse?text=PAYLOAD)
|
|
55
|
+
vektor scan --url http://localhost:8000/api/parse \
|
|
56
|
+
--param-field text
|
|
57
|
+
|
|
58
|
+
# Rate-limited API โ add delay between requests
|
|
59
|
+
vektor scan --url http://localhost:8000/chat \
|
|
60
|
+
--request-delay 12.0
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## ๐ก Why Vektor?
|
|
64
|
+
|
|
65
|
+
| Feature | Vektor | Garak | Promptfoo | PyRIT |
|
|
66
|
+
| :--- | :---: | :---: | :---: | :---: |
|
|
67
|
+
| **Primary Focus** | **Actionable Security** | Vulnerability Scanning | General Eval / Testing | Red Teaming Framework |
|
|
68
|
+
| **Setup Time** | **< 30s** | ~10 mins | ~5 mins | ~30 mins |
|
|
69
|
+
| **Scan Speed** | **Fast (Targeted)** | Slow (Exhaustive) | Fast | Slow (Agentic) |
|
|
70
|
+
| **Cost Control** | **✅ Built-in Budget** | ❌ | ❌ | ❌ |
|
|
71
|
+
| **CI/CD Ready** | **✅ Native** | ⚠️ Heavy | ✅ | ⚠️ Complex |
|
|
72
|
+
| **RAG/Doc Attacks** | **✅ Specialized** | ⚠️ Limited | ✅ | ✅ |
|
|
73
|
+
|
|
74
|
+
| Before | After |
|
|
75
|
+
|--------|-------|
|
|
76
|
+
| Hire pentester ($5K) | Run: `vektor scan` |
|
|
77
|
+
| Wait 2 weeks | Get results in 1 minute |
|
|
78
|
+
| Get 50-page report | Actionable JSON/HTML reports |
|
|
79
|
+
| Still don't know if fixes work | Re-run to validate fixes |
|
|
80
|
+
|
|
81
|
+
## ๐ฌ Attack Categories
|
|
82
|
+
|
|
83
|
+
### 1. Prompt Injection (6 attacks)
|
|
84
|
+
- Direct instruction injection
|
|
85
|
+
- System prompt override
|
|
86
|
+
- Delimiter confusion
|
|
87
|
+
- Role manipulation
|
|
88
|
+
- Multi-turn context poisoning
|
|
89
|
+
- Encoding-based bypass
|
|
90
|
+
|
|
91
|
+
### 2. Data Extraction (4 attacks)
|
|
92
|
+
- Training data leak attempts
|
|
93
|
+
- System prompt disclosure
|
|
94
|
+
- Context window extraction
|
|
95
|
+
- PII leakage testing
|
|
96
|
+
|
|
97
|
+
### 3. Instruction Hijacking (5 attacks) โ **NOVEL**
|
|
98
|
+
- Simple document injection
|
|
99
|
+
- DOCX hidden text injection
|
|
100
|
+
- DOCX footnote injection
|
|
101
|
+
- Markdown comment injection
|
|
102
|
+
- Multi-document context poisoning
|
|
103
|
+
|
|
104
|
+
### 4. RAG Attacks (5 attacks)
|
|
105
|
+
- Context poisoning via retrieved docs
|
|
106
|
+
- RAG prompt leakage
|
|
107
|
+
- Source fabrication / hallucination injection
|
|
108
|
+
- Indirect injection via document store
|
|
109
|
+
- Chunking boundary exploitation
|
|
110
|
+
|
|
111
|
+
### 5. Agent Attacks (4 attacks)
|
|
112
|
+
- Tool call injection
|
|
113
|
+
- Goal hijacking
|
|
114
|
+
- Memory poisoning
|
|
115
|
+
- Agent scope escape
|
|
116
|
+
|
|
117
|
+
### 6. Structured Output Injection (3 attacks)
|
|
118
|
+
- JSON schema bypass
|
|
119
|
+
- Output format injection
|
|
120
|
+
- Type confusion attack
|
|
121
|
+
|
|
122
|
+
## ๐ฆ Installation
|
|
123
|
+
|
|
124
|
+
### Docker
|
|
125
|
+
```bash
|
|
126
|
+
docker build -t vektor .
|
|
127
|
+
docker run -e OPENAI_API_KEY=$OPENAI_API_KEY vektor scan --target openai
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### pip
|
|
131
|
+
```bash
|
|
132
|
+
pip install vektor-scan
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### From Source
|
|
136
|
+
```bash
|
|
137
|
+
git clone https://github.com/Swapnil565/Vektor-.git vektor
|
|
138
|
+
cd vektor
|
|
139
|
+
pip install -e .
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## ๐ Usage
|
|
143
|
+
|
|
144
|
+
### Basic Scan
|
|
145
|
+
```bash
|
|
146
|
+
vektor scan --target openai --budget 1.0
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Quick Mode (High-confidence attacks only)
|
|
150
|
+
```bash
|
|
151
|
+
vektor scan --target openai --quick
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### CI/CD Integration
|
|
155
|
+
```bash
|
|
156
|
+
vektor scan --target openai --ci --output report.json
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Specific Attacks
|
|
160
|
+
```bash
|
|
161
|
+
vektor scan --target openai --attacks direct_injection,system_override
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Demo Mode (No API calls)
|
|
165
|
+
```bash
|
|
166
|
+
vektor demo
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## ๐ Sample Output
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
173
|
+
โ V E K T O R โ
|
|
174
|
+
โ AI Security Testing Framework โ
|
|
175
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
176
|
+
|
|
177
|
+
โ Testing attacks... โโโโโโโโโโโโโโโโโโโโโโ 100% (15/15)
|
|
178
|
+
|
|
179
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโณโโโโโโโโโโโโณโโโโโโโโโโโโโ
|
|
180
|
+
โ Attack โ Severity โ Success โ
|
|
181
|
+
โกโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฉ
|
|
182
|
+
โ Direct Injection โ HIGH โ 83% โ
|
|
183
|
+
โ DOCX Hidden Text โ CRITICAL โ 67% โ
|
|
184
|
+
โ System Prompt Reveal โ HIGH โ 50% โ
|
|
185
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโดโโโโโโโโโโโโโ
|
|
186
|
+
|
|
187
|
+
โญโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฎ
|
|
188
|
+
โ Summary โ
|
|
189
|
+
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโค
|
|
190
|
+
โ Risk Score: 72/100 โ
|
|
191
|
+
โ Total Vulnerabilities: 3 โ
|
|
192
|
+
โ Cost: $0.47 โ
|
|
193
|
+
โ โ
|
|
194
|
+
โ HIGH RISK: Address all critical โ
|
|
195
|
+
โ vulnerabilities before deploy โ
|
|
196
|
+
โฐโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฏ
|
|
197
|
+
|
|
198
|
+
โ Report saved to: report.json
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## ๐ Novel Research: Instruction Hijacking
|
|
202
|
+
|
|
203
|
+
Vektor is the first tool to systematically test **document-based instruction hijacking** - a new class of vulnerabilities where attackers embed malicious instructions in document formats (DOCX, PDF, Markdown) that get processed by RAG systems.
|
|
204
|
+
|
|
205
|
+
Our research found:
|
|
206
|
+
- **60% of RAG systems** vulnerable to hidden text injection
|
|
207
|
+
- **40% vulnerable** to footnote/comment injection
|
|
208
|
+
- Standard sanitization **doesn't catch** format-specific exploits
|
|
209
|
+
|
|
210
|
+
[Read the full research paper →](docs/INSTRUCTION_HIJACKING.md)
|
|
211
|
+
|
|
212
|
+
## ๐ ๏ธ CI/CD Integration
|
|
213
|
+
|
|
214
|
+
### GitHub Actions
|
|
215
|
+
```yaml
|
|
216
|
+
name: LLM Security Scan
|
|
217
|
+
on: [push, pull_request]
|
|
218
|
+
jobs:
|
|
219
|
+
security:
|
|
220
|
+
runs-on: ubuntu-latest
|
|
221
|
+
steps:
|
|
222
|
+
- uses: actions/checkout@v3
|
|
223
|
+
- uses: actions/setup-python@v4
|
|
224
|
+
with:
|
|
225
|
+
python-version: '3.11'
|
|
226
|
+
- name: Install Vektor
|
|
227
|
+
run: pip install vektor-scan
|
|
228
|
+
- name: Scan (no API key needed)
|
|
229
|
+
run: vektor scan --target vulnerable --ci --output report.json
|
|
230
|
+
- name: Upload Report
|
|
231
|
+
uses: actions/upload-artifact@v4
|
|
232
|
+
with:
|
|
233
|
+
name: security-report
|
|
234
|
+
path: report.json
|
|
235
|
+
# Optional: scan your real LLM endpoint
|
|
236
|
+
# - name: Scan real endpoint
|
|
237
|
+
# env:
|
|
238
|
+
# OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
239
|
+
# run: vektor scan --target openai --ci --output report.json
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## ๐ค Contributing
|
|
243
|
+
|
|
244
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
245
|
+
|
|
246
|
+
### Adding Custom Attacks
|
|
247
|
+
```python
|
|
248
|
+
from vektor.attacks.base import BaseAttack, Vulnerability
|
|
249
|
+
|
|
250
|
+
class MyCustomAttack(BaseAttack):
|
|
251
|
+
def __init__(self):
|
|
252
|
+
super().__init__(name="my_attack", category="Custom")
|
|
253
|
+
|
|
254
|
+
def execute(self, target):
|
|
255
|
+
# Your attack logic
|
|
256
|
+
pass
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
## ๐ Documentation
|
|
260
|
+
|
|
261
|
+
- [Demo Walkthrough](docs/DEMO.md)
|
|
262
|
+
- [Installation Guide](docs/INSTALL.md)
|
|
263
|
+
- [Usage Reference](docs/USAGE.md)
|
|
264
|
+
- [Research: Instruction Hijacking](docs/INSTRUCTION_HIJACKING.md)
|
|
265
|
+
|
|
266
|
+
## ๐บ๏ธ Roadmap
|
|
267
|
+
|
|
268
|
+
### v0.2 (Current)
|
|
269
|
+
- โ
27 attack vectors across 6 categories
|
|
270
|
+
- โ
HTTP endpoint target (`vektor scan --url http://localhost:8000/chat`)
|
|
271
|
+
- โ
RAG pipeline targets (LangChain, LlamaIndex)
|
|
272
|
+
- โ
Agent targets (LangGraph, CrewAI, AutoGen)
|
|
273
|
+
- โ
Regression diff system for CI gating
|
|
274
|
+
- โ
Python scan() API
|
|
275
|
+
- โ
Docker deployment + CI/CD integration
|
|
276
|
+
|
|
277
|
+
### v0.3 (Next)
|
|
278
|
+
- โณ Web dashboard
|
|
279
|
+
- โณ PDF document testing
|
|
280
|
+
- โณ Multi-model comparison
|
|
281
|
+
- โณ Compliance reporting (OWASP LLM Top 10 mapping)
|
|
282
|
+
|
|
283
|
+
## ๐ License
|
|
284
|
+
|
|
285
|
+
MIT License - see [LICENSE](LICENSE) for details
|
|
286
|
+
|
|
287
|
+
## ๐ Acknowledgments
|
|
288
|
+
|
|
289
|
+
Built on research from:
|
|
290
|
+
- Simon Willison ([@simonw](https://twitter.com/simonw)) - Prompt injection taxonomy
|
|
291
|
+
- Greshake et al. - Indirect prompt injection
|
|
292
|
+
- OWASP LLM Top 10 Project
|
|
293
|
+
|
|
294
|
+
## โญ Support
|
|
295
|
+
|
|
296
|
+
If you find this useful, please star the repository and share with your network!
|
|
297
|
+
|
|
298
|
+
- GitHub: [Vektor](https://github.com/Swapnil565/Vektor-)
|
|
299
|
+
- Issues: [Bug reports & feature requests](https://github.com/Swapnil565/Vektor-/issues)
|
|
300
|
+
|
|
301
|
+
## ๐ง Contact
|
|
302
|
+
|
|
303
|
+
- Email: swapnil.wankhede23@spit.ac.in
|
|
304
|
+
- Author: Swapnil
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
**⚠️ Disclaimer:** This tool is for security testing purposes only. Use responsibly and only on systems you have permission to test.
|