zen-ai-pentest 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +28 -0
- agents/agent_base.py +239 -0
- agents/agent_orchestrator.py +346 -0
- agents/analysis_agent.py +225 -0
- agents/cli.py +258 -0
- agents/exploit_agent.py +224 -0
- agents/integration.py +211 -0
- agents/post_scan_agent.py +937 -0
- agents/react_agent.py +384 -0
- agents/react_agent_enhanced.py +616 -0
- agents/react_agent_vm.py +298 -0
- agents/research_agent.py +176 -0
- api/__init__.py +11 -0
- api/auth.py +123 -0
- api/main.py +1027 -0
- api/schemas.py +357 -0
- api/websocket.py +97 -0
- autonomous/__init__.py +122 -0
- autonomous/agent.py +253 -0
- autonomous/agent_loop.py +1370 -0
- autonomous/exploit_validator.py +1537 -0
- autonomous/memory.py +448 -0
- autonomous/react.py +339 -0
- autonomous/tool_executor.py +488 -0
- backends/__init__.py +16 -0
- backends/chatgpt_direct.py +133 -0
- backends/claude_direct.py +130 -0
- backends/duckduckgo.py +138 -0
- backends/openrouter.py +120 -0
- benchmarks/__init__.py +149 -0
- benchmarks/benchmark_engine.py +904 -0
- benchmarks/ci_benchmark.py +785 -0
- benchmarks/comparison.py +729 -0
- benchmarks/metrics.py +553 -0
- benchmarks/run_benchmarks.py +809 -0
- ci_cd/__init__.py +2 -0
- core/__init__.py +17 -0
- core/async_pool.py +282 -0
- core/asyncio_fix.py +222 -0
- core/cache.py +472 -0
- core/container.py +277 -0
- core/database.py +114 -0
- core/input_validator.py +353 -0
- core/models.py +288 -0
- core/orchestrator.py +611 -0
- core/plugin_manager.py +571 -0
- core/rate_limiter.py +405 -0
- core/secure_config.py +328 -0
- core/shield_integration.py +296 -0
- modules/__init__.py +46 -0
- modules/cve_database.py +362 -0
- modules/exploit_assist.py +330 -0
- modules/nuclei_integration.py +480 -0
- modules/osint.py +604 -0
- modules/protonvpn.py +554 -0
- modules/recon.py +165 -0
- modules/sql_injection_db.py +826 -0
- modules/tool_orchestrator.py +498 -0
- modules/vuln_scanner.py +292 -0
- modules/wordlist_generator.py +566 -0
- risk_engine/__init__.py +99 -0
- risk_engine/business_impact.py +267 -0
- risk_engine/business_impact_calculator.py +563 -0
- risk_engine/cvss.py +156 -0
- risk_engine/epss.py +190 -0
- risk_engine/example_usage.py +294 -0
- risk_engine/false_positive_engine.py +1073 -0
- risk_engine/scorer.py +304 -0
- web_ui/backend/main.py +471 -0
- zen_ai_pentest-2.0.0.dist-info/METADATA +795 -0
- zen_ai_pentest-2.0.0.dist-info/RECORD +75 -0
- zen_ai_pentest-2.0.0.dist-info/WHEEL +5 -0
- zen_ai_pentest-2.0.0.dist-info/entry_points.txt +2 -0
- zen_ai_pentest-2.0.0.dist-info/licenses/LICENSE +21 -0
- zen_ai_pentest-2.0.0.dist-info/top_level.txt +10 -0
benchmarks/comparison.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Zen-AI-Pentest Benchmark Comparison Module
|
|
3
|
+
|
|
4
|
+
Compare Zen-AI-Pentest performance against other security tools.
|
|
5
|
+
Supports both AI-based and traditional security scanners.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
import math
import statistics
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Callable, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ToolCategory(Enum):
    """Closed set of categories a benchmarked security tool can belong to.

    Each member's value is the lowercase string form of its name.
    """

    # AI-assisted tooling
    AI_PENTEST = "ai_pentest"

    # Conventional scanners and analyzers
    TRADITIONAL_SCANNER = "traditional_scanner"
    STATIC_ANALYZER = "static_analyzer"
    DYNAMIC_ANALYZER = "dynamic_analyzer"

    # Scanners grouped by target surface
    NETWORK_SCANNER = "network_scanner"
    WEB_SCANNER = "web_scanner"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ToolCapabilities:
    """Boolean feature flags describing what a security testing tool can do.

    Every flag defaults to ``False``; callers switch on only the features a
    given tool actually offers.
    """

    # --- Scan targets -------------------------------------------------------
    supports_web_scanning: bool = False
    supports_network_scanning: bool = False
    supports_api_scanning: bool = False
    supports_mobile_scanning: bool = False
    supports_cloud_scanning: bool = False
    supports_container_scanning: bool = False

    # --- AI / ML features ---------------------------------------------------
    uses_ai: bool = False
    supports_autonomous_testing: bool = False
    supports_contextual_analysis: bool = False
    supports_attack_chain_building: bool = False

    # --- Exploitation -------------------------------------------------------
    supports_exploitation: bool = False
    supports_post_exploitation: bool = False
    supports_lateral_movement: bool = False

    # --- Reporting / output formats -----------------------------------------
    supports_pdf_reports: bool = False
    supports_json_output: bool = False
    supports_xml_output: bool = False
    supports_sarif: bool = False
    supports_cicd_integration: bool = False

    # --- Integrations -------------------------------------------------------
    has_api: bool = False
    has_webhook_support: bool = False
    has_slack_integration: bool = False
    has_jira_integration: bool = False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class ToolMetadata:
    """Descriptive record for a security tool taking part in a comparison.

    The first five fields are required; everything else has a default.
    """

    # --- Identity (required) ------------------------------------------------
    name: str
    version: str
    vendor: str
    category: ToolCategory
    # One of "open_source", "commercial", "freemium".
    license_type: str

    # --- Commercial / reference information ---------------------------------
    pricing_model: str = ""
    website: str = ""
    documentation_url: str = ""
    github_url: Optional[str] = None

    # --- Feature flags ------------------------------------------------------
    # A fresh ToolCapabilities (all flags False) is created per instance.
    capabilities: ToolCapabilities = field(default_factory=ToolCapabilities)

    # --- Performance characteristics ----------------------------------------
    # Web scan time is expressed in minutes.
    avg_scan_time_web: Optional[int] = None
    # NOTE(review): presumably minutes as well - confirm.
    avg_scan_time_network: Optional[int] = None
    max_concurrent_scans: Optional[int] = None
    requires_internet: bool = True
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class ToolBenchmarkResult:
    """Benchmark measurements produced by one tool for one scenario.

    Only ``tool_metadata`` and ``scenario_id`` are required. All metrics
    default to zero or empty values and are expected to be filled in by
    whoever runs the scan; nothing in this class derives one metric from
    another.
    """

    # Tool that produced the result and the scenario it was run against.
    tool_metadata: ToolMetadata
    scenario_id: str
    # Naive UTC creation time. Built from an aware "now" with the tzinfo
    # stripped: this yields the same value as datetime.utcnow(), which is
    # deprecated since Python 3.12.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    # --- Core metrics -------------------------------------------------------
    scan_duration_seconds: float = 0.0
    vulnerabilities_found: int = 0
    true_positives: int = 0
    false_positives: int = 0
    false_negatives: int = 0

    # --- Coverage metrics ---------------------------------------------------
    endpoints_scanned: int = 0
    total_endpoints: int = 0
    parameters_tested: int = 0
    total_parameters: int = 0

    # --- Quality metrics ----------------------------------------------------
    precision: float = 0.0
    recall: float = 0.0
    f1_score: float = 0.0
    accuracy: float = 0.0

    # --- Cost metrics (None when not applicable or unknown) ------------------
    total_cost_usd: Optional[float] = None
    tokens_used: Optional[int] = None

    # --- Detailed results ---------------------------------------------------
    # Mutable defaults use factories so instances never share containers.
    findings: List[Dict[str, Any]] = field(default_factory=list)
    raw_output: Dict[str, Any] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class CompetitorTool(ABC):
    """Interface every competitor tool wrapper has to implement.

    Subclasses supply ``run_scan`` and ``is_available``; the metadata given
    to the constructor is stored on ``self.metadata``.
    """

    def __init__(self, metadata: ToolMetadata):
        self.metadata = metadata

    @abstractmethod
    async def run_scan(
        self,
        target: str,
        scenario_config: Dict[str, Any],
    ) -> ToolBenchmarkResult:
        """Scan ``target`` for the given scenario and return the tool's result."""

    @abstractmethod
    def is_available(self) -> bool:
        """Return True when the wrapped tool is available/installed."""

    def get_metadata(self) -> ToolMetadata:
        """Return the metadata this wrapper was constructed with."""
        return self.metadata
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# =============================================================================
|
|
148
|
+
# PENTESTGPT COMPETITOR
|
|
149
|
+
# =============================================================================
|
|
150
|
+
|
|
151
|
+
# Static description of PentestGPT used by the comparison framework.
PENTESTGPT_METADATA = ToolMetadata(
    name="PentestGPT",
    version="2.0",
    vendor="Gelei Deng et al.",
    category=ToolCategory.AI_PENTEST,
    license_type="open_source",
    # The project has no separate site; both URLs point at the repository.
    website="https://github.com/GeleiDeng/PentestGPT",
    github_url="https://github.com/GeleiDeng/PentestGPT",
    capabilities=ToolCapabilities(
        # Scan targets
        supports_web_scanning=True,
        supports_network_scanning=True,
        # AI features
        uses_ai=True,
        supports_autonomous_testing=True,
        supports_contextual_analysis=True,
        supports_attack_chain_building=True,
        # Exploitation and output
        supports_exploitation=True,
        supports_json_output=True,
    ),
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class PentestGPTCompetitor(CompetitorTool):
    """CompetitorTool adapter for PentestGPT (placeholder implementation)."""

    def __init__(self):
        super().__init__(PENTESTGPT_METADATA)

    def is_available(self) -> bool:
        """Report whether PentestGPT can be run; currently always False."""
        # Placeholder: a real implementation would look for the
        # pentestgpt command.
        return False

    async def run_scan(
        self,
        target: str,
        scenario_config: Dict[str, Any],
    ) -> ToolBenchmarkResult:
        """Return an empty result tagged with the scenario id.

        Placeholder: no scan is executed and ``target`` is not used. The
        scenario id falls back to "unknown" when the config has none.
        """
        scenario_id = scenario_config.get("scenario_id", "unknown")
        return ToolBenchmarkResult(
            tool_metadata=self.metadata,
            scenario_id=scenario_id,
        )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# =============================================================================
|
|
197
|
+
# AUTOPENTEST-DRL COMPETITOR
|
|
198
|
+
# =============================================================================
|
|
199
|
+
|
|
200
|
+
# Static description of AutoPentest-DRL used by the comparison framework.
AUTOPENTEST_METADATA = ToolMetadata(
    name="AutoPentest-DRL",
    version="1.0",
    vendor="Li et al.",
    category=ToolCategory.AI_PENTEST,
    license_type="open_source",
    # Both URLs point at the project's repository.
    website="https://github.com/crond-jaist/AutoPentest-DRL",
    github_url="https://github.com/crond-jaist/AutoPentest-DRL",
    capabilities=ToolCapabilities(
        # Scan targets
        supports_network_scanning=True,
        # AI features
        uses_ai=True,
        supports_autonomous_testing=True,
        # Exploitation and output
        supports_exploitation=True,
        supports_post_exploitation=True,
        supports_json_output=True,
    ),
)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class AutoPentestDRLCompetitor(CompetitorTool):
    """CompetitorTool adapter for AutoPentest-DRL (placeholder implementation)."""

    def __init__(self):
        super().__init__(AUTOPENTEST_METADATA)

    def is_available(self) -> bool:
        """Report whether AutoPentest-DRL can be run; currently always False."""
        # Placeholder: no availability check is performed yet.
        return False

    async def run_scan(
        self,
        target: str,
        scenario_config: Dict[str, Any],
    ) -> ToolBenchmarkResult:
        """Return an empty result tagged with the scenario id.

        No scan is executed and ``target`` is not used. The scenario id
        falls back to "unknown" when the config has none.
        """
        scenario_id = scenario_config.get("scenario_id", "unknown")
        return ToolBenchmarkResult(
            tool_metadata=self.metadata,
            scenario_id=scenario_id,
        )
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# =============================================================================
|
|
242
|
+
# TRADITIONAL SCANNERS
|
|
243
|
+
# =============================================================================
|
|
244
|
+
|
|
245
|
+
# Static descriptions of the traditional (non-AI) scanners. The web scan time
# is in minutes (see ToolMetadata); the network scan time is presumably in
# minutes as well - TODO confirm.

# --- Nessus -------------------------------------------------------------------
NESSUS_METADATA = ToolMetadata(
    name="Nessus",
    version="10.7",
    vendor="Tenable",
    category=ToolCategory.TRADITIONAL_SCANNER,
    license_type="commercial",
    pricing_model="subscription",
    website="https://www.tenable.com/products/nessus",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_cloud_scanning=True,
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_json_output=True,
        supports_xml_output=True,
        has_api=True,
        has_jira_integration=True,
    ),
    avg_scan_time_network=30,
    max_concurrent_scans=10,
)

# --- OpenVAS ------------------------------------------------------------------
OPENVAS_METADATA = ToolMetadata(
    name="OpenVAS",
    version="22.4",
    vendor="Greenbone",
    category=ToolCategory.TRADITIONAL_SCANNER,
    license_type="open_source",
    website="https://www.greenbone.net/openvas",
    github_url="https://github.com/greenbone/openvas",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_pdf_reports=True,
        supports_xml_output=True,
        supports_sarif=True,
        has_api=True,
    ),
    avg_scan_time_network=45,
    max_concurrent_scans=5,
)

# --- Burp Suite Professional --------------------------------------------------
BURP_SUITE_METADATA = ToolMetadata(
    name="Burp Suite Professional",
    version="2024.1",
    vendor="PortSwigger",
    category=ToolCategory.WEB_SCANNER,
    license_type="commercial",
    pricing_model="perpetual_license",
    website="https://portswigger.net/burp",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_api_scanning=True,
        supports_mobile_scanning=True,
        # Autonomous testing is provided through Burp Scanner.
        supports_autonomous_testing=True,
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_xml_output=True,
        supports_sarif=True,
        has_api=True,
        has_jira_integration=True,
    ),
    avg_scan_time_web=20,
    max_concurrent_scans=5,
)

# --- OWASP ZAP ----------------------------------------------------------------
OWASP_ZAP_METADATA = ToolMetadata(
    name="OWASP ZAP",
    version="2.14",
    vendor="OWASP",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://www.zaproxy.org",
    github_url="https://github.com/zaproxy/zaproxy",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_api_scanning=True,
        supports_autonomous_testing=True,
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_sarif=True,
        supports_cicd_integration=True,
        has_api=True,
        has_jira_integration=True,
    ),
    avg_scan_time_web=25,
    max_concurrent_scans=3,
)

# --- Nikto --------------------------------------------------------------------
NIKTO_METADATA = ToolMetadata(
    name="Nikto",
    version="2.5",
    vendor="CIRT",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://cirt.net/Nikto2",
    github_url="https://github.com/sullo/nikto",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_cicd_integration=True,
    ),
    avg_scan_time_web=15,
    requires_internet=False,
)

# --- Nuclei -------------------------------------------------------------------
NUCLEI_METADATA = ToolMetadata(
    name="Nuclei",
    version="3.1",
    vendor="ProjectDiscovery",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://nuclei.projectdiscovery.io",
    github_url="https://github.com/projectdiscovery/nuclei",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_cloud_scanning=True,
        supports_exploitation=True,
        supports_json_output=True,
        supports_sarif=True,
        supports_cicd_integration=True,
        has_api=True,
    ),
    avg_scan_time_web=5,
    max_concurrent_scans=50,
    requires_internet=False,
)

# --- SQLMap -------------------------------------------------------------------
SQLMAP_METADATA = ToolMetadata(
    name="SQLMap",
    version="1.7",
    vendor="SQLMap Project",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://sqlmap.org",
    github_url="https://github.com/sqlmapproject/sqlmap",
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_exploitation=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_cicd_integration=True,
    ),
    avg_scan_time_web=30,
    requires_internet=False,
)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# =============================================================================
|
|
402
|
+
# COMPARISON FRAMEWORK
|
|
403
|
+
# =============================================================================
|
|
404
|
+
|
|
405
|
+
@dataclass
class ComparisonResult:
    """Outcome of comparing Zen-AI-Pentest with competitor tools on one scenario.

    The derived fields (``metric_improvements``, ``rankings``, ``winner``)
    start empty and are filled by ``calculate_improvements``,
    ``calculate_rankings`` and ``determine_winner`` respectively.
    """

    # --- Tool results -------------------------------------------------------
    zen_result: ToolBenchmarkResult
    competitor_results: List[ToolBenchmarkResult]

    # --- Scenario info ------------------------------------------------------
    scenario_id: str
    # Naive UTC creation time. Built from an aware "now" with the tzinfo
    # stripped: same value as datetime.utcnow(), which is deprecated since
    # Python 3.12.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    # --- Comparison metrics -------------------------------------------------
    # metric name -> competitor tool name -> improvement of Zen in percent.
    metric_improvements: Dict[str, Dict[str, float]] = field(default_factory=dict)
    # metric name -> tool names ordered from best to worst.
    rankings: Dict[str, List[str]] = field(default_factory=dict)

    # --- Summary ------------------------------------------------------------
    winner: Optional[str] = None
    # Not populated by any method of this class.
    statistical_significance: Dict[str, bool] = field(default_factory=dict)

    @staticmethod
    def _summarize(result: ToolBenchmarkResult) -> Dict[str, Any]:
        """Return the per-tool summary dict used by ``to_dict``."""
        return {
            "tool": result.tool_metadata.name,
            "precision": result.precision,
            "recall": result.recall,
            "f1_score": result.f1_score,
            "accuracy": result.accuracy,
            "duration_seconds": result.scan_duration_seconds,
            "vulnerabilities_found": result.vulnerabilities_found,
        }

    @staticmethod
    def _table_row(label: str, result: ToolBenchmarkResult) -> str:
        """Format one row of the markdown results table."""
        return (
            f"| {label} | "
            f"{result.precision:.3f} | {result.recall:.3f} | {result.f1_score:.3f} | "
            f"{result.accuracy:.3f} | {result.scan_duration_seconds:.1f}s | "
            f"{result.vulnerabilities_found} |"
        )

    def calculate_improvements(self) -> None:
        """Fill ``metric_improvements`` with Zen's percentage gain per competitor.

        A positive number always means Zen did better. For
        ``scan_duration_seconds`` lower is better, so the sign is flipped
        relative to the other metrics. When the competitor's value is not
        positive the ratio is undefined: duration reports 0, the other
        metrics report 100 if Zen scored above zero and 0 otherwise.
        """
        metrics_to_compare = (
            "precision", "recall", "f1_score", "accuracy",
            "vulnerabilities_found", "scan_duration_seconds",
        )

        for metric in metrics_to_compare:
            zen_value = getattr(self.zen_result, metric, 0)

            for comp_result in self.competitor_results:
                comp_value = getattr(comp_result, metric, 0)

                if metric == "scan_duration_seconds":
                    # Lower is better for duration.
                    if comp_value > 0:
                        improvement = ((comp_value - zen_value) / comp_value) * 100
                    else:
                        improvement = 0
                elif comp_value > 0:
                    # Higher is better for every other metric.
                    improvement = ((zen_value - comp_value) / comp_value) * 100
                else:
                    improvement = 100 if zen_value > 0 else 0

                tool_name = comp_result.tool_metadata.name
                self.metric_improvements.setdefault(metric, {})[tool_name] = improvement

    def calculate_rankings(self) -> None:
        """Fill ``rankings`` with tool names ordered best-first per metric.

        The sort is stable, so tools with equal values keep their input
        order (Zen first, then competitors in list order).
        """
        all_results = [self.zen_result] + self.competitor_results

        # (metric_name, higher_is_better)
        metrics_to_rank = (
            ("precision", True),
            ("recall", True),
            ("f1_score", True),
            ("accuracy", True),
            ("scan_duration_seconds", False),
        )

        for metric, higher_is_better in metrics_to_rank:
            ordered = sorted(
                all_results,
                key=lambda r: getattr(r, metric, 0),
                reverse=higher_is_better,
            )
            self.rankings[metric] = [r.tool_metadata.name for r in ordered]

    def determine_winner(self) -> None:
        """Set ``winner`` to the name of the tool with the highest F1 score.

        On a tie the earliest entry wins, i.e. Zen before any competitor.
        """
        all_results = [self.zen_result] + self.competitor_results
        best = max(all_results, key=lambda r: r.f1_score)
        self.winner = best.tool_metadata.name

    def to_dict(self) -> Dict[str, Any]:
        """Convert the comparison to a plain dictionary.

        The timestamp is rendered with ``isoformat()``; ``improvements`` and
        ``rankings`` are the live dict objects, not copies.
        """
        return {
            "scenario_id": self.scenario_id,
            "timestamp": self.timestamp.isoformat(),
            "zen_result": self._summarize(self.zen_result),
            "competitor_results": [
                self._summarize(r) for r in self.competitor_results
            ],
            "improvements": self.metric_improvements,
            "rankings": self.rankings,
            "winner": self.winner,
        }

    def generate_report_markdown(self) -> str:
        """Render the comparison as a markdown report and return it.

        Sections: results table, improvements per metric, rankings per
        metric, and (only when ``winner`` is set) the winner.
        """
        lines = [
            "# Benchmark Comparison Report",
            f"**Scenario:** {self.scenario_id}",
            f"**Date:** {self.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Results Summary",
            "",
            "| Tool | Precision | Recall | F1-Score | Accuracy | Duration | Vulns Found |",
            "|------|-----------|--------|----------|----------|----------|-------------|",
        ]

        # Zen's row comes first with its name in bold, then the competitors.
        zen = self.zen_result
        lines.append(self._table_row(f"**{zen.tool_metadata.name}**", zen))
        lines.extend(
            self._table_row(r.tool_metadata.name, r)
            for r in self.competitor_results
        )

        lines.extend(["", "## Improvements vs Competitors", ""])

        for metric, improvements in self.metric_improvements.items():
            lines.append(f"### {metric.replace('_', ' ').title()}")
            lines.append("")
            for tool, improvement in improvements.items():
                emoji = "✅" if improvement > 0 else "❌"
                lines.append(f"- {emoji} vs {tool}: {improvement:+.1f}%")
            lines.append("")

        lines.extend(["## Rankings", ""])

        medals = {1: "🥇", 2: "🥈", 3: "🥉"}
        for metric, ranking in self.rankings.items():
            lines.append(f"### {metric.replace('_', ' ').title()}")
            for position, tool in enumerate(ranking, 1):
                # Places beyond third fall back to a plain "N." prefix.
                medal = medals.get(position, f"{position}.")
                lines.append(f"{medal} {tool}")
            lines.append("")

        if self.winner:
            lines.extend([
                "## Winner",
                "",
                f"🏆 **{self.winner}** achieved the best overall performance.",
                "",
            ])

        return "\n".join(lines)
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class ComparisonFramework:
    """Registry of competitor tools plus helpers to compare them with Zen-AI-Pentest.

    Competitors are stored in ``self.competitors`` keyed by their metadata
    name. The default competitors are registered on construction.
    """

    def __init__(self):
        self.competitors: Dict[str, CompetitorTool] = {}
        self.register_default_competitors()

    def register_default_competitors(self) -> None:
        """Register the built-in competitor wrappers."""
        self.register_competitor(PentestGPTCompetitor())
        self.register_competitor(AutoPentestDRLCompetitor())

    def register_competitor(self, tool: CompetitorTool) -> None:
        """Register ``tool`` under its metadata name, replacing any previous entry."""
        self.competitors[tool.metadata.name] = tool

    def get_available_competitors(self) -> List[str]:
        """Return the names of registered tools whose ``is_available()`` is true."""
        return [
            name for name, tool in self.competitors.items()
            if tool.is_available()
        ]

    async def run_comparison(
        self,
        zen_result: ToolBenchmarkResult,
        scenario_config: Dict[str, Any],
        competitors: Optional[List[str]] = None
    ) -> ComparisonResult:
        """Run the selected competitors and compare them with ``zen_result``.

        Args:
            zen_result: Result already produced by Zen-AI-Pentest.
            scenario_config: Scenario settings. ``target_url`` (or, failing
                that, ``target_host``) is passed to each tool as the target;
                ``scenario_id`` labels the comparison ("unknown" if absent).
            competitors: Names of registered tools to run. ``None`` means
                every tool that reports itself available.

        Returns:
            A ComparisonResult with improvements, rankings and winner
            already calculated.

        Names that are not registered, and tools that are not available,
        are skipped silently. Scans run one after another.
        """
        if competitors is None:
            competitors = self.get_available_competitors()

        # May be None when the config carries neither key; it is forwarded as is.
        target = scenario_config.get("target_url") or scenario_config.get("target_host")

        competitor_results: List[ToolBenchmarkResult] = []
        for comp_name in competitors:
            tool = self.competitors.get(comp_name)
            if tool is None or not tool.is_available():
                continue
            competitor_results.append(await tool.run_scan(target, scenario_config))

        comparison = ComparisonResult(
            zen_result=zen_result,
            competitor_results=competitor_results,
            scenario_id=scenario_config.get("scenario_id", "unknown")
        )

        comparison.calculate_improvements()
        comparison.calculate_rankings()
        comparison.determine_winner()

        return comparison

    def get_tool_metadata(self, tool_name: str) -> Optional[ToolMetadata]:
        """Look up metadata for ``tool_name``.

        Built-in tools are matched first, by their short lookup key and also
        by the ``name`` stored in their metadata (so both "Burp Suite" and
        "Burp Suite Professional" resolve). Otherwise a competitor
        registered under that name is consulted. Returns ``None`` when
        nothing matches; "Zen-AI-Pentest" has no built-in metadata.
        """
        builtin: Dict[str, ToolMetadata] = {
            "PentestGPT": PENTESTGPT_METADATA,
            "AutoPentest-DRL": AUTOPENTEST_METADATA,
            "Nessus": NESSUS_METADATA,
            "OpenVAS": OPENVAS_METADATA,
            "Burp Suite": BURP_SUITE_METADATA,
            "OWASP ZAP": OWASP_ZAP_METADATA,
            "Nikto": NIKTO_METADATA,
            "Nuclei": NUCLEI_METADATA,
            "SQLMap": SQLMAP_METADATA,
        }
        # Index each entry by its own display name too, so names taken from a
        # benchmark result resolve even where they differ from the short key.
        for metadata in list(builtin.values()):
            builtin.setdefault(metadata.name, metadata)

        found = builtin.get(tool_name)
        if found is not None:
            return found

        registered = self.competitors.get(tool_name)
        return registered.metadata if registered is not None else None

    def compare_capabilities(
        self,
        tools: List[str]
    ) -> Dict[str, Any]:
        """Compare the capability flags of several tools.

        Args:
            tools: Tool names to compare. Names without known metadata are
                left out of ``tools``/``unique``/``common`` and show
                ``False`` for every capability in the matrix.

        Returns:
            A dict with:
              * ``tools``: name, category, license and flags per known tool;
              * ``capability_matrix``: capability -> {tool name -> flag};
              * ``common_capabilities``: capabilities every known tool has;
              * ``unique_capabilities``: tool name -> capabilities that only
                that tool has (filled only when two or more tools are known).
            Capabilities are listed in the order they are declared.
        """
        result: Dict[str, Any] = {
            "tools": [],
            "capability_matrix": {},
            "unique_capabilities": {},
            "common_capabilities": []
        }

        # A dict is used as an insertion-ordered set so the output order is
        # deterministic (a plain set would vary between runs).
        capability_names: Dict[str, None] = {}
        tool_capabilities: Dict[str, Dict[str, Any]] = {}

        for tool_name in tools:
            metadata = self.get_tool_metadata(tool_name)
            if not metadata:
                continue
            caps_dict = {
                k: v for k, v in vars(metadata.capabilities).items()
                if not k.startswith('_')
            }
            tool_capabilities[tool_name] = caps_dict
            result["tools"].append({
                "name": tool_name,
                "category": metadata.category.value,
                "license": metadata.license_type,
                "capabilities": caps_dict
            })
            capability_names.update(dict.fromkeys(caps_dict))

        for cap in capability_names:
            result["capability_matrix"][cap] = {
                tool: tool_capabilities.get(tool, {}).get(cap, False)
                for tool in tools
            }

            supporters = [
                name for name, caps in tool_capabilities.items()
                if caps.get(cap)
            ]
            if not supporters:
                continue
            if len(supporters) == len(tool_capabilities):
                result["common_capabilities"].append(cap)
            # "Unique" is only meaningful relative to at least one other tool.
            if len(supporters) == 1 and len(tool_capabilities) > 1:
                result["unique_capabilities"].setdefault(supporters[0], []).append(cap)

        return result
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
def calculate_cohen_d(group1: List[float], group2: List[float]) -> float:
    """Calculate Cohen's d effect size between two samples.

    d = (mean(group1) - mean(group2)) / pooled standard deviation, where the
    pooled variance weights each sample variance by its degrees of freedom.

    Rule of thumb for |d|:
        Small effect:  ~0.2
        Medium effect: ~0.5
        Large effect:  ~0.8+

    Args:
        group1: First sample.
        group2: Second sample.

    Returns:
        The effect size; positive when ``group1`` has the larger mean.
        Returns 0.0 when either sample has fewer than two values (the sample
        variance is undefined) or when both samples have zero variance.
    """
    n1, n2 = len(group1), len(group2)
    if n1 < 2 or n2 < 2:
        return 0.0

    # Use the sample variances directly instead of squaring a square root.
    var1 = statistics.variance(group1)
    var2 = statistics.variance(group2)

    pooled_variance = ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    if pooled_variance == 0:
        return 0.0

    mean_difference = statistics.mean(group1) - statistics.mean(group2)
    return mean_difference / math.sqrt(pooled_variance)
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
# Import math here for calculate_cohen_d
|
|
729
|
+
import math
|