zen-ai-pentest 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. agents/__init__.py +28 -0
  2. agents/agent_base.py +239 -0
  3. agents/agent_orchestrator.py +346 -0
  4. agents/analysis_agent.py +225 -0
  5. agents/cli.py +258 -0
  6. agents/exploit_agent.py +224 -0
  7. agents/integration.py +211 -0
  8. agents/post_scan_agent.py +937 -0
  9. agents/react_agent.py +384 -0
  10. agents/react_agent_enhanced.py +616 -0
  11. agents/react_agent_vm.py +298 -0
  12. agents/research_agent.py +176 -0
  13. api/__init__.py +11 -0
  14. api/auth.py +123 -0
  15. api/main.py +1027 -0
  16. api/schemas.py +357 -0
  17. api/websocket.py +97 -0
  18. autonomous/__init__.py +122 -0
  19. autonomous/agent.py +253 -0
  20. autonomous/agent_loop.py +1370 -0
  21. autonomous/exploit_validator.py +1537 -0
  22. autonomous/memory.py +448 -0
  23. autonomous/react.py +339 -0
  24. autonomous/tool_executor.py +488 -0
  25. backends/__init__.py +16 -0
  26. backends/chatgpt_direct.py +133 -0
  27. backends/claude_direct.py +130 -0
  28. backends/duckduckgo.py +138 -0
  29. backends/openrouter.py +120 -0
  30. benchmarks/__init__.py +149 -0
  31. benchmarks/benchmark_engine.py +904 -0
  32. benchmarks/ci_benchmark.py +785 -0
  33. benchmarks/comparison.py +729 -0
  34. benchmarks/metrics.py +553 -0
  35. benchmarks/run_benchmarks.py +809 -0
  36. ci_cd/__init__.py +2 -0
  37. core/__init__.py +17 -0
  38. core/async_pool.py +282 -0
  39. core/asyncio_fix.py +222 -0
  40. core/cache.py +472 -0
  41. core/container.py +277 -0
  42. core/database.py +114 -0
  43. core/input_validator.py +353 -0
  44. core/models.py +288 -0
  45. core/orchestrator.py +611 -0
  46. core/plugin_manager.py +571 -0
  47. core/rate_limiter.py +405 -0
  48. core/secure_config.py +328 -0
  49. core/shield_integration.py +296 -0
  50. modules/__init__.py +46 -0
  51. modules/cve_database.py +362 -0
  52. modules/exploit_assist.py +330 -0
  53. modules/nuclei_integration.py +480 -0
  54. modules/osint.py +604 -0
  55. modules/protonvpn.py +554 -0
  56. modules/recon.py +165 -0
  57. modules/sql_injection_db.py +826 -0
  58. modules/tool_orchestrator.py +498 -0
  59. modules/vuln_scanner.py +292 -0
  60. modules/wordlist_generator.py +566 -0
  61. risk_engine/__init__.py +99 -0
  62. risk_engine/business_impact.py +267 -0
  63. risk_engine/business_impact_calculator.py +563 -0
  64. risk_engine/cvss.py +156 -0
  65. risk_engine/epss.py +190 -0
  66. risk_engine/example_usage.py +294 -0
  67. risk_engine/false_positive_engine.py +1073 -0
  68. risk_engine/scorer.py +304 -0
  69. web_ui/backend/main.py +471 -0
  70. zen_ai_pentest-2.0.0.dist-info/METADATA +795 -0
  71. zen_ai_pentest-2.0.0.dist-info/RECORD +75 -0
  72. zen_ai_pentest-2.0.0.dist-info/WHEEL +5 -0
  73. zen_ai_pentest-2.0.0.dist-info/entry_points.txt +2 -0
  74. zen_ai_pentest-2.0.0.dist-info/licenses/LICENSE +21 -0
  75. zen_ai_pentest-2.0.0.dist-info/top_level.txt +10 -0
@@ -0,0 +1,729 @@
1
+ """
2
+ Zen-AI-Pentest Benchmark Comparison Module
3
+
4
+ Compare Zen-AI-Pentest performance against other security tools.
5
+ Supports both AI-based and traditional security scanners.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+ from typing import Dict, List, Optional, Any, Callable
10
+ from datetime import datetime
11
+ from enum import Enum
12
+ import json
13
+ import statistics
14
+ from abc import ABC, abstractmethod
15
+
16
+
17
class ToolCategory(Enum):
    """Enumerates the kinds of security tool a ``ToolMetadata`` can be filed under."""

    # Each value is the lowercase form of its member name.
    AI_PENTEST = "ai_pentest"
    TRADITIONAL_SCANNER = "traditional_scanner"
    STATIC_ANALYZER = "static_analyzer"
    DYNAMIC_ANALYZER = "dynamic_analyzer"
    NETWORK_SCANNER = "network_scanner"
    WEB_SCANNER = "web_scanner"
25
+
26
+
27
@dataclass
class ToolCapabilities:
    """Feature flags describing what a security testing tool can do.

    Every flag defaults to ``False``; callers switch on only the features
    a given tool offers.
    """

    # --- Scan targets -------------------------------------------------------
    supports_web_scanning: bool = False
    supports_network_scanning: bool = False
    supports_api_scanning: bool = False
    supports_mobile_scanning: bool = False
    supports_cloud_scanning: bool = False
    supports_container_scanning: bool = False

    # --- AI / ML ------------------------------------------------------------
    uses_ai: bool = False
    supports_autonomous_testing: bool = False
    supports_contextual_analysis: bool = False
    supports_attack_chain_building: bool = False

    # --- Exploitation -------------------------------------------------------
    supports_exploitation: bool = False
    supports_post_exploitation: bool = False
    supports_lateral_movement: bool = False

    # --- Report / output formats --------------------------------------------
    supports_pdf_reports: bool = False
    supports_json_output: bool = False
    supports_xml_output: bool = False
    supports_sarif: bool = False
    supports_cicd_integration: bool = False

    # --- External integrations ----------------------------------------------
    has_api: bool = False
    has_webhook_support: bool = False
    has_slack_integration: bool = False
    has_jira_integration: bool = False
61
+
62
+
63
@dataclass
class ToolMetadata:
    """Descriptive record for one security tool.

    The first five fields are required; everything else has a default.
    """

    # Required identity fields
    name: str
    version: str
    vendor: str
    category: ToolCategory
    license_type: str  # one of "open_source", "commercial", "freemium"

    # Optional descriptive fields
    pricing_model: str = ""
    website: str = ""
    documentation_url: str = ""
    github_url: Optional[str] = None

    # Feature flags; each instance gets its own ToolCapabilities object
    capabilities: ToolCapabilities = field(default_factory=ToolCapabilities)

    # Performance characteristics (None when not specified)
    avg_scan_time_web: Optional[int] = None  # minutes
    avg_scan_time_network: Optional[int] = None  # presumably minutes as well - confirm
    max_concurrent_scans: Optional[int] = None
    requires_internet: bool = True
84
+
85
+
86
@dataclass
class ToolBenchmarkResult:
    """Outcome of running one tool against one benchmark scenario.

    Only ``tool_metadata`` and ``scenario_id`` are required. All numeric
    metrics start at zero and are plain stored values: this class does not
    derive precision/recall/F1/accuracy from the raw counts.
    """

    tool_metadata: ToolMetadata
    scenario_id: str
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12; kept as-is so existing timestamps stay comparable.
    timestamp: datetime = field(default_factory=datetime.utcnow)

    # Raw detection counts and timing
    scan_duration_seconds: float = 0.0
    vulnerabilities_found: int = 0
    true_positives: int = 0
    false_positives: int = 0
    false_negatives: int = 0

    # Coverage counters
    endpoints_scanned: int = 0
    total_endpoints: int = 0
    parameters_tested: int = 0
    total_parameters: int = 0

    # Quality scores
    precision: float = 0.0
    recall: float = 0.0
    f1_score: float = 0.0
    accuracy: float = 0.0

    # Cost figures (None when not recorded)
    total_cost_usd: Optional[float] = None
    tokens_used: Optional[int] = None

    # Detailed payloads; each instance gets fresh containers
    findings: List[Dict[str, Any]] = field(default_factory=list)
    raw_output: Dict[str, Any] = field(default_factory=dict)
    errors: List[str] = field(default_factory=list)
120
+
121
+
122
class CompetitorTool(ABC):
    """Interface that every competitor-tool wrapper must implement.

    Subclasses supply ``run_scan`` and ``is_available``; the metadata handed
    to the constructor is stored on ``self.metadata``.
    """

    def __init__(self, metadata: ToolMetadata):
        self.metadata = metadata

    @abstractmethod
    async def run_scan(
        self, target: str, scenario_config: Dict[str, Any]
    ) -> ToolBenchmarkResult:
        """Scan ``target`` for the given scenario and return the tool's result."""

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether the wrapped tool is available/installed."""

    def get_metadata(self) -> ToolMetadata:
        """Return the metadata object supplied at construction time."""
        return self.metadata
145
+
146
+
147
+ # =============================================================================
148
+ # PENTESTGPT COMPETITOR
149
+ # =============================================================================
150
+
151
# Static description of PentestGPT used by PentestGPTCompetitor.
PENTESTGPT_METADATA = ToolMetadata(
    name="PentestGPT",
    version="2.0",
    vendor="Gelei Deng et al.",
    category=ToolCategory.AI_PENTEST,
    license_type="open_source",
    # The project site and the source repository are the same URL.
    website="https://github.com/GeleiDeng/PentestGPT",
    github_url="https://github.com/GeleiDeng/PentestGPT",
    capabilities=ToolCapabilities(
        # Scan targets
        supports_web_scanning=True,
        supports_network_scanning=True,
        # AI features
        uses_ai=True,
        supports_autonomous_testing=True,
        supports_contextual_analysis=True,
        supports_attack_chain_building=True,
        # Exploitation and output
        supports_exploitation=True,
        supports_json_output=True,
    ),
)
170
+
171
+
172
class PentestGPTCompetitor(CompetitorTool):
    """Wrapper exposing PentestGPT through the ``CompetitorTool`` interface.

    Both methods are placeholders: the tool is always reported as
    unavailable and ``run_scan`` returns a result with default metrics.
    """

    def __init__(self):
        super().__init__(PENTESTGPT_METADATA)

    def is_available(self) -> bool:
        """Return ``False``; no real check for the pentestgpt command exists yet."""
        return False  # Placeholder

    async def run_scan(
        self, target: str, scenario_config: Dict[str, Any]
    ) -> ToolBenchmarkResult:
        """Return a default-valued result tagged with the scenario's id.

        ``target`` is currently unused. A missing ``"scenario_id"`` key
        falls back to ``"unknown"``.
        """
        scenario_id = scenario_config.get("scenario_id", "unknown")
        return ToolBenchmarkResult(
            tool_metadata=self.metadata,
            scenario_id=scenario_id,
        )
194
+
195
+
196
+ # =============================================================================
197
+ # AUTOPENTEST-DRL COMPETITOR
198
+ # =============================================================================
199
+
200
# Static description of AutoPentest-DRL used by AutoPentestDRLCompetitor.
AUTOPENTEST_METADATA = ToolMetadata(
    name="AutoPentest-DRL",
    version="1.0",
    vendor="Li et al.",
    category=ToolCategory.AI_PENTEST,
    license_type="open_source",
    # The project site and the source repository are the same URL.
    website="https://github.com/crond-jaist/AutoPentest-DRL",
    github_url="https://github.com/crond-jaist/AutoPentest-DRL",
    capabilities=ToolCapabilities(
        # Scan targets
        supports_network_scanning=True,
        # AI features
        uses_ai=True,
        supports_autonomous_testing=True,
        # Exploitation and output
        supports_exploitation=True,
        supports_post_exploitation=True,
        supports_json_output=True,
    ),
)
217
+
218
+
219
class AutoPentestDRLCompetitor(CompetitorTool):
    """Wrapper exposing AutoPentest-DRL through the ``CompetitorTool`` interface.

    Both methods are placeholders: the tool is always reported as
    unavailable and ``run_scan`` returns a result with default metrics.
    """

    def __init__(self):
        super().__init__(AUTOPENTEST_METADATA)

    def is_available(self) -> bool:
        """Return ``False``; availability detection is not implemented yet."""
        return False  # Placeholder

    async def run_scan(
        self, target: str, scenario_config: Dict[str, Any]
    ) -> ToolBenchmarkResult:
        """Return a default-valued result tagged with the scenario's id.

        ``target`` is currently unused. A missing ``"scenario_id"`` key
        falls back to ``"unknown"``.
        """
        scenario_id = scenario_config.get("scenario_id", "unknown")
        return ToolBenchmarkResult(
            tool_metadata=self.metadata,
            scenario_id=scenario_id,
        )
239
+
240
+
241
+ # =============================================================================
242
+ # TRADITIONAL SCANNERS
243
+ # =============================================================================
244
+
245
# --- General-purpose vulnerability scanners ----------------------------------

NESSUS_METADATA = ToolMetadata(
    name="Nessus",
    version="10.7",
    vendor="Tenable",
    category=ToolCategory.TRADITIONAL_SCANNER,
    license_type="commercial",
    pricing_model="subscription",
    website="https://www.tenable.com/products/nessus",
    avg_scan_time_network=30,
    max_concurrent_scans=10,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_cloud_scanning=True,
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_json_output=True,
        supports_xml_output=True,
        has_api=True,
        has_jira_integration=True,
    ),
)

OPENVAS_METADATA = ToolMetadata(
    name="OpenVAS",
    version="22.4",
    vendor="Greenbone",
    category=ToolCategory.TRADITIONAL_SCANNER,
    license_type="open_source",
    website="https://www.greenbone.net/openvas",
    github_url="https://github.com/greenbone/openvas",
    avg_scan_time_network=45,
    max_concurrent_scans=5,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_pdf_reports=True,
        supports_xml_output=True,
        supports_sarif=True,
        has_api=True,
    ),
)

# --- Web application scanners -------------------------------------------------

# Note: the registered name is "Burp Suite Professional", not "Burp Suite".
BURP_SUITE_METADATA = ToolMetadata(
    name="Burp Suite Professional",
    version="2024.1",
    vendor="PortSwigger",
    category=ToolCategory.WEB_SCANNER,
    license_type="commercial",
    pricing_model="perpetual_license",
    website="https://portswigger.net/burp",
    avg_scan_time_web=20,
    max_concurrent_scans=5,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_api_scanning=True,
        supports_mobile_scanning=True,
        supports_autonomous_testing=True,  # With Burp Scanner
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_xml_output=True,
        supports_sarif=True,
        has_api=True,
        has_jira_integration=True,
    ),
)

OWASP_ZAP_METADATA = ToolMetadata(
    name="OWASP ZAP",
    version="2.14",
    vendor="OWASP",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://www.zaproxy.org",
    github_url="https://github.com/zaproxy/zaproxy",
    avg_scan_time_web=25,
    max_concurrent_scans=3,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_api_scanning=True,
        supports_autonomous_testing=True,
        supports_exploitation=True,
        supports_pdf_reports=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_sarif=True,
        supports_cicd_integration=True,
        has_api=True,
        has_jira_integration=True,
    ),
)

NIKTO_METADATA = ToolMetadata(
    name="Nikto",
    version="2.5",
    vendor="CIRT",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://cirt.net/Nikto2",
    github_url="https://github.com/sullo/nikto",
    avg_scan_time_web=15,
    requires_internet=False,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_cicd_integration=True,
    ),
)

NUCLEI_METADATA = ToolMetadata(
    name="Nuclei",
    version="3.1",
    vendor="ProjectDiscovery",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://nuclei.projectdiscovery.io",
    github_url="https://github.com/projectdiscovery/nuclei",
    avg_scan_time_web=5,
    max_concurrent_scans=50,
    requires_internet=False,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_network_scanning=True,
        supports_api_scanning=True,
        supports_cloud_scanning=True,
        supports_exploitation=True,
        supports_json_output=True,
        supports_sarif=True,
        supports_cicd_integration=True,
        has_api=True,
    ),
)

SQLMAP_METADATA = ToolMetadata(
    name="SQLMap",
    version="1.7",
    vendor="SQLMap Project",
    category=ToolCategory.WEB_SCANNER,
    license_type="open_source",
    website="https://sqlmap.org",
    github_url="https://github.com/sqlmapproject/sqlmap",
    avg_scan_time_web=30,
    requires_internet=False,
    capabilities=ToolCapabilities(
        supports_web_scanning=True,
        supports_exploitation=True,
        supports_json_output=True,
        supports_xml_output=True,
        supports_cicd_integration=True,
    ),
)
399
+
400
+
401
+ # =============================================================================
402
+ # COMPARISON FRAMEWORK
403
+ # =============================================================================
404
+
405
@dataclass
class ComparisonResult:
    """Side-by-side outcome of the Zen result and competitor results for one scenario.

    The derived fields (``metric_improvements``, ``rankings``, ``winner``)
    start empty and are filled in by ``calculate_improvements``,
    ``calculate_rankings`` and ``determine_winner``.
    """

    # Results under comparison
    zen_result: ToolBenchmarkResult
    competitor_results: List[ToolBenchmarkResult]

    # Scenario identification
    scenario_id: str
    timestamp: datetime = field(default_factory=datetime.utcnow)

    # metric name -> competitor name -> percentage improvement of Zen
    metric_improvements: Dict[str, Dict[str, float]] = field(default_factory=dict)
    # metric name -> tool names ordered best first
    rankings: Dict[str, List[str]] = field(default_factory=dict)

    # Summary
    winner: Optional[str] = None
    statistical_significance: Dict[str, bool] = field(default_factory=dict)

    def calculate_improvements(self) -> None:
        """Fill ``metric_improvements`` with Zen's percentage gain per competitor.

        Scan duration is treated as lower-is-better; every other metric as
        higher-is-better. When the competitor's value is not positive the
        percentage is undefined, so duration yields 0 and other metrics
        yield 100 if Zen scored above zero, else 0.
        """
        duration_metric = "scan_duration_seconds"
        compared_metrics = (
            "precision",
            "recall",
            "f1_score",
            "accuracy",
            "vulnerabilities_found",
            duration_metric,
        )

        for metric in compared_metrics:
            ours = getattr(self.zen_result, metric, 0)

            for rival in self.competitor_results:
                theirs = getattr(rival, metric, 0)

                if theirs > 0:
                    # Sign the difference so that a positive number always
                    # means "Zen did better".
                    if metric == duration_metric:
                        improvement = ((theirs - ours) / theirs) * 100
                    else:
                        improvement = ((ours - theirs) / theirs) * 100
                elif metric == duration_metric:
                    improvement = 0
                else:
                    improvement = 100 if ours > 0 else 0

                per_tool = self.metric_improvements.setdefault(metric, {})
                per_tool[rival.tool_metadata.name] = improvement

    def calculate_rankings(self) -> None:
        """Fill ``rankings`` with tool names ordered best-first for each metric."""
        contenders = [self.zen_result] + self.competitor_results

        # metric name -> True when a larger value ranks higher
        higher_is_better_by_metric = {
            "precision": True,
            "recall": True,
            "f1_score": True,
            "accuracy": True,
            "scan_duration_seconds": False,
        }

        for metric, higher_is_better in higher_is_better_by_metric.items():
            ordered = list(contenders)
            ordered.sort(
                key=lambda res: getattr(res, metric, 0),
                reverse=higher_is_better,
            )
            self.rankings[metric] = [res.tool_metadata.name for res in ordered]

    def determine_winner(self) -> None:
        """Set ``winner`` to the tool with the highest F1 score.

        On a tie the earliest entry wins, with the Zen result considered first.
        """
        best = self.zen_result
        for rival in self.competitor_results:
            if rival.f1_score > best.f1_score:
                best = rival
        self.winner = best.tool_metadata.name

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict summary of the comparison."""

        def summarize(res: ToolBenchmarkResult) -> Dict[str, Any]:
            # Headline numbers reported for every tool.
            return {
                "tool": res.tool_metadata.name,
                "precision": res.precision,
                "recall": res.recall,
                "f1_score": res.f1_score,
                "accuracy": res.accuracy,
                "duration_seconds": res.scan_duration_seconds,
                "vulnerabilities_found": res.vulnerabilities_found,
            }

        return {
            "scenario_id": self.scenario_id,
            "timestamp": self.timestamp.isoformat(),
            "zen_result": summarize(self.zen_result),
            "competitor_results": [
                summarize(res) for res in self.competitor_results
            ],
            "improvements": self.metric_improvements,
            "rankings": self.rankings,
            "winner": self.winner,
        }

    def generate_report_markdown(self) -> str:
        """Render the comparison as a Markdown report and return it as one string."""

        def table_row(res: ToolBenchmarkResult, label: str) -> str:
            # One row of the results-summary table.
            return (
                f"| {label} | "
                f"{res.precision:.3f} | {res.recall:.3f} | {res.f1_score:.3f} | "
                f"{res.accuracy:.3f} | {res.scan_duration_seconds:.1f}s | "
                f"{res.vulnerabilities_found} |"
            )

        def heading(metric: str) -> str:
            return f"### {metric.replace('_', ' ').title()}"

        report = [
            "# Benchmark Comparison Report",
            f"**Scenario:** {self.scenario_id}",
            f"**Date:** {self.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Results Summary",
            "",
            "| Tool | Precision | Recall | F1-Score | Accuracy | Duration | Vulns Found |",
            "|------|-----------|--------|----------|----------|----------|-------------|",
        ]

        # The Zen row comes first with its name in bold; competitors follow.
        report.append(
            table_row(self.zen_result, f"**{self.zen_result.tool_metadata.name}**")
        )
        report.extend(
            table_row(res, res.tool_metadata.name)
            for res in self.competitor_results
        )

        report += ["", "## Improvements vs Competitors", ""]
        for metric, per_tool in self.metric_improvements.items():
            report.append(heading(metric))
            report.append("")
            for tool, improvement in per_tool.items():
                emoji = "✅" if improvement > 0 else "❌"
                report.append(f"- {emoji} vs {tool}: {improvement:+.1f}%")
            report.append("")

        report += ["## Rankings", ""]
        medals = {1: "🥇", 2: "🥈", 3: "🥉"}
        for metric, ranking in self.rankings.items():
            report.append(heading(metric))
            for position, tool in enumerate(ranking, 1):
                medal = medals.get(position, f"{position}.")
                report.append(f"{medal} {tool}")
            report.append("")

        if self.winner:
            report += [
                "## Winner",
                "",
                f"🏆 **{self.winner}** achieved the best overall performance.",
                "",
            ]

        return "\n".join(report)
579
+
580
+
581
class ComparisonFramework:
    """Registry of competitor tools plus helpers to benchmark and compare them.

    Competitors are stored in ``self.competitors`` keyed by their metadata
    name. The PentestGPT and AutoPentest-DRL wrappers are registered on
    construction.
    """

    def __init__(self):
        self.competitors: Dict[str, CompetitorTool] = {}
        self.register_default_competitors()

    def register_default_competitors(self) -> None:
        """Register the built-in competitor wrappers."""
        self.register_competitor(PentestGPTCompetitor())
        self.register_competitor(AutoPentestDRLCompetitor())

    def register_competitor(self, tool: CompetitorTool) -> None:
        """Register ``tool`` under its metadata name, replacing any same-named entry."""
        self.competitors[tool.metadata.name] = tool

    def get_available_competitors(self) -> List[str]:
        """Return the names of registered competitors whose ``is_available()`` is true."""
        return [
            name for name, tool in self.competitors.items()
            if tool.is_available()
        ]

    async def run_comparison(
        self,
        zen_result: ToolBenchmarkResult,
        scenario_config: Dict[str, Any],
        competitors: Optional[List[str]] = None
    ) -> ComparisonResult:
        """Scan with each requested competitor and compare against ``zen_result``.

        Args:
            zen_result: Benchmark result already produced for Zen-AI-Pentest.
            scenario_config: Scenario settings. ``"target_url"`` (or, failing
                that, ``"target_host"``) is passed to each tool as the target;
                ``"scenario_id"`` labels the comparison and defaults to
                ``"unknown"``.
            competitors: Names of registered competitors to run. ``None``
                means every competitor that reports itself available.

        Returns:
            A ``ComparisonResult`` with improvements, rankings and winner
            already calculated.

        Names that are not registered, or whose tool is unavailable, are
        skipped. Competitors are scanned one at a time, in the given order.
        """
        if competitors is None:
            competitors = self.get_available_competitors()

        competitor_results = []
        # May be None when neither key is present; it is passed through as-is.
        target = scenario_config.get("target_url") or scenario_config.get("target_host")

        for comp_name in competitors:
            tool = self.competitors.get(comp_name)
            if tool is not None and tool.is_available():
                result = await tool.run_scan(target, scenario_config)
                competitor_results.append(result)

        comparison = ComparisonResult(
            zen_result=zen_result,
            competitor_results=competitor_results,
            scenario_id=scenario_config.get("scenario_id", "unknown")
        )

        comparison.calculate_improvements()
        comparison.calculate_rankings()
        comparison.determine_winner()

        return comparison

    def get_tool_metadata(self, tool_name: str) -> Optional[ToolMetadata]:
        """Look up metadata for ``tool_name``.

        Built-in names are checked first, then competitors registered on this
        framework. Returns ``None`` for unknown names and for
        ``"Zen-AI-Pentest"``, which has no built-in metadata entry.
        """
        metadata_map = {
            "Zen-AI-Pentest": None,  # Special case
            "PentestGPT": PENTESTGPT_METADATA,
            "AutoPentest-DRL": AUTOPENTEST_METADATA,
            "Nessus": NESSUS_METADATA,
            "OpenVAS": OPENVAS_METADATA,
            "Burp Suite": BURP_SUITE_METADATA,
            # The metadata's own name is "Burp Suite Professional"; accept it
            # too so a lookup by metadata name round-trips.
            "Burp Suite Professional": BURP_SUITE_METADATA,
            "OWASP ZAP": OWASP_ZAP_METADATA,
            "Nikto": NIKTO_METADATA,
            "Nuclei": NUCLEI_METADATA,
            "SQLMap": SQLMAP_METADATA,
        }
        metadata = metadata_map.get(tool_name)
        if metadata is None and tool_name in self.competitors:
            # Fall back to competitors registered at runtime.
            metadata = self.competitors[tool_name].metadata
        return metadata

    def compare_capabilities(
        self,
        tools: List[str]
    ) -> Dict[str, Any]:
        """Compare the capability flags of the named tools.

        Returns a dict with four keys:

        * ``"tools"``: one entry per resolvable name with its category,
          license and capability flags.
        * ``"capability_matrix"``: capability -> {tool name -> bool} for every
          requested name; names with no metadata are reported as ``False``.
        * ``"common_capabilities"``: capabilities enabled for every
          resolvable tool.
        * ``"unique_capabilities"``: tool name -> capabilities enabled for
          that tool only. Populated only when at least two tools resolve,
          since uniqueness is meaningless for a single tool.
        """
        result = {
            "tools": [],
            "capability_matrix": {},
            "unique_capabilities": {},
            "common_capabilities": []
        }

        tool_capabilities: Dict[str, Dict[str, bool]] = {}

        for tool_name in tools:
            metadata = self.get_tool_metadata(tool_name)
            if metadata is None:
                continue
            caps_dict = {
                k: v for k, v in vars(metadata.capabilities).items()
                if not k.startswith('_')
            }
            tool_capabilities[tool_name] = caps_dict
            result["tools"].append({
                "name": tool_name,
                "category": metadata.category.value,
                "license": metadata.license_type,
                "capabilities": caps_dict
            })

        # Ordered de-duplication keeps the output order deterministic
        # (iterating a set of strings is not stable across interpreter runs).
        capability_names = list(dict.fromkeys(
            cap for caps in tool_capabilities.values() for cap in caps
        ))
        resolved_count = len(tool_capabilities)

        for cap in capability_names:
            result["capability_matrix"][cap] = {
                tool: tool_capabilities.get(tool, {}).get(cap, False)
                for tool in tools
            }

            supporters = [
                tool for tool, caps in tool_capabilities.items()
                if caps.get(cap)
            ]
            if supporters and len(supporters) == resolved_count:
                result["common_capabilities"].append(cap)
            if len(supporters) == 1 and resolved_count > 1:
                result["unique_capabilities"].setdefault(
                    supporters[0], []
                ).append(cap)

        return result
697
+
698
+
699
def calculate_cohen_d(group1: List[float], group2: List[float]) -> float:
    """
    Calculate Cohen's d effect size between two samples.

    Small effect: ~0.2
    Medium effect: ~0.5
    Large effect: ~0.8+

    Args:
        group1: Observations of the first sample.
        group2: Observations of the second sample.

    Returns:
        ``(mean(group1) - mean(group2)) / pooled_std``, where the pooled
        standard deviation weights each sample variance by its degrees of
        freedom. Returns ``0.0`` when either sample has fewer than two
        observations or when the pooled standard deviation is zero.
    """
    # Local import: keeps the function self-contained regardless of where
    # (or whether) the enclosing module imports ``math``.
    import math

    n1, n2 = len(group1), len(group2)
    if n1 < 2 or n2 < 2:
        # Sample variance is undefined for fewer than two observations.
        return 0.0

    # Use the sample variances directly rather than squaring stdev().
    var1 = statistics.variance(group1)
    var2 = statistics.variance(group2)

    pooled_std = math.sqrt(
        ((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2)
    )

    if pooled_std == 0:
        # Both samples are constant; no meaningful effect size exists.
        return 0.0

    return (statistics.mean(group1) - statistics.mean(group2)) / pooled_std
726
+
727
+
728
+ # Import math here for calculate_cohen_d
729
+ import math