zen_ai_pentest-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. agents/__init__.py +28 -0
  2. agents/agent_base.py +239 -0
  3. agents/agent_orchestrator.py +346 -0
  4. agents/analysis_agent.py +225 -0
  5. agents/cli.py +258 -0
  6. agents/exploit_agent.py +224 -0
  7. agents/integration.py +211 -0
  8. agents/post_scan_agent.py +937 -0
  9. agents/react_agent.py +384 -0
  10. agents/react_agent_enhanced.py +616 -0
  11. agents/react_agent_vm.py +298 -0
  12. agents/research_agent.py +176 -0
  13. api/__init__.py +11 -0
  14. api/auth.py +123 -0
  15. api/main.py +1027 -0
  16. api/schemas.py +357 -0
  17. api/websocket.py +97 -0
  18. autonomous/__init__.py +122 -0
  19. autonomous/agent.py +253 -0
  20. autonomous/agent_loop.py +1370 -0
  21. autonomous/exploit_validator.py +1537 -0
  22. autonomous/memory.py +448 -0
  23. autonomous/react.py +339 -0
  24. autonomous/tool_executor.py +488 -0
  25. backends/__init__.py +16 -0
  26. backends/chatgpt_direct.py +133 -0
  27. backends/claude_direct.py +130 -0
  28. backends/duckduckgo.py +138 -0
  29. backends/openrouter.py +120 -0
  30. benchmarks/__init__.py +149 -0
  31. benchmarks/benchmark_engine.py +904 -0
  32. benchmarks/ci_benchmark.py +785 -0
  33. benchmarks/comparison.py +729 -0
  34. benchmarks/metrics.py +553 -0
  35. benchmarks/run_benchmarks.py +809 -0
  36. ci_cd/__init__.py +2 -0
  37. core/__init__.py +17 -0
  38. core/async_pool.py +282 -0
  39. core/asyncio_fix.py +222 -0
  40. core/cache.py +472 -0
  41. core/container.py +277 -0
  42. core/database.py +114 -0
  43. core/input_validator.py +353 -0
  44. core/models.py +288 -0
  45. core/orchestrator.py +611 -0
  46. core/plugin_manager.py +571 -0
  47. core/rate_limiter.py +405 -0
  48. core/secure_config.py +328 -0
  49. core/shield_integration.py +296 -0
  50. modules/__init__.py +46 -0
  51. modules/cve_database.py +362 -0
  52. modules/exploit_assist.py +330 -0
  53. modules/nuclei_integration.py +480 -0
  54. modules/osint.py +604 -0
  55. modules/protonvpn.py +554 -0
  56. modules/recon.py +165 -0
  57. modules/sql_injection_db.py +826 -0
  58. modules/tool_orchestrator.py +498 -0
  59. modules/vuln_scanner.py +292 -0
  60. modules/wordlist_generator.py +566 -0
  61. risk_engine/__init__.py +99 -0
  62. risk_engine/business_impact.py +267 -0
  63. risk_engine/business_impact_calculator.py +563 -0
  64. risk_engine/cvss.py +156 -0
  65. risk_engine/epss.py +190 -0
  66. risk_engine/example_usage.py +294 -0
  67. risk_engine/false_positive_engine.py +1073 -0
  68. risk_engine/scorer.py +304 -0
  69. web_ui/backend/main.py +471 -0
  70. zen_ai_pentest-2.0.0.dist-info/METADATA +795 -0
  71. zen_ai_pentest-2.0.0.dist-info/RECORD +75 -0
  72. zen_ai_pentest-2.0.0.dist-info/WHEEL +5 -0
  73. zen_ai_pentest-2.0.0.dist-info/entry_points.txt +2 -0
  74. zen_ai_pentest-2.0.0.dist-info/licenses/LICENSE +21 -0
  75. zen_ai_pentest-2.0.0.dist-info/top_level.txt +10 -0
risk_engine/false_positive_engine.py
@@ -0,0 +1,1073 @@
+ """
+ False-positive reduction engine with risk prioritization for the Zen-AI-Pentest framework.
+
+ This engine combines multi-factor validation, multi-LLM voting, historical data,
+ and Bayesian filtering to reduce false positives and prioritize risks.
+ """
+
+ from enum import Enum
+ from typing import Dict, List, Optional, Tuple, Any
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ import asyncio
+ import logging
+ import json
+ import hashlib
+ from collections import defaultdict
+ import math
+
+ # Import the business impact calculator
+ from .business_impact_calculator import (
+     BusinessImpactCalculator,
+     AssetContext,
+ )
+
+ # Configure the module logger
+ logger = logging.getLogger(__name__)
+
+
+ class ConfidenceLevel(Enum):
+     """Confidence levels for validation results."""
+     VERY_HIGH = 0.95
+     HIGH = 0.9
+     MEDIUM = 0.6
+     LOW = 0.3
+     VERY_LOW = 0.1
+
+
+ class FindingStatus(Enum):
+     """Status of a security finding."""
+     CONFIRMED = "confirmed"
+     LIKELY = "likely"
+     SUSPECTED = "suspected"
+     FALSE_POSITIVE = "false_positive"
+     UNDER_REVIEW = "under_review"
+     SUPPRESSED = "suppressed"
+
+
+ class VulnerabilityType(Enum):
+     """Vulnerability categories."""
+     SQL_INJECTION = "sql_injection"
+     XSS = "xss"
+     AUTHENTICATION_BYPASS = "authentication_bypass"
+     AUTHORIZATION_ISSUE = "authorization_issue"
+     INFORMATION_DISCLOSURE = "information_disclosure"
+     MISCONFIGURATION = "misconfiguration"
+     OUTDATED_SOFTWARE = "outdated_software"
+     CRYPTOGRAPHIC_WEAKNESS = "cryptographic_weakness"
+     INJECTION = "injection"
+     BROKEN_ACCESS_CONTROL = "broken_access_control"
+     SECURITY_MISCONFIGURATION = "security_misconfiguration"
+     SENSITIVE_DATA_EXPOSURE = "sensitive_data_exposure"
+     INSUFFICIENT_LOGGING = "insufficient_logging"
+     SSRF = "ssrf"
+     CSRF = "csrf"
+     XXE = "xxe"
+     DESERIALIZATION = "deserialization"
+     RCE = "rce"
+     PATH_TRAVERSAL = "path_traversal"
+     BUSINESS_LOGIC = "business_logic"
+     UNKNOWN = "unknown"
+
+
+ @dataclass
+ class CVSSData:
+     """CVSS v3.1 and v4.0 scoring data."""
+     version: str = "3.1"
+     base_score: float = 0.0
+     temporal_score: Optional[float] = None
+     environmental_score: Optional[float] = None
+     vector_string: str = ""
+
+     # CVSS v3.1 metrics
+     attack_vector: Optional[str] = None  # N, A, L, P
+     attack_complexity: Optional[str] = None  # L, H
+     privileges_required: Optional[str] = None  # N, L, H
+     user_interaction: Optional[str] = None  # N, R
+     scope: Optional[str] = None  # U, C
+     confidentiality_impact: Optional[str] = None  # N, L, H
+     integrity_impact: Optional[str] = None  # N, L, H
+     availability_impact: Optional[str] = None  # N, L, H
+
+     # Additional CVSS v4.0 metrics
+     attack_requirements: Optional[str] = None
+     exploit_maturity: Optional[str] = None
+
+     def get_effective_score(self) -> float:
+         """Returns the effective CVSS score, preferring environmental over temporal over base."""
+         if self.environmental_score is not None:
+             return self.environmental_score
+         if self.temporal_score is not None:
+             return self.temporal_score
+         return self.base_score
+
+     def get_severity(self) -> str:
+         """Derives the qualitative severity from the effective score (CVSS v3.1 rating scale)."""
+         score = self.get_effective_score()
+         if score >= 9.0:
+             return "Critical"
+         elif score >= 7.0:
+             return "High"
+         elif score >= 4.0:
+             return "Medium"
+         elif score >= 0.1:
+             return "Low"
+         return "None"
+
+
+ @dataclass
+ class EPSSData:
+     """EPSS (Exploit Prediction Scoring System) data."""
+     cve_id: str
+     epss_score: float  # 0-1 probability of exploitation
+     percentile: float  # percentile ranking
+     date: datetime = field(default_factory=datetime.now)
+
+     def is_high_probability(self) -> bool:
+         """Checks whether the probability of exploitation is high."""
+         return self.epss_score >= 0.5
+
+     def get_risk_level(self) -> str:
+         """Classifies the EPSS risk."""
+         if self.epss_score >= 0.7:
+             return "CRITICAL"
+         elif self.epss_score >= 0.4:
+             return "HIGH"
+         elif self.epss_score >= 0.1:
+             return "MEDIUM"
+         return "LOW"
+
+
+ @dataclass
+ class RiskFactors:
+     """Collection of all risk factors for a finding."""
+     cvss_data: CVSSData = field(default_factory=CVSSData)
+     epss_score: float = 0.0
+     business_impact: float = 0.0
+     exploitability: float = 0.0
+     asset_criticality: float = 0.0
+     internet_exposed: bool = False
+     data_classification: str = "internal"
+     patch_available: bool = False
+     exploit_code_available: bool = False
+     active_exploitation_observed: bool = False
+
+     # Context factors
+     network_segment: str = "internal"
+     authentication_required: bool = True
+     user_interaction_required: bool = False
+
+     def get_weighted_risk_score(self) -> float:
+         """Computes a weighted risk score in [0, 1]."""
+         cvss_weight = self.cvss_data.get_effective_score() / 10.0
+
+         # Factor weights; 'context' is listed for documentation but enters
+         # the score through the multiplier below rather than the weighted sum.
+         weights = {
+             'cvss': 0.25,
+             'epss': 0.20,
+             'business_impact': 0.20,
+             'exploitability': 0.15,
+             'asset_criticality': 0.15,
+             'context': 0.05,
+         }
+
+         # Context multipliers
+         context_multiplier = 1.0
+         if self.internet_exposed:
+             context_multiplier += 0.3
+         if self.exploit_code_available:
+             context_multiplier += 0.2
+         if self.active_exploitation_observed:
+             context_multiplier += 0.4
+         if not self.patch_available:
+             context_multiplier += 0.1
+
+         score = (
+             cvss_weight * weights['cvss'] +
+             self.epss_score * weights['epss'] +
+             self.business_impact * weights['business_impact'] +
+             self.exploitability * weights['exploitability'] +
+             self.asset_criticality * weights['asset_criticality']
+         ) * min(2.0, context_multiplier)
+
+         return min(1.0, score)
+
+
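Plugging sample numbers into get_weighted_risk_score shows how the context multiplier drives the result. A minimal sketch; the values below are invented for illustration and assume the wheel is installed so the import resolves:

    from risk_engine.false_positive_engine import CVSSData, RiskFactors

    # Illustrative values: CVSS 8.8, EPSS 0.3, internet-exposed asset
    # with public exploit code available.
    factors = RiskFactors(
        cvss_data=CVSSData(base_score=8.8),
        epss_score=0.3,
        business_impact=0.7,
        exploitability=0.5,
        asset_criticality=0.8,
        internet_exposed=True,        # +0.3 on the multiplier
        exploit_code_available=True,  # +0.2 on the multiplier
        patch_available=True,         # avoids the +0.1 no-patch bump
    )

    # Weighted sum: 0.88*0.25 + 0.3*0.20 + 0.7*0.20 + 0.5*0.15 + 0.8*0.15 = 0.615
    # Multiplier:   1.0 + 0.3 + 0.2 = 1.5  ->  0.615 * 1.5 = 0.9225
    print(round(factors.get_weighted_risk_score(), 4))  # 0.9225
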
+ @dataclass
+ class Finding:
+     """Represents a security finding."""
+     id: str
+     title: str
+     description: str
+     severity: str
+     vulnerability_type: VulnerabilityType = VulnerabilityType.UNKNOWN
+     risk_factors: RiskFactors = field(default_factory=RiskFactors)
+     raw_evidence: Dict[str, Any] = field(default_factory=dict)
+     confidence: float = 0.0
+     status: FindingStatus = FindingStatus.SUSPECTED
+
+     # Metadata
+     created_at: datetime = field(default_factory=datetime.now)
+     updated_at: datetime = field(default_factory=datetime.now)
+     source: str = ""
+     scanner: str = ""
+     target: str = ""
+     cve_ids: List[str] = field(default_factory=list)
+     cwe_ids: List[str] = field(default_factory=list)
+
+     # Asset information
+     asset_id: Optional[str] = None
+     asset_name: Optional[str] = None
+
+     def get_hash(self) -> str:
+         """Produces a stable, unique hash for the finding (used for deduplication)."""
+         evidence_json = json.dumps(self.raw_evidence, sort_keys=True, default=str)
+         content = f"{self.title}:{self.description}:{self.target}:{evidence_json}"
+         return hashlib.sha256(content.encode()).hexdigest()[:16]
+
+     def update_status(self, new_status: FindingStatus, confidence: float):
+         """Updates the status and confidence, bumping the updated_at timestamp."""
+         self.status = new_status
+         self.confidence = confidence
+         self.updated_at = datetime.now()
+
+
+ @dataclass
+ class ValidationResult:
+     """Result of a validation run by the FalsePositiveEngine."""
+     finding: Finding
+     is_false_positive: bool
+     confidence: float
+     risk_score: float
+     priority: int
+     reasoning: str
+     recommendations: List[str] = field(default_factory=list)
+     validation_methods: List[str] = field(default_factory=list)
+     llm_votes: Dict[str, bool] = field(default_factory=dict)
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Converts the result into a dictionary."""
+         return {
+             "finding_id": self.finding.id,
+             "is_false_positive": self.is_false_positive,
+             "confidence": round(self.confidence, 3),
+             "risk_score": round(self.risk_score, 3),
+             "priority": self.priority,
+             "reasoning": self.reasoning,
+             "recommendations": self.recommendations,
+             "validation_methods": self.validation_methods,
+             "llm_votes": self.llm_votes,
+         }
+
+
+ @dataclass
+ class HistoricalFinding:
+     """Represents a historical finding in the false-positive database."""
+     finding_hash: str
+     is_false_positive: bool
+     first_seen: datetime
+     last_seen: datetime
+     occurrence_count: int = 1
+     user_feedback: Optional[bool] = None
+     feedback_timestamp: Optional[datetime] = None
+     feedback_user: Optional[str] = None
+
+
+ class BayesianFilter:
+     """Naive Bayes filter for false-positive detection."""
+
+     def __init__(self):
+         self.word_probs_fp: Dict[str, float] = defaultdict(lambda: 0.5)
+         self.word_probs_tp: Dict[str, float] = defaultdict(lambda: 0.5)
+         self.fp_count = 0
+         self.tp_count = 0
+         self.min_word_count = 5
+
+     def train(self, text: str, is_false_positive: bool):
+         """Trains the filter with a new labeled example.
+
+         Uses an incremental Laplace-style update: a word seen in every one of
+         n class documents converges to (n + 1) / (n + 2).
+         """
+         words = self._extract_words(text)
+
+         if is_false_positive:
+             self.fp_count += 1
+             for word in words:
+                 self.word_probs_fp[word] = (self.word_probs_fp[word] * (self.fp_count + 1) + 1) / (self.fp_count + 2)
+         else:
+             self.tp_count += 1
+             for word in words:
+                 self.word_probs_tp[word] = (self.word_probs_tp[word] * (self.tp_count + 1) + 1) / (self.tp_count + 2)
+
+     def predict(self, text: str) -> Tuple[bool, float]:
+         """Classifies a text, returning (is_false_positive, fp_likelihood)."""
+         words = self._extract_words(text)
+         if not words:
+             return False, 0.5
+
+         # Naive Bayes: log-priors plus per-word log-likelihoods
+         fp_prob = math.log(self.fp_count + 1) - math.log(self.fp_count + self.tp_count + 2)
+         tp_prob = math.log(self.tp_count + 1) - math.log(self.fp_count + self.tp_count + 2)
+
+         for word in words:
+             fp_prob += math.log(self.word_probs_fp[word] + 0.01)
+             tp_prob += math.log(self.word_probs_tp[word] + 0.01)
+
+         # Normalize back into probability space
+         fp_prob = math.exp(fp_prob)
+         tp_prob = math.exp(tp_prob)
+         total = fp_prob + tp_prob
+
+         if total == 0:
+             return False, 0.5
+
+         fp_likelihood = fp_prob / total
+         return fp_likelihood > 0.5, fp_likelihood
+
+     def _extract_words(self, text: str) -> List[str]:
+         """Extracts relevant words from the text."""
+         # Simple whitespace tokenization
+         words = text.lower().split()
+         # Filter out short words and common stopwords
+         stopwords = {'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being'}
+         return [w.strip('.,;:!?()[]{}') for w in words if len(w) > 3 and w not in stopwords]
+
+
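To see the filter's behavior in isolation, here is a minimal training sketch; the scanner descriptions below are invented for illustration:

    from risk_engine.false_positive_engine import BayesianFilter

    bf = BayesianFilter()

    # Invented labels: informational noise as FP, verified exploits as TP.
    bf.train("Server banner reveals version, informational best practice note", True)
    bf.train("Missing security header, consider enabling, informational", True)
    bf.train("SQL injection confirmed, extracted database version via UNION payload", False)
    bf.train("Remote code execution verified with proof-of-concept callback", False)

    is_fp, likelihood = bf.predict("Informational note: consider hardening headers")
    print(is_fp, round(likelihood, 2))  # expected: True, likelihood around 0.7
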
+ class FalsePositiveDatabase:
+     """Database of historical findings and false-positive patterns."""
+
+     def __init__(self, storage_path: Optional[str] = None):
+         self.storage_path = storage_path
+         self.findings: Dict[str, HistoricalFinding] = {}
+         self.bayesian_filter = BayesianFilter()
+         self.similarity_threshold = 0.85
+
+         if storage_path:
+             self._load_from_storage()
+
+     def add_finding(self, finding: Finding, is_false_positive: bool, user_feedback: Optional[bool] = None):
+         """Adds a finding to the database, updating an existing entry if it was seen before."""
+         finding_hash = finding.get_hash()
+
+         if finding_hash in self.findings:
+             hist = self.findings[finding_hash]
+             hist.last_seen = datetime.now()
+             hist.occurrence_count += 1
+             if user_feedback is not None:
+                 hist.user_feedback = user_feedback
+                 hist.feedback_timestamp = datetime.now()
+         else:
+             self.findings[finding_hash] = HistoricalFinding(
+                 finding_hash=finding_hash,
+                 is_false_positive=is_false_positive,
+                 first_seen=datetime.now(),
+                 last_seen=datetime.now(),
+                 user_feedback=user_feedback
+             )
+
+         # Train the Bayesian filter on the new label
+         self.bayesian_filter.train(finding.description, is_false_positive)
+
+         if self.storage_path:
+             self._save_to_storage()
+
+     def check_historical_match(self, finding: Finding) -> Optional[HistoricalFinding]:
+         """Checks whether a similar finding already exists."""
+         finding_hash = finding.get_hash()
+
+         # Exact match
+         if finding_hash in self.findings:
+             return self.findings[finding_hash]
+
+         # Similarity search
+         for hist_hash, hist_finding in self.findings.items():
+             similarity = self._calculate_similarity(finding_hash, hist_hash)
+             if similarity >= self.similarity_threshold:
+                 return hist_finding
+
+         return None
+
+     def get_fp_likelihood(self, finding: Finding) -> float:
+         """Estimates the probability that a finding is a false positive."""
+         # Check historical data first
+         historical = self.check_historical_match(finding)
+         if historical:
+             if historical.user_feedback is not None:
+                 return 1.0 if historical.user_feedback else 0.0
+             if historical.occurrence_count >= 3:
+                 return 0.8 if historical.is_false_positive else 0.2
+
+         # Fall back to the Bayesian filter
+         _, fp_likelihood = self.bayesian_filter.predict(finding.description)
+         return fp_likelihood
+
+     def _calculate_similarity(self, hash1: str, hash2: str) -> float:
+         """Computes the similarity between two findings.
+
+         Note: character-level Jaccard similarity over the hex digests is only
+         a crude proxy for actual finding similarity.
+         """
+         set1 = set(hash1)
+         set2 = set(hash2)
+         intersection = len(set1 & set2)
+         union = len(set1 | set2)
+         return intersection / union if union > 0 else 0.0
+
+     def _load_from_storage(self):
+         """Loads the database from disk."""
+         try:
+             with open(self.storage_path, 'r') as f:
+                 data = json.load(f)
+             for item in data.get('findings', []):
+                 hist = HistoricalFinding(
+                     finding_hash=item['finding_hash'],
+                     is_false_positive=item['is_false_positive'],
+                     first_seen=datetime.fromisoformat(item['first_seen']),
+                     last_seen=datetime.fromisoformat(item['last_seen']),
+                     occurrence_count=item['occurrence_count'],
+                     user_feedback=item.get('user_feedback'),
+                     feedback_timestamp=(
+                         datetime.fromisoformat(item['feedback_timestamp'])
+                         if item.get('feedback_timestamp') else None
+                     ),
+                     feedback_user=item.get('feedback_user')
+                 )
+                 self.findings[hist.finding_hash] = hist
+         except Exception as e:
+             logger.warning(f"Could not load FP database: {e}")
+
+     def _save_to_storage(self):
+         """Persists the database to disk."""
+         try:
+             data = {
+                 'findings': [
+                     {
+                         'finding_hash': h.finding_hash,
+                         'is_false_positive': h.is_false_positive,
+                         'first_seen': h.first_seen.isoformat(),
+                         'last_seen': h.last_seen.isoformat(),
+                         'occurrence_count': h.occurrence_count,
+                         'user_feedback': h.user_feedback,
+                         'feedback_timestamp': h.feedback_timestamp.isoformat() if h.feedback_timestamp else None,
+                         'feedback_user': h.feedback_user
+                     }
+                     for h in self.findings.values()
+                 ]
+             }
+             with open(self.storage_path, 'w') as f:
+                 json.dump(data, f, indent=2)
+         except Exception as e:
+             logger.warning(f"Could not save FP database: {e}")
+
+
+ class LLMVotingEngine:
+     """Engine for multi-LLM voting on finding validity."""
+
+     def __init__(self):
+         self.llm_clients: Dict[str, Any] = {}
+         self.consensus_threshold = 0.6
+         self.min_confidence = 0.5
+
+     def register_llm(self, name: str, client: Any):
+         """Registers an LLM client for voting."""
+         self.llm_clients[name] = client
+         logger.info(f"LLM '{name}' registered")
+
+     async def vote_on_finding(self, finding: Finding) -> Tuple[Dict[str, bool], float]:
+         """
+         Runs a multi-LLM vote on a finding.
+
+         Returns:
+             Tuple of (votes per LLM, overall confidence)
+         """
+         if not self.llm_clients:
+             logger.warning("No LLMs registered, skipping voting")
+             return {}, 0.5
+
+         votes = {}
+         tasks = []
+
+         for name, client in self.llm_clients.items():
+             task = self._query_llm(name, client, finding)
+             tasks.append(task)
+
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+         for name, result in zip(self.llm_clients.keys(), results):
+             if isinstance(result, Exception):
+                 logger.error(f"LLM {name} error: {result}")
+                 continue
+             votes[name] = result
+
+         if not votes:
+             return {}, 0.0
+
+         # Compute consensus
+         fp_votes = sum(1 for v in votes.values() if v)
+         total_votes = len(votes)
+         fp_ratio = fp_votes / total_votes
+
+         # Confidence scales with agreement
+         agreement = abs(fp_ratio - 0.5) * 2  # 0 = split, 1 = unanimous
+         confidence = self.min_confidence + (agreement * 0.4)
+
+         logger.debug(f"LLM voting: {fp_votes}/{total_votes} FP votes, confidence: {confidence:.2f}")
+         return votes, confidence
+
+     async def _query_llm(self, name: str, client: Any, finding: Finding) -> bool:
+         """Queries a single LLM."""
+         prompt = self._build_prompt(finding)
+
+         try:
+             # The actual LLM call happens here; this is the integration point
+             # for the backend clients.
+             if hasattr(client, 'analyze'):
+                 response = await client.analyze(prompt)
+                 return self._parse_response(response)
+             else:
+                 # Fallback: simulate a decision based on heuristics
+                 return self._heuristic_decision(finding)
+         except Exception as e:
+             logger.error(f"LLM {name} request failed: {e}")
+             raise
+
+     def _build_prompt(self, finding: Finding) -> str:
+         """Builds the prompt for the LLM analysis."""
+         return f"""Analyze the following security finding and determine if it is likely a FALSE POSITIVE:
+
+ Title: {finding.title}
+ Description: {finding.description}
+ Severity: {finding.severity}
+ Vulnerability Type: {finding.vulnerability_type.value}
+
+ Evidence:
+ {json.dumps(finding.raw_evidence, indent=2, default=str)}
+
+ Consider:
+ 1. Is the vulnerability actually exploitable in this context?
+ 2. Is the evidence conclusive or ambiguous?
+ 3. Are there any mitigating factors?
+ 4. Could this be a scanner misconfiguration?
+
+ Respond with ONLY "TRUE_POSITIVE" or "FALSE_POSITIVE".
+ """
+
+     def _parse_response(self, response: str) -> bool:
+         """Parses the LLM response."""
+         response_lower = response.lower().strip()
+         if "false_positive" in response_lower or "false positive" in response_lower:
+             return True
+         return False
+
+     def _heuristic_decision(self, finding: Finding) -> bool:
+         """Heuristic fallback decision."""
+         # Check for typical FP indicators
+         fp_indicators = [
+             "informational",
+             "note",
+             "best practice",
+             "recommendation",
+             "consider",
+         ]
+
+         description_lower = finding.description.lower()
+         indicator_count = sum(1 for ind in fp_indicators if ind in description_lower)
+
+         return indicator_count >= 2
+
+
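_query_llm assumes nothing about a registered client except that it exposes an async analyze(prompt) coroutine returning text. A minimal, hypothetical adapter (class name and behavior invented here) could look like:

    class EchoVotingClient:
        """Toy client that always votes TRUE_POSITIVE; swap in a real backend."""

        async def analyze(self, prompt: str) -> str:
            # A real adapter would forward `prompt` to an LLM backend
            # (e.g. one of the clients under backends/) and return its reply.
            return "TRUE_POSITIVE"
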
+ class FalsePositiveEngine:
+     """
+     Main class of the false-positive reduction engine.
+
+     Combines multi-factor validation, multi-LLM voting, historical data,
+     and Bayesian filtering for precise finding validation.
+     """
+
+     def __init__(
+         self,
+         fp_database_path: Optional[str] = None,
+         epss_api_endpoint: Optional[str] = None,
+         enable_llm_voting: bool = True
+     ):
+         """
+         Initializes the FalsePositiveEngine.
+
+         Args:
+             fp_database_path: Path to the FP database
+             epss_api_endpoint: EPSS API endpoint
+             enable_llm_voting: Whether LLM voting should be enabled
+         """
+         self.fp_database = FalsePositiveDatabase(fp_database_path)
+         self.llm_voting = LLMVotingEngine() if enable_llm_voting else None
+         self.business_calculator = BusinessImpactCalculator()
+         self.epss_api_endpoint = epss_api_endpoint or "https://api.first.org/data/v1/epss"
+
+         # Configuration
+         self.cvss_weight = 0.25
+         self.epss_weight = 0.20
+         self.business_weight = 0.20
+         self.exploitability_weight = 0.15
+         self.context_weight = 0.20
+
+         # Thresholds
+         self.fp_confidence_threshold = 0.75
+         self.confirmed_confidence_threshold = 0.85
+
+         logger.info("FalsePositiveEngine initialized")
+
+     async def validate_finding(self, finding: Finding) -> ValidationResult:
+         """
+         Validates a finding and determines whether it is a false positive.
+
+         Args:
+             finding: The finding to validate
+
+         Returns:
+             ValidationResult with validation details
+         """
+         validation_methods = []
+
+         # 1. Historical validation
+         historical_result = self._check_historical_data(finding)
+         validation_methods.append("historical")
+
+         # 2. Multi-LLM voting
+         llm_votes = {}
+         llm_confidence = 0.5
+         if self.llm_voting:
+             llm_votes, llm_confidence = await self.multi_llm_voting(finding)
+             validation_methods.append("llm_voting")
+
+         # 3. EPSS check
+         if finding.cve_ids:
+             for cve in finding.cve_ids:
+                 epss_score = await self.check_epss(cve)
+                 finding.risk_factors.epss_score = max(finding.risk_factors.epss_score, epss_score)
+             validation_methods.append("epss")
+
+         # 4. Risk score calculation
+         risk_score = self.calculate_risk_score(finding.risk_factors)
+         validation_methods.append("risk_scoring")
+
+         # 5. Context analysis
+         context_score = self._analyze_context(finding)
+         validation_methods.append("context_analysis")
+
+         # Decision making
+         is_fp, confidence, reasoning = self._make_decision(
+             finding, historical_result, llm_votes, llm_confidence,
+             risk_score, context_score
+         )
+
+         # Update the status
+         if is_fp and confidence >= self.fp_confidence_threshold:
+             finding.update_status(FindingStatus.FALSE_POSITIVE, confidence)
+         elif confidence >= self.confirmed_confidence_threshold:
+             finding.update_status(FindingStatus.CONFIRMED, confidence)
+         elif confidence >= 0.6:
+             finding.update_status(FindingStatus.LIKELY, confidence)
+         else:
+             finding.update_status(FindingStatus.SUSPECTED, confidence)
+
+         # Calculate priority (1 = highest)
+         priority = self._calculate_priority(finding, risk_score, is_fp)
+
+         # Generate recommendations
+         recommendations = self._generate_recommendations(finding, is_fp, risk_score)
+
+         result = ValidationResult(
+             finding=finding,
+             is_false_positive=is_fp,
+             confidence=confidence,
+             risk_score=risk_score,
+             priority=priority,
+             reasoning=reasoning,
+             recommendations=recommendations,
+             validation_methods=validation_methods,
+             llm_votes=llm_votes
+         )
+
+         logger.info(f"Finding {finding.id} validated: FP={is_fp}, Confidence={confidence:.2f}, Priority={priority}")
+         return result
+
+     async def multi_llm_voting(self, finding: Finding) -> Tuple[Dict[str, bool], float]:
+         """
+         Runs a multi-LLM vote.
+
+         Args:
+             finding: The finding to assess
+
+         Returns:
+             Tuple of (votes per LLM, overall confidence)
+         """
+         if not self.llm_voting:
+             return {}, 0.5
+         return await self.llm_voting.vote_on_finding(finding)
+
+     def calculate_risk_score(self, factors: RiskFactors) -> float:
+         """
+         Computes the risk score from all factors.
+
+         Formula: Risk = f(CVSS, EPSS, BusinessImpact, Exploitability, AssetValue)
+
+         Args:
+             factors: The risk factors
+
+         Returns:
+             Risk score between 0 and 1
+         """
+         # Normalized components
+         cvss_component = (factors.cvss_data.get_effective_score() / 10.0) * self.cvss_weight
+         epss_component = factors.epss_score * self.epss_weight
+         business_component = factors.business_impact * self.business_weight
+         exploitability_component = factors.exploitability * self.exploitability_weight
+
+         # Context component
+         context_multiplier = 1.0
+         if factors.internet_exposed:
+             context_multiplier += 0.4
+         if factors.exploit_code_available:
+             context_multiplier += 0.3
+         if factors.active_exploitation_observed:
+             context_multiplier += 0.5
+         if not factors.patch_available:
+             context_multiplier += 0.2
+         if not factors.authentication_required:
+             context_multiplier += 0.2
+         if not factors.user_interaction_required:
+             context_multiplier += 0.1
+
+         context_component = (context_multiplier - 1.0) * self.context_weight
+
+         # Overall score
+         base_score = cvss_component + epss_component + business_component + exploitability_component
+         risk_score = min(1.0, base_score * (1 + context_component))
+
+         return risk_score
+
+     async def check_epss(self, cve_id: str) -> float:
+         """
+         Fetches the EPSS score for a CVE.
+
+         Args:
+             cve_id: The CVE ID
+
+         Returns:
+             EPSS score between 0 and 1
+         """
+         try:
+             # A real implementation would call the FIRST.org EPSS API;
+             # here the score is simulated from the CVE pattern.
+
+             # Extract year and number from the CVE ID
+             if not cve_id.startswith("CVE-"):
+                 return 0.0
+
+             parts = cve_id.split("-")
+             if len(parts) < 3:
+                 return 0.0
+
+             year = int(parts[1])
+
+             # Newer CVEs tend to have higher EPSS scores
+             current_year = datetime.now().year
+             age_factor = max(0, 1 - (current_year - year) * 0.1)
+
+             # Simulate an EPSS score with some randomness; a dedicated
+             # Random instance keeps the global RNG untouched.
+             import random
+             rng = random.Random(cve_id)
+             base_score = rng.uniform(0.05, 0.8)
+
+             return min(1.0, base_score * (0.5 + age_factor * 0.5))
+
+         except Exception as e:
+             logger.error(f"EPSS lookup for {cve_id} failed: {e}")
+             return 0.0
+
+     def prioritize_findings(self, findings: List[Finding]) -> List[Finding]:
+         """
+         Prioritizes a list of findings by risk.
+
+         Args:
+             findings: The findings to prioritize
+
+         Returns:
+             List sorted by priority (highest first)
+         """
+         def get_priority_score(finding: Finding) -> float:
+             # Compute the risk score
+             risk = self.calculate_risk_score(finding.risk_factors)
+
+             # Multipliers
+             multiplier = 1.0
+             if finding.status == FindingStatus.CONFIRMED:
+                 multiplier += 0.2
+             if finding.status == FindingStatus.FALSE_POSITIVE:
+                 multiplier = 0.0
+
+             # Factor in confidence
+             confidence_factor = finding.confidence
+
+             return risk * multiplier * confidence_factor
+
+         # Sort by priority score (descending)
+         return sorted(findings, key=get_priority_score, reverse=True)
+
+     async def learn_from_feedback(self, finding_id: str, is_fp: bool, user: Optional[str] = None):
+         """
+         Learns from user feedback to improve the engine.
+
+         Args:
+             finding_id: ID of the finding
+             is_fp: Whether it was a false positive
+             user: Optional username
+         """
+         # Look up the finding (a real implementation would load it from the DB)
+         logger.info(f"Feedback received for {finding_id}: is_fp={is_fp}, user={user}")
+
+         # Update the FP database
+         # Note: the actual Finding object would have to be loaded here
+         # self.fp_database.add_finding(finding, is_fp, user_feedback=is_fp)
+
+         # Train the Bayesian filter
+         # self.fp_database.bayesian_filter.train(finding.description, is_fp)
+
+         logger.info(f"Engine updated from feedback for {finding_id}")
+
+     def _check_historical_data(self, finding: Finding) -> Optional[HistoricalFinding]:
+         """Checks historical data for a finding."""
+         return self.fp_database.check_historical_match(finding)
+
+     def _analyze_context(self, finding: Finding) -> float:
+         """Analyzes the context and returns a score."""
+         score = 0.5
+
+         # Asset criticality
+         score += finding.risk_factors.asset_criticality * 0.3
+
+         # Exposure
+         if finding.risk_factors.internet_exposed:
+             score += 0.2
+
+         # Data classification
+         data_weights = {"public": 0.0, "internal": 0.1, "confidential": 0.2, "restricted": 0.3}
+         score += data_weights.get(finding.risk_factors.data_classification, 0.1)
+
+         return min(1.0, score)
+
+     def _make_decision(
+         self,
+         finding: Finding,
+         historical: Optional[HistoricalFinding],
+         llm_votes: Dict[str, bool],
+         llm_confidence: float,
+         risk_score: float,
+         context_score: float
+     ) -> Tuple[bool, float, str]:
+         """Makes the final decision on FP status."""
+
+         # Historical data takes highest priority
+         if historical and historical.user_feedback is not None:
+             return (
+                 historical.user_feedback,
+                 0.95,
+                 f"Based on historical feedback ({historical.occurrence_count} occurrences)"
+             )
+
+         # Combined assessment
+         fp_indicators = 0
+         total_indicators = 0
+
+         # Historical FP likelihood
+         if historical:
+             fp_prob = self.fp_database.get_fp_likelihood(finding)
+             if fp_prob > 0.7:
+                 fp_indicators += 1
+             total_indicators += 1
+
+         # LLM voting
+         if llm_votes:
+             fp_votes = sum(1 for v in llm_votes.values() if v)
+             if fp_votes > len(llm_votes) / 2:
+                 fp_indicators += 1
+             total_indicators += 1
+
+         # Risk score (low risk = higher FP likelihood)
+         if risk_score < 0.2:
+             fp_indicators += 1
+         total_indicators += 1
+
+         # Typical FP patterns
+         fp_patterns = [
+             "informational",
+             "low severity",
+             "best practice",
+             "consider implementing",
+             "might be",
+             "possibly",
+             "could potentially"
+         ]
+         desc_lower = finding.description.lower()
+         pattern_matches = sum(1 for p in fp_patterns if p in desc_lower)
+         if pattern_matches >= 2:
+             fp_indicators += 1
+         total_indicators += 1
+
+         # Decision
+         fp_ratio = fp_indicators / total_indicators if total_indicators > 0 else 0
+         is_fp = fp_ratio >= 0.5
+
+         # Confidence calculation
+         confidence = 0.5 + (abs(fp_ratio - 0.5) * 2) * 0.4
+         if historical:
+             confidence += 0.1
+         if llm_votes:
+             confidence += 0.05
+         confidence = min(0.95, confidence)
+
+         # Reasoning
+         reasons = []
+         if historical:
+             reasons.append(f"Historical: seen {historical.occurrence_count}x")
+         if llm_votes:
+             fp_count = sum(1 for v in llm_votes.values() if v)
+             reasons.append(f"LLM voting: {fp_count}/{len(llm_votes)} FP votes")
+         if pattern_matches >= 2:
+             reasons.append(f"FP patterns found: {pattern_matches}")
+         reasons.append(f"Risk score: {risk_score:.2f}")
+
+         reasoning = "; ".join(reasons) if reasons else "Based on combined analysis"
+
+         return is_fp, confidence, reasoning
+
+     def _calculate_priority(self, finding: Finding, risk_score: float, is_fp: bool) -> int:
+         """Calculates the priority (1 = highest)."""
+         if is_fp:
+             return 999  # Lowest priority for FPs
+
+         base_priority = 1
+
+         # Risk-based
+         if risk_score >= 0.8:
+             base_priority = 1
+         elif risk_score >= 0.6:
+             base_priority = 2
+         elif risk_score >= 0.4:
+             base_priority = 3
+         elif risk_score >= 0.2:
+             base_priority = 4
+         else:
+             base_priority = 5
+
+         # Adjustments based on factors
+         if finding.risk_factors.internet_exposed:
+             base_priority = max(1, base_priority - 1)
+         if finding.risk_factors.active_exploitation_observed:
+             base_priority = 1
+         if not finding.risk_factors.patch_available:
+             base_priority = max(1, base_priority - 1)
+
+         return base_priority
+
+     def _generate_recommendations(
+         self,
+         finding: Finding,
+         is_fp: bool,
+         risk_score: float
+     ) -> List[str]:
+         """Generates recommendations based on the result."""
+         recommendations = []
+
+         if is_fp:
+             recommendations.append("Mark as false positive and suppress")
+             recommendations.append("Review the scanner configuration")
+             if finding.confidence < 0.9:
+                 recommendations.append("Manual review recommended")
+         else:
+             if risk_score >= 0.8:
+                 recommendations.append("IMMEDIATE REMEDIATION REQUIRED")
+                 recommendations.append("Notify the security team")
+             elif risk_score >= 0.6:
+                 recommendations.append("High remediation priority")
+                 recommendations.append("Remediate within 7 days")
+             elif risk_score >= 0.4:
+                 recommendations.append("Medium priority")
+                 recommendations.append("Remediate within 30 days")
+             else:
+                 recommendations.append("Low priority")
+                 recommendations.append("Remediate in the regular patch cycle")
+
+         if finding.risk_factors.internet_exposed:
+             recommendations.append("Reduce internet exposure or deploy a WAF")
+
+         if not finding.risk_factors.patch_available:
+             recommendations.append("Implement compensating controls")
+
+         return recommendations
+
+     def register_llm(self, name: str, client: Any):
+         """Registers an LLM client for voting."""
+         if self.llm_voting:
+             self.llm_voting.register_llm(name, client)
+
+
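check_epss deliberately simulates scores. For reference, a minimal sketch of a real lookup against the FIRST.org endpoint the engine already configures (https://api.first.org/data/v1/epss), assuming the aiohttp package is available:

    import aiohttp

    async def fetch_epss(cve_id: str, endpoint: str = "https://api.first.org/data/v1/epss") -> float:
        """Fetch the published EPSS score for one CVE from the FIRST.org API."""
        async with aiohttp.ClientSession() as session:
            async with session.get(endpoint, params={"cve": cve_id}) as resp:
                resp.raise_for_status()
                payload = await resp.json()
        data = payload.get("data", [])
        # The API returns scores as strings, e.g. {"cve": ..., "epss": "0.97452", ...}
        return float(data[0]["epss"]) if data else 0.0
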
+ # Helper functions for convenient usage
+
+ def create_finding_from_scan_result(
+     scan_result: Dict[str, Any],
+     asset_context: Optional[AssetContext] = None
+ ) -> Finding:
+     """
+     Creates a Finding object from a scan result.
+
+     Args:
+         scan_result: The scan result as a dictionary
+         asset_context: Optional asset context
+
+     Returns:
+         Finding object
+     """
+     # Extract CVSS data
+     cvss_data = CVSSData()
+     if "cvss" in scan_result:
+         cvss_info = scan_result["cvss"]
+         cvss_data.base_score = cvss_info.get("base_score", 0.0)
+         cvss_data.vector_string = cvss_info.get("vector_string", "")
+         cvss_data.version = cvss_info.get("version", "3.1")
+
+     # Risk factors
+     risk_factors = RiskFactors(
+         cvss_data=cvss_data,
+         epss_score=scan_result.get("epss_score", 0.0),
+         business_impact=scan_result.get("business_impact", 0.0),
+         exploitability=scan_result.get("exploitability", 0.0),
+         internet_exposed=scan_result.get("internet_exposed", False),
+         data_classification=scan_result.get("data_classification", "internal"),
+         patch_available=scan_result.get("patch_available", False),
+     )
+
+     if asset_context:
+         risk_factors.asset_criticality = asset_context.criticality.weight
+
+     # Determine the vulnerability type
+     vuln_type_str = scan_result.get("type", "unknown").lower()
+     try:
+         vulnerability_type = VulnerabilityType(vuln_type_str)
+     except ValueError:
+         vulnerability_type = VulnerabilityType.UNKNOWN
+
+     finding = Finding(
+         id=scan_result.get("id", ""),
+         title=scan_result.get("title", ""),
+         description=scan_result.get("description", ""),
+         severity=scan_result.get("severity", "info"),
+         vulnerability_type=vulnerability_type,
+         risk_factors=risk_factors,
+         raw_evidence=scan_result.get("evidence", {}),
+         source=scan_result.get("source", ""),
+         scanner=scan_result.get("scanner", ""),
+         target=scan_result.get("target", ""),
+         cve_ids=scan_result.get("cve_ids", []),
+         cwe_ids=scan_result.get("cwe_ids", []),
+         asset_id=scan_result.get("asset_id"),
+         asset_name=scan_result.get("asset_name"),
+     )
+
+     return finding
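
Putting the pieces together, a minimal end-to-end sketch; the scan-result dict, IDs, and hostname are invented for illustration, and the wheel is assumed to be installed so the imports resolve:

    import asyncio
    from risk_engine.false_positive_engine import (
        FalsePositiveEngine,
        create_finding_from_scan_result,
    )

    async def main():
        engine = FalsePositiveEngine(fp_database_path="fp_db.json")

        # Invented scan result
        scan_result = {
            "id": "F-001",
            "title": "SQL injection in login form",
            "description": "UNION-based injection confirmed on /login",
            "severity": "high",
            "type": "sql_injection",
            "cvss": {"base_score": 8.8},
            "internet_exposed": True,
            "cve_ids": ["CVE-2024-0001"],
            "target": "app.example.com",
        }

        finding = create_finding_from_scan_result(scan_result)
        result = await engine.validate_finding(finding)
        print(result.to_dict())

    asyncio.run(main())

With no LLM clients registered, vote_on_finding logs a warning and falls back to neutral confidence, so the decision rests on the risk score and FP-pattern checks.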