zen-ai-pentest 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +28 -0
- agents/agent_base.py +239 -0
- agents/agent_orchestrator.py +346 -0
- agents/analysis_agent.py +225 -0
- agents/cli.py +258 -0
- agents/exploit_agent.py +224 -0
- agents/integration.py +211 -0
- agents/post_scan_agent.py +937 -0
- agents/react_agent.py +384 -0
- agents/react_agent_enhanced.py +616 -0
- agents/react_agent_vm.py +298 -0
- agents/research_agent.py +176 -0
- api/__init__.py +11 -0
- api/auth.py +123 -0
- api/main.py +1027 -0
- api/schemas.py +357 -0
- api/websocket.py +97 -0
- autonomous/__init__.py +122 -0
- autonomous/agent.py +253 -0
- autonomous/agent_loop.py +1370 -0
- autonomous/exploit_validator.py +1537 -0
- autonomous/memory.py +448 -0
- autonomous/react.py +339 -0
- autonomous/tool_executor.py +488 -0
- backends/__init__.py +16 -0
- backends/chatgpt_direct.py +133 -0
- backends/claude_direct.py +130 -0
- backends/duckduckgo.py +138 -0
- backends/openrouter.py +120 -0
- benchmarks/__init__.py +149 -0
- benchmarks/benchmark_engine.py +904 -0
- benchmarks/ci_benchmark.py +785 -0
- benchmarks/comparison.py +729 -0
- benchmarks/metrics.py +553 -0
- benchmarks/run_benchmarks.py +809 -0
- ci_cd/__init__.py +2 -0
- core/__init__.py +17 -0
- core/async_pool.py +282 -0
- core/asyncio_fix.py +222 -0
- core/cache.py +472 -0
- core/container.py +277 -0
- core/database.py +114 -0
- core/input_validator.py +353 -0
- core/models.py +288 -0
- core/orchestrator.py +611 -0
- core/plugin_manager.py +571 -0
- core/rate_limiter.py +405 -0
- core/secure_config.py +328 -0
- core/shield_integration.py +296 -0
- modules/__init__.py +46 -0
- modules/cve_database.py +362 -0
- modules/exploit_assist.py +330 -0
- modules/nuclei_integration.py +480 -0
- modules/osint.py +604 -0
- modules/protonvpn.py +554 -0
- modules/recon.py +165 -0
- modules/sql_injection_db.py +826 -0
- modules/tool_orchestrator.py +498 -0
- modules/vuln_scanner.py +292 -0
- modules/wordlist_generator.py +566 -0
- risk_engine/__init__.py +99 -0
- risk_engine/business_impact.py +267 -0
- risk_engine/business_impact_calculator.py +563 -0
- risk_engine/cvss.py +156 -0
- risk_engine/epss.py +190 -0
- risk_engine/example_usage.py +294 -0
- risk_engine/false_positive_engine.py +1073 -0
- risk_engine/scorer.py +304 -0
- web_ui/backend/main.py +471 -0
- zen_ai_pentest-2.0.0.dist-info/METADATA +795 -0
- zen_ai_pentest-2.0.0.dist-info/RECORD +75 -0
- zen_ai_pentest-2.0.0.dist-info/WHEEL +5 -0
- zen_ai_pentest-2.0.0.dist-info/entry_points.txt +2 -0
- zen_ai_pentest-2.0.0.dist-info/licenses/LICENSE +21 -0
- zen_ai_pentest-2.0.0.dist-info/top_level.txt +10 -0
modules/osint.py
ADDED
@@ -0,0 +1,604 @@
"""
OSINT Module - Open Source Intelligence Gathering

Comprehensive OSINT automation for penetration testing:
- Email harvesting
- Domain reconnaissance
- Social media intelligence
- Network discovery
- Data breach lookup
- Metadata extraction

Author: SHAdd0WTAka + Kimi AI
"""

import asyncio
import json
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

import aiohttp

logger = logging.getLogger("ZenAI.OSINT")


@dataclass
class OSINTResult:
    """Container for OSINT findings"""

    source: str
    data_type: str  # email, domain, ip, username, etc.
    value: str
    confidence: int = 5  # 1-10
    metadata: Dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> Dict:
        return {
            "source": self.source,
            "data_type": self.data_type,
            "value": self.value,
            "confidence": self.confidence,
            "metadata": self.metadata,
            "timestamp": self.timestamp,
        }


@dataclass
class DomainInfo:
    """Domain reconnaissance results"""

    domain: str
    registrar: Optional[str] = None
    creation_date: Optional[str] = None
    expiration_date: Optional[str] = None
    name_servers: List[str] = field(default_factory=list)
    subdomains: List[str] = field(default_factory=list)
    ip_addresses: List[str] = field(default_factory=list)
    mx_records: List[str] = field(default_factory=list)
    txt_records: List[str] = field(default_factory=list)
    technologies: List[str] = field(default_factory=list)


@dataclass
class EmailProfile:
    """Email address intelligence"""

    email: str
    valid_format: bool = False
    deliverable: Optional[bool] = None
    breached: bool = False
    breach_sources: List[str] = field(default_factory=list)
    associated_domains: List[str] = field(default_factory=list)
    social_profiles: Dict[str, str] = field(default_factory=dict)


class OSINTModule:
    """
    Open Source Intelligence gathering module

    Features:
    - Email harvesting from multiple sources
    - Domain enumeration and reconnaissance
    - Social media intelligence
    - Network discovery
    - Data breach monitoring
    - Metadata extraction
    """

    def __init__(self, session: Optional[aiohttp.ClientSession] = None):
        self.session = session
        self.results: List[OSINTResult] = []
        self.semaphore = asyncio.Semaphore(10)  # Cap concurrent requests

        # Common user agents for rotation
        self.user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
        ]

    async def __aenter__(self):
        if not self.session:
            self.session = aiohttp.ClientSession(
                headers={"User-Agent": self.user_agents[0]},
                timeout=aiohttp.ClientTimeout(total=30),
            )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def _fetch(self, url: str, headers: Optional[Dict] = None) -> Optional[str]:
        """Make an HTTP GET request, throttled by the shared semaphore"""
        async with self.semaphore:
            try:
                default_headers = {
                    "User-Agent": self.user_agents[hash(url) % len(self.user_agents)],
                    "Accept": "text/html,application/json",
                    "Accept-Language": "en-US,en;q=0.9",
                }
                if headers:
                    default_headers.update(headers)

                # NOTE: ssl=False disables certificate verification
                async with self.session.get(
                    url, headers=default_headers, ssl=False
                ) as resp:
                    if resp.status == 200:
                        return await resp.text()
            except Exception as e:
                logger.debug(f"Fetch error for {url}: {e}")
        return None

    # =================================================================
    # Email Harvesting
    # =================================================================

    async def harvest_emails(
        self, domain: str, sources: Optional[List[str]] = None
    ) -> List[OSINTResult]:
        """
        Harvest email addresses from multiple sources

        Supported sources: google, bing, yahoo, pgp, github
        """
        if not sources:
            sources = ["google", "bing", "yahoo", "pgp"]

        logger.info(f"Harvesting emails for domain: {domain}")

        tasks = []
        for source in sources:
            if source == "google":
                tasks.append(self._google_email_search(domain))
            elif source == "bing":
                tasks.append(self._bing_email_search(domain))
            elif source == "yahoo":
                tasks.append(self._yahoo_email_search(domain))
            elif source == "pgp":
                tasks.append(self._pgp_key_search(domain))
            elif source == "github":
                tasks.append(self._github_email_search(domain))

        results = await asyncio.gather(*tasks, return_exceptions=True)

        emails = set()
        for result in results:
            if isinstance(result, list):
                for item in result:
                    if isinstance(item, OSINTResult):
                        emails.add(item.value)
                        self.results.append(item)

        logger.info(f"Found {len(emails)} unique emails for {domain}")
        return [r for r in self.results if r.data_type == "email" and domain in r.value]

    async def _google_email_search(self, domain: str) -> List[OSINTResult]:
        """Search for emails using Google dorks"""
        results = []

        # NOTE: In production, use the Google Custom Search API or a
        # headless browser. This simplified implementation only guesses
        # common mailbox names instead of scraping live results.
        common_names = ["admin", "info", "support", "contact", "sales", "webmaster"]
        for name in common_names:
            email = f"{name}@{domain}"
            results.append(
                OSINTResult(
                    source="google",
                    data_type="email",
                    value=email,
                    confidence=6,
                    metadata={"method": "dork_pattern", "pattern": f"{name}@"},
                )
            )

        return results
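
    # A minimal sketch of the Custom Search approach the note above
    # suggests, assuming the Google Custom Search JSON API. The method
    # name `_google_cse_search` and the `api_key`/`cx` parameters are
    # illustrative assumptions, not part of the published module.
    async def _google_cse_search(
        self, domain: str, api_key: str, cx: str
    ) -> List[str]:
        from urllib.parse import quote

        query = quote(f'"@{domain}"')
        url = (
            "https://www.googleapis.com/customsearch/v1"
            f"?key={api_key}&cx={cx}&q={query}"
        )
        emails = set()
        content = await self._fetch(url)
        if content:
            try:
                data = json.loads(content)
                # Each result item carries a text snippet we can mine
                for item in data.get("items", []):
                    emails.update(
                        re.findall(
                            rf"[a-zA-Z0-9._%+-]+@{re.escape(domain)}",
                            item.get("snippet", ""),
                        )
                    )
            except json.JSONDecodeError:
                pass
        return sorted(emails)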

    async def _bing_email_search(self, domain: str) -> List[OSINTResult]:
        """Search for emails using Bing"""
        # Would mirror the Google search; Bing often has different
        # indexed content. Not yet implemented.
        return []

    async def _yahoo_email_search(self, domain: str) -> List[OSINTResult]:
        """Search for emails using Yahoo"""
        # Referenced by harvest_emails() (it is in the default source
        # list) but not implemented; stubbed so the default call path
        # does not raise AttributeError.
        return []

    async def _pgp_key_search(self, domain: str) -> List[OSINTResult]:
        """Search PGP key servers for emails"""
        results = []

        # NOTE: pgp.mit.edu is frequently unreachable, and the
        # keys.openpgp.org VKS API documents by-email/by-fingerprint/
        # by-keyid lookups, so the by-domain path may return nothing.
        pgp_servers = [
            f"https://pgp.mit.edu/pks/lookup?search={domain}&op=index",
            f"https://keys.openpgp.org/vks/v1/by-domain/{domain}",
        ]

        for server in pgp_servers:
            content = await self._fetch(server)
            if content:
                # Extract emails from PGP key data
                emails = re.findall(rf"[a-zA-Z0-9._%+-]+@{re.escape(domain)}", content)
                for email in set(emails):
                    results.append(
                        OSINTResult(
                            source="pgp",
                            data_type="email",
                            value=email,
                            confidence=9,  # Key servers imply a real, used address
                            metadata={"pgp_server": server},
                        )
                    )

        return results

    async def _github_email_search(self, domain: str) -> List[OSINTResult]:
        """Search GitHub for emails with domain"""
        results = []

        # GitHub code search API (NOTE: this endpoint requires an
        # authenticated token; unauthenticated requests are rejected)
        github_api = f"https://api.github.com/search/code?q=@{domain}+in:email"

        content = await self._fetch(
            github_api, headers={"Accept": "application/vnd.github.v3+json"}
        )
        if content:
            try:
                data = json.loads(content)
                for item in data.get("items", []):
                    # Email extraction from matched files is not yet
                    # implemented; it depends on the API response shape
                    pass
            except json.JSONDecodeError:
                pass

        return results

    # =================================================================
    # Domain Reconnaissance
    # =================================================================

    async def recon_domain(self, domain: str) -> DomainInfo:
        """
        Comprehensive domain reconnaissance
        """
        logger.info(f"Starting domain reconnaissance for: {domain}")

        info = DomainInfo(domain=domain)

        # Run all recon tasks concurrently
        tasks = [
            self._get_whois_info(domain),
            self._enumerate_subdomains(domain),
            self._resolve_dns(domain),
            self._detect_technologies(domain),
        ]

        results = await asyncio.gather(*tasks, return_exceptions=True)

        # Process WHOIS
        if isinstance(results[0], dict):
            info.registrar = results[0].get("registrar")
            info.creation_date = results[0].get("creation_date")
            info.expiration_date = results[0].get("expiration_date")
            info.name_servers = results[0].get("name_servers", [])

        # Process subdomains
        if isinstance(results[1], list):
            info.subdomains = results[1]

        # Process DNS
        if isinstance(results[2], dict):
            info.ip_addresses = results[2].get("ip_addresses", [])
            info.mx_records = results[2].get("mx_records", [])
            info.txt_records = results[2].get("txt_records", [])

        # Process technologies
        if isinstance(results[3], list):
            info.technologies = results[3]

        logger.info(f"Domain recon complete: {len(info.subdomains)} subdomains found")
        return info

    async def _get_whois_info(self, domain: str) -> Dict:
        """Get WHOIS information"""
        # In production, use the python-whois library; for now this
        # returns hard-coded placeholder values
        return {
            "registrar": "Example Registrar",
            "creation_date": "2020-01-01",
            "expiration_date": "2025-01-01",
            "name_servers": ["ns1.example.com", "ns2.example.com"],
        }
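
    # A minimal sketch of backing the placeholder above with the
    # `python-whois` package, assuming it is installed. The blocking
    # lookup is pushed to a worker thread; the name
    # `_get_whois_info_live` is an illustrative assumption.
    async def _get_whois_info_live(self, domain: str) -> Dict:
        import whois  # pip install python-whois

        def _lookup() -> Dict:
            w = whois.whois(domain)
            return {
                "registrar": w.registrar,
                "creation_date": str(w.creation_date),
                "expiration_date": str(w.expiration_date),
                "name_servers": list(w.name_servers or []),
            }

        # whois.whois() does blocking network I/O, so keep it off the loop
        return await asyncio.get_running_loop().run_in_executor(None, _lookup)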

    async def _enumerate_subdomains(
        self, domain: str, wordlist: Optional[List[str]] = None
    ) -> List[str]:
        """
        Enumerate subdomains using certificate transparency and a wordlist
        """
        subdomains = set()

        # Certificate Transparency logs
        crt_sh_url = f"https://crt.sh/?q=%.{domain}&output=json"
        content = await self._fetch(crt_sh_url)

        if content:
            try:
                data = json.loads(content)
                for entry in data:
                    name = entry.get("name_value", "")
                    for sub in name.split("\n"):
                        if sub.endswith(domain) and sub != domain:
                            subdomains.add(sub.strip())
            except json.JSONDecodeError:
                pass

        # DNS brute force with wordlist
        if not wordlist:
            wordlist = [
                "www", "mail", "ftp", "admin", "api",
                "blog", "shop", "dev", "test", "staging",
            ]

        # In production, use aiodns for async DNS resolution; the hash
        # check below merely simulates a hit rate for demo purposes
        for word in wordlist[:20]:  # Limit for demo
            subdomain = f"{word}.{domain}"
            if hash(subdomain) % 3 == 0:  # Simulated DNS hit
                subdomains.add(subdomain)

        return sorted(subdomains)
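
    # A minimal sketch of replacing the simulated hit check above with
    # real resolution via `aiodns`, assuming that package is installed.
    # The name `_bruteforce_dns` is an illustrative assumption.
    async def _bruteforce_dns(self, domain: str, words: List[str]) -> List[str]:
        import aiodns  # pip install aiodns

        resolver = aiodns.DNSResolver()
        found: List[str] = []

        async def probe(name: str) -> None:
            try:
                # query() raises DNSError when the name does not resolve
                await resolver.query(name, "A")
                found.append(name)
            except aiodns.error.DNSError:
                pass

        await asyncio.gather(*(probe(f"{w}.{domain}") for w in words))
        return sorted(found)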

    async def _resolve_dns(self, domain: str) -> Dict:
        """Resolve DNS records"""
        # In production, use aiodns; these are placeholder values
        return {
            "ip_addresses": ["192.0.2.1"],  # TEST-NET-1 placeholder
            "mx_records": [f"mail.{domain}"],
            "txt_records": ["v=spf1 include:_spf.google.com ~all"],
        }
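
    # A minimal sketch of live record lookups for the placeholder above,
    # again assuming `aiodns`; `_resolve_dns_live` is an illustrative name.
    async def _resolve_dns_live(self, domain: str) -> Dict:
        import aiodns

        resolver = aiodns.DNSResolver()

        async def query(rtype: str):
            try:
                return await resolver.query(domain, rtype)
            except aiodns.error.DNSError:
                return []  # record type absent or lookup failed

        return {
            "ip_addresses": [r.host for r in await query("A")],
            "mx_records": [r.host for r in await query("MX")],
            "txt_records": [r.text for r in await query("TXT")],
        }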

    async def _detect_technologies(self, domain: str) -> List[str]:
        """Detect web technologies"""
        url = f"https://{domain}"
        content = await self._fetch(url)

        technologies = []

        if content:
            # Check for common technology signatures (lowercase patterns,
            # matched against the lowercased page body)
            tech_signatures = {
                "WordPress": r"wp-content|wp-includes",
                "Drupal": r"drupal",
                "Joomla": r"joomla",
                "React": r"react|reactroot",
                "Angular": r"ng-|angular",
                "Bootstrap": r"bootstrap",
                "jQuery": r"jquery",
                "CloudFlare": r"cloudflare",
                "Apache": r"apache",
                "Nginx": r"nginx",
                "PHP": r"\.php",
                "ASP.NET": r"\.aspx|__viewstate",
            }

            content_lower = content.lower()
            for tech, pattern in tech_signatures.items():
                if re.search(pattern, content_lower):
                    technologies.append(tech)

        return technologies
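
    # A minimal sketch of complementing the body regexes above with
    # response-header fingerprinting; `_detect_from_headers` is an
    # illustrative name, not part of the published module.
    async def _detect_from_headers(self, domain: str) -> List[str]:
        techs: List[str] = []
        try:
            async with self.session.get(f"https://{domain}", ssl=False) as resp:
                # Server software usually announces itself in these headers
                server = resp.headers.get("Server", "")
                powered_by = resp.headers.get("X-Powered-By", "")
                for marker in ("nginx", "apache", "cloudflare", "microsoft-iis"):
                    if marker in server.lower():
                        techs.append(marker)
                if powered_by:
                    techs.append(powered_by)
        except Exception:
            pass
        return techs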

    # =================================================================
    # Social Media Intelligence
    # =================================================================

    async def investigate_username(self, username: str) -> Dict[str, Any]:
        """
        Investigate a username across multiple platforms
        """
        logger.info(f"Investigating username: {username}")

        platforms = {
            "twitter": f"https://twitter.com/{username}",
            "github": f"https://github.com/{username}",
            "linkedin": f"https://linkedin.com/in/{username}",
            "instagram": f"https://instagram.com/{username}",
            "facebook": f"https://facebook.com/{username}",
            "reddit": f"https://reddit.com/user/{username}",
        }

        results = {}

        # Check platforms concurrently; this body-text heuristic is
        # crude and can misreport platforms that soft-404
        async def check_platform(name: str, url: str):
            content = await self._fetch(url)
            exists = content is not None and "not found" not in content.lower()
            results[name] = {
                "exists": exists,
                "url": url,
                "profile_data": {},  # Would contain scraped data
            }

        await asyncio.gather(
            *[check_platform(name, url) for name, url in platforms.items()]
        )

        return results
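
    # A minimal sketch of a status-code existence check. `_fetch` hides
    # non-200 responses, so the heuristic above cannot tell a missing
    # profile from a fetch failure; `_username_exists` is an
    # illustrative name.
    async def _username_exists(self, url: str) -> bool:
        try:
            async with self.session.get(
                url, headers={"User-Agent": self.user_agents[0]}, ssl=False
            ) as resp:
                return resp.status == 200
        except Exception:
            return False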

    # =================================================================
    # Data Breach Lookup
    # =================================================================

    async def check_breach(self, email: str) -> EmailProfile:
        """
        Check whether an email appears in known data breaches.
        Uses the Have I Been Pwned API in production (requires an API
        key); this demo build simulates the lookup.
        """
        profile = EmailProfile(email=email)

        # Validate email format
        email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        profile.valid_format = bool(re.match(email_pattern, email))

        if not profile.valid_format:
            return profile

        # Mock breach data (replace with a real HIBP query)
        known_breaches = ["LinkedIn 2012", "Adobe 2013", "Dropbox 2016"]

        # Simulate a breach hit based on the email's hash
        email_hash = hash(email) % 100
        if email_hash < 30:  # 30% chance for demo
            profile.breached = True
            profile.breach_sources = known_breaches[: email_hash % 3 + 1]

        return profile
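
    # A minimal sketch of the real Have I Been Pwned v3 lookup mentioned
    # above. The key goes in the `hibp-api-key` header and a 404 means
    # "no breach found"; `_check_hibp` and the `api_key` parameter are
    # illustrative assumptions.
    async def _check_hibp(self, email: str, api_key: str) -> List[str]:
        from urllib.parse import quote

        url = (
            "https://haveibeenpwned.com/api/v3/breachedaccount/"
            f"{quote(email)}?truncateResponse=false"
        )
        headers = {"hibp-api-key": api_key, "user-agent": "zen-ai-pentest"}
        async with self.session.get(url, headers=headers) as resp:
            if resp.status == 200:
                return [b["Name"] for b in await resp.json()]
            return []  # 404 (not breached) or another error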

    # =================================================================
    # IP Intelligence
    # =================================================================

    async def investigate_ip(self, ip: str) -> Dict[str, Any]:
        """
        Gather intelligence about an IP address
        """
        logger.info(f"Investigating IP: {ip}")

        info = {
            "ip": ip,
            "reputation": "unknown",
            "geolocation": {},
            "ports": [],
            "services": [],
        }

        # IP reputation check
        # In production: use VirusTotal, AbuseIPDB, etc.

        # Geolocation via ip-api.com (free tier, HTTP only)
        geo_url = (
            f"http://ip-api.com/json/{ip}"
            "?fields=status,message,country,regionName,city,zip,lat,lon,"
            "isp,org,as,proxy,hosting"
        )

        content = await self._fetch(geo_url)
        if content:
            try:
                geo_data = json.loads(content)
                if geo_data.get("status") == "success":
                    info["geolocation"] = {
                        "country": geo_data.get("country"),
                        "region": geo_data.get("regionName"),
                        "city": geo_data.get("city"),
                        "zip": geo_data.get("zip"),
                        "lat": geo_data.get("lat"),
                        "lon": geo_data.get("lon"),
                        "isp": geo_data.get("isp"),
                        "org": geo_data.get("org"),
                        "proxy": geo_data.get("proxy"),
                        "hosting": geo_data.get("hosting"),
                    }

                    if geo_data.get("proxy"):
                        info["reputation"] = "proxy"
                    elif geo_data.get("hosting"):
                        info["reputation"] = "hosting"
            except json.JSONDecodeError:
                pass

        return info
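
    # A minimal sketch of filling the empty `ports`/`services` fields
    # above via Shodan's host endpoint, assuming a Shodan API key;
    # `_shodan_host` and `api_key` are illustrative assumptions.
    async def _shodan_host(self, ip: str, api_key: str) -> Dict[str, Any]:
        url = f"https://api.shodan.io/shodan/host/{ip}?key={api_key}"
        content = await self._fetch(url)
        if not content:
            return {"ports": [], "services": []}
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return {"ports": [], "services": []}
        # Each banner in `data` describes one service observed on a port
        services = [
            {"port": item.get("port"), "product": item.get("product", "")}
            for item in data.get("data", [])
        ]
        return {"ports": data.get("ports", []), "services": services}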

    # =================================================================
    # Report Generation
    # =================================================================

    def generate_report(self, target: str) -> Dict[str, Any]:
        """Generate OSINT report for target"""

        # Filter results for target
        target_results = [
            r for r in self.results if target in r.value or target in str(r.metadata)
        ]

        report = {
            "target": target,
            "generated_at": datetime.now().isoformat(),
            "summary": {"total_findings": len(target_results), "by_type": {}},
            "findings": [r.to_dict() for r in target_results],
            "sources_used": list(set(r.source for r in target_results)),
        }

        # Count findings by type
        for result in target_results:
            data_type = result.data_type
            report["summary"]["by_type"][data_type] = (
                report["summary"]["by_type"].get(data_type, 0) + 1
            )

        return report

    def clear_results(self):
        """Clear all gathered results"""
        self.results = []


# Convenience functions
async def harvest_emails(domain: str) -> List[str]:
    """Quick email harvesting"""
    async with OSINTModule() as osint:
        results = await osint.harvest_emails(domain)
        return list(set(r.value for r in results))


async def enumerate_subdomains(domain: str) -> List[str]:
    """Quick subdomain enumeration"""
    async with OSINTModule() as osint:
        domain_info = await osint.recon_domain(domain)
        return domain_info.subdomains


async def check_email_breach(email: str) -> bool:
    """Quick breach check"""
    async with OSINTModule() as osint:
        profile = await osint.check_breach(email)
        return profile.breached


# Example usage
if __name__ == "__main__":

    async def demo():
        async with OSINTModule() as osint:
            # Harvest emails
            emails = await osint.harvest_emails("example.com")
            print(f"Found {len(emails)} emails")

            # Domain recon
            info = await osint.recon_domain("example.com")
            print(f"Subdomains: {info.subdomains}")

            # Generate report
            report = osint.generate_report("example.com")
            print(json.dumps(report, indent=2))

    asyncio.run(demo())