voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
"""
|
|
2
|
+
WHOIS/passive DNS enrichment using CIRCL pDNS, CIRCL pSSL, and RDAP.
|
|
3
|
+
|
|
4
|
+
Enriches extracted IP and domain entities with DNS history, WHOIS data,
|
|
5
|
+
and infrastructure overlap detection. Free, no auth required for CIRCL/RDAP.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import aiohttp
|
|
10
|
+
import ipaddress
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# CIRCL passive DNS / passive SSL query endpoints; the lookup target (IP or
# domain) is appended as a trailing path segment.
# NOTE(review): CIRCL's public documentation describes these services as
# access-restricted; unauthenticated requests may simply return a non-200
# status, which this module treats as "no data" — confirm.
CIRCL_PDNS_URL = "https://www.circl.lu/pdns/query"
CIRCL_PSSL_URL = "https://www.circl.lu/v2pssl/query"

# RDAP lookup URL templates, filled in with str.format().
RDAP_IP_URL = "https://rdap.arin.net/registry/ip/{ip}"
RDAP_DOMAIN_URL = "https://rdap.org/domain/{domain}"

# Per-request total timeouts, passed to aiohttp.ClientTimeout(total=...).
CIRCL_TIMEOUT = 15
WHOIS_TIMEOUT = 10

# Upper bounds on how many distinct IPs / domains one enrichment run queries.
MAX_IPS_TO_ENRICH = 20
MAX_DOMAINS_TO_ENRICH = 20

# Cap on related entities derived from the records of a single lookup.
MAX_RELATED_PER_ENTITY = 5

# Pause (passed to asyncio.sleep) after each entity's lookups.
CIRCL_DELAY = 0.5
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DNSEnrichment:
    """
    Enriches IP and domain entities with passive DNS history, RDAP (WHOIS)
    data, passive SSL certificates, and infrastructure overlap detection.

    Data comes from CIRCL passive DNS / passive SSL and public RDAP servers.
    A SecurityTrails API key is read from the environment and stored, but no
    method of this class uses it.

    Use as an async context manager: the HTTP session only exists between
    ``__aenter__`` and ``__aexit__``. Without a session every lookup returns
    an empty result instead of raising.
    """

    # One or more LDH labels followed by an alphabetic or punycode TLD.
    # Compiled once; every label (not just the first) may contain digits and
    # hyphens, which is the norm for names seen in passive DNS.
    _DOMAIN_RE = re.compile(
        r"(?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+"
        r"(?:[a-zA-Z]{2,63}|[xX][nN]--[a-zA-Z0-9\-]{1,59})"
    )
    # Extracts the CN attribute from a certificate subject string such as
    # "C=US, O=Example Inc., CN=www.example.com".
    _CERT_CN_RE = re.compile(r"(?:^|[,/])\s*CN\s*=\s*([^,/]+)", re.IGNORECASE)

    # Substrings of the RDAP organisation name that produce a c2_hoster_* tag.
    _C2_HOSTERS = (
        "choopa", "vultr", "digitalocean", "linode",
        "frantech", "m247", "serverius", "combahton",
        "servermania", "sharktech",
    )
    # Country codes that produce a country_* tag.
    _FLAGGED_COUNTRIES = ("RU", "CN", "KP", "IR")
    # Substrings of the registrant name that produce the privacy_protected tag.
    _PRIVACY_SERVICES = (
        "whoisguard", "privacyprotect", "perfect privacy",
        "domainsbyproxy", "withheld for privacy",
    )
    _MAX_PDNS_RECORDS = 20
    _MAX_CERTS = 10

    def __init__(self):
        # Created in __aenter__; None means no lookups can be performed.
        self._session: Optional[aiohttp.ClientSession] = None
        self._st_key = os.getenv("SECURITYTRAILS_API_KEY", "").strip()

    async def __aenter__(self):
        """Open the shared HTTP session and return ``self``."""
        headers = {
            "User-Agent": "VoidAccess-OSINT/1.1 (security research)",
            "Accept": "application/json",
        }
        self._session = aiohttp.ClientSession(
            headers=headers,
            timeout=aiohttp.ClientTimeout(total=20),
        )
        return self

    async def __aexit__(self, *args):
        """Close the HTTP session, if one was opened."""
        # Detach first so a closed session is never left on the instance.
        session, self._session = self._session, None
        if session:
            await session.close()

    async def enrich_entities(self, entities: list[dict]) -> dict:
        """
        Main entry point. Takes a list of extracted entities, enriches IPs
        and domains with DNS/WHOIS data.

        Only entities whose ``entity_type`` is ``IP_ADDRESS`` or ``DOMAIN``
        are considered. IPs must be public IPv4 addresses and domains must be
        syntactically valid non-``.onion`` hostnames; anything else is skipped
        so that it is never sent to a third-party service. Targets are
        de-duplicated in input order before the per-run caps are applied.

        Returns:
            {
                "ip_enrichments": {ip: {...}},
                "domain_enrichments": {domain: {...}},
                "new_entities": [...],
                "infrastructure_clusters": [...],
            }
        """
        ips, domains = self._collect_targets(entities)
        if not ips and not domains:
            return self._empty_result()

        ips = ips[:MAX_IPS_TO_ENRICH]
        domains = domains[:MAX_DOMAINS_TO_ENRICH]

        logger.info(
            "DNS enrichment: %d IPs, %d domains", len(ips), len(domains)
        )

        sem = asyncio.Semaphore(3)
        lookups = [self._enrich_ip(ip, sem) for ip in ips]
        lookups += [self._enrich_domain(domain, sem) for domain in domains]
        # One gather for both kinds: they share the semaphore anyway.
        results = await asyncio.gather(*lookups, return_exceptions=True)

        ip_enrichments: dict = {}
        domain_enrichments: dict = {}
        new_entities: list = []

        buckets = [(ip, ip_enrichments) for ip in ips]
        buckets += [(domain, domain_enrichments) for domain in domains]
        for (target, bucket), result in zip(buckets, results):
            if isinstance(result, dict):
                bucket[target] = result
                new_entities.extend(result.get("new_entities", []))
            else:
                # gather() handed back an exception; record it instead of
                # dropping the target without a trace.
                logger.warning("DNS enrichment failed for %s: %r", target, result)

        seen: set = set()
        unique_new = []
        for entity in new_entities:
            key = (entity["type"], entity["value"])
            if key not in seen:
                seen.add(key)
                unique_new.append(entity)

        clusters = self._detect_infrastructure_clusters(ip_enrichments, domain_enrichments)

        logger.info(
            "DNS enrichment complete: %d new entities, %d clusters found",
            len(unique_new),
            len(clusters),
        )

        return {
            "ip_enrichments": ip_enrichments,
            "domain_enrichments": domain_enrichments,
            "new_entities": unique_new,
            "infrastructure_clusters": clusters,
        }

    @staticmethod
    def _empty_result() -> dict:
        """Return a fresh result structure with no enrichments."""
        return {
            "ip_enrichments": {},
            "domain_enrichments": {},
            "new_entities": [],
            "infrastructure_clusters": [],
        }

    @staticmethod
    def _text(value) -> str:
        """Return ``value`` if it is a string, otherwise an empty string."""
        return value if isinstance(value, str) else ""

    @staticmethod
    def _dicts(items) -> list:
        """Return the dict members of ``items``; empty if it is not a list."""
        if not isinstance(items, list):
            return []
        return [item for item in items if isinstance(item, dict)]

    def _collect_targets(self, entities: list[dict]) -> tuple[list[str], list[str]]:
        """
        Split entities into distinct, validated IP and domain lookup targets.

        Values are taken from ``canonical_value`` (falling back to ``value``),
        stripped of surrounding whitespace, and kept in first-seen order.
        """
        ips: dict = {}
        domains: dict = {}
        for entity in entities or []:
            if not isinstance(entity, dict):
                continue
            value = entity.get("canonical_value", "") or entity.get("value", "")
            if not isinstance(value, str):
                continue
            value = value.strip()
            if not value:
                continue

            etype = entity.get("entity_type", "")
            if etype == "IP_ADDRESS":
                if self._is_valid_public_ip(value):
                    ips[value] = None
            elif etype == "DOMAIN":
                # A trailing root dot is tolerated; the value is otherwise
                # validated because it is interpolated into request URLs.
                if self._is_valid_domain(value.rstrip(".")):
                    domains[value] = None
        return list(ips), list(domains)

    def _first_valid_domains(self, candidates) -> list[str]:
        """Return up to MAX_RELATED_PER_ENTITY distinct valid domains, in order."""
        picked: list[str] = []
        for name in candidates:
            if name in picked or not self._is_valid_domain(name):
                continue
            picked.append(name)
            if len(picked) >= MAX_RELATED_PER_ENTITY:
                break
        return picked

    async def _enrich_ip(self, ip: str, sem: asyncio.Semaphore) -> dict:
        """
        Look up passive DNS, RDAP and passive SSL data for one IP.

        Returns a dict with keys ``ip``, ``passive_dns``, ``whois``,
        ``ssl_certs``, ``new_entities`` and ``tags``. A failed source leaves
        its field empty.
        """
        async with sem:
            pdns, whois, ssl = await asyncio.gather(
                self._circl_pdns_ip(ip),
                self._rdap_ip(ip),
                self._circl_pssl_ip(ip),
                return_exceptions=True,
            )
            # Sleep while still holding the semaphore slot so the pause
            # actually throttles the request rate.
            await asyncio.sleep(CIRCL_DELAY)

        result: dict = {
            "ip": ip,
            "passive_dns": [],
            "whois": {},
            "ssl_certs": [],
            "new_entities": [],
            "tags": [],
        }

        if isinstance(pdns, list):
            result["passive_dns"] = pdns
            names = [
                self._text(record.get("rrname")).rstrip(".")
                for record in pdns
                if isinstance(record, dict)
            ]
            for rrname in self._first_valid_domains(names):
                result["new_entities"].append({
                    "type": "DOMAIN",
                    "value": rrname,
                    "source": "circl_pdns",
                    "context": f"Resolved to {ip} (passive DNS)",
                    "confidence": 0.75,
                })
            if pdns:
                result["tags"].append("has_pdns_history")

        if isinstance(whois, dict):
            result["whois"] = whois
            # RDAP fields may be missing or non-string; never let that
            # discard the whole enrichment.
            org = self._text(whois.get("org")).lower()
            country = self._text(whois.get("country")).upper()
            for hoster in self._C2_HOSTERS:
                if hoster in org:
                    result["tags"].append(f"c2_hoster_{hoster}")
            if country in self._FLAGGED_COUNTRIES:
                result["tags"].append(f"country_{country.lower()}")

        if isinstance(ssl, list):
            result["ssl_certs"] = ssl
            common_names = [
                self._text(cert.get("cn"))
                for cert in ssl
                if isinstance(cert, dict)
            ]
            for cn in self._first_valid_domains(common_names):
                result["new_entities"].append({
                    "type": "DOMAIN",
                    "value": cn,
                    "source": "circl_pssl",
                    "context": f"SSL certificate on {ip}",
                    "confidence": 0.80,
                })
            if ssl:
                result["tags"].append("has_ssl_history")

        return result

    async def _enrich_domain(self, domain: str, sem: asyncio.Semaphore) -> dict:
        """
        Look up passive DNS and RDAP data for one domain.

        Returns a dict with keys ``domain``, ``passive_dns``, ``whois``,
        ``new_entities`` and ``tags``. A failed source leaves its field empty.
        """
        async with sem:
            pdns, whois = await asyncio.gather(
                self._circl_pdns_domain(domain),
                self._rdap_domain(domain),
                return_exceptions=True,
            )
            # Sleep while still holding the semaphore slot (rate limiting).
            await asyncio.sleep(CIRCL_DELAY)

        result: dict = {
            "domain": domain,
            "passive_dns": [],
            "whois": {},
            "new_entities": [],
            "tags": [],
        }

        if isinstance(pdns, list):
            result["passive_dns"] = pdns
            seen_ips: set = set()
            for record in pdns:
                if not isinstance(record, dict):
                    continue
                rdata = self._text(record.get("rdata")).strip()
                if rdata in seen_ips or not self._is_valid_public_ip(rdata):
                    continue
                seen_ips.add(rdata)
                result["new_entities"].append({
                    "type": "IP_ADDRESS",
                    "value": rdata,
                    "source": "circl_pdns",
                    "context": f"{domain} resolved to this IP (passive DNS)",
                    "confidence": 0.80,
                })
                if len(result["new_entities"]) >= MAX_RELATED_PER_ENTITY:
                    break
            if pdns:
                result["tags"].append("has_pdns_history")

        if isinstance(whois, dict):
            result["whois"] = whois
            reg_date = self._text(whois.get("registered"))
            if reg_date:
                # First 7 characters of the registration date string.
                result["tags"].append(f"registered_{reg_date[:7]}")

            registrant = self._text(whois.get("registrant")).lower()
            if any(svc in registrant for svc in self._PRIVACY_SERVICES):
                result["tags"].append("privacy_protected")

            registered_at = self._parse_timestamp(reg_date) if reg_date else None
            if registered_at is not None:
                age_days = (datetime.now(timezone.utc) - registered_at).days
                if age_days < 30:
                    result["tags"].append("recently_registered")
                elif age_days < 90:
                    result["tags"].append("new_domain")

        return result

    @staticmethod
    def _parse_timestamp(value: str) -> Optional[datetime]:
        """
        Parse a date string into a timezone-aware datetime, or return None.

        The standard library is tried first so that age tagging does not
        depend on ``dateutil`` being installed; ``dateutil`` is only used as a
        fallback for strings ``datetime.fromisoformat`` rejects. Naive results
        are assumed to be UTC.
        """
        if not isinstance(value, str):
            return None
        text = value.strip()
        # fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
        iso_text = text[:-1] + "+00:00" if text.endswith(("Z", "z")) else text
        try:
            parsed = datetime.fromisoformat(iso_text)
        except ValueError:
            try:
                from dateutil.parser import parse as parse_date
            except ImportError:
                return None
            try:
                parsed = parse_date(text)
            except (ValueError, OverflowError):
                return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _parse_ndjson(text: str) -> list:
        """
        Parse newline-delimited JSON, keeping only lines that decode to objects.

        Undecodable lines and non-object values are skipped so that callers
        can rely on every record supporting ``.get``.
        """
        records = []
        for line in text.strip().split("\n"):
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except ValueError:
                continue
            if isinstance(record, dict):
                records.append(record)
        return records

    async def _circl_pdns_query(self, target: str, label: str) -> list:
        """
        Fetch passive DNS records for ``target`` (shared by the IP and domain
        lookups). Returns at most ``_MAX_PDNS_RECORDS`` records, or ``[]``
        when there is no session, the status is not 200, or the request fails.
        """
        if not self._session:
            return []
        try:
            async with self._session.get(
                f"{CIRCL_PDNS_URL}/{target}",
                timeout=aiohttp.ClientTimeout(total=CIRCL_TIMEOUT),
            ) as resp:
                if resp.status != 200:
                    return []
                text = await resp.text()
        except Exception as e:
            # Best-effort source: any failure means "no data".
            logger.debug("CIRCL PDNS %s error %s: %s", label, target, e)
            return []
        return self._parse_ndjson(text)[: self._MAX_PDNS_RECORDS]

    async def _circl_pdns_ip(self, ip: str) -> list:
        """Return passive DNS records for an IP address (``[]`` on failure)."""
        return await self._circl_pdns_query(ip, "IP")

    async def _circl_pdns_domain(self, domain: str) -> list:
        """Return passive DNS records for a domain (``[]`` on failure)."""
        return await self._circl_pdns_query(domain, "domain")

    async def _fetch_json(self, url: str, timeout: float, label: str, target: str):
        """
        GET ``url`` and return the decoded JSON body, or None when there is no
        session, the status is not 200, or the request/decoding fails.
        """
        if not self._session:
            return None
        try:
            async with self._session.get(
                url,
                timeout=aiohttp.ClientTimeout(total=timeout),
            ) as resp:
                if resp.status != 200:
                    return None
                # content_type=None: do not reject bodies served with a JSON
                # subtype such as application/rdap+json.
                return await resp.json(content_type=None)
        except Exception as e:
            # Best-effort source: any failure means "no data".
            logger.debug("%s error %s: %s", label, target, e)
            return None

    @classmethod
    def _cn_from_subject(cls, info) -> str:
        """Return the first CN found in ``info["values"]`` subject strings, or ""."""
        values = info.get("values") if isinstance(info, dict) else None
        if not isinstance(values, list):
            return ""
        for subject in values:
            match = cls._CERT_CN_RE.search(cls._text(subject))
            if match:
                return match.group(1).strip()
        return ""

    @classmethod
    def _parse_pssl_response(cls, data) -> list:
        """
        Convert a passive SSL response into ``{"sha1", "cn", "subject"}`` dicts.

        Two shapes are understood:

        * ``{ip: {"certificates": [sha1, ...],
                  "subjects": {sha1: {"values": [subject, ...]}}}}``, the
          shape shown in CIRCL's Passive SSL documentation
          (NOTE(review): confirm against a live response);
        * ``{sha1: {"subjects": {"cn": [...]}}}``, the shape the previous
          implementation assumed, kept as a fallback.

        At most ``_MAX_CERTS`` certificates are returned.
        """
        if not isinstance(data, dict):
            return []
        certs: list = []
        for key, entry in data.items():
            if not isinstance(entry, dict):
                continue
            subjects = entry.get("subjects")
            if not isinstance(subjects, dict):
                subjects = {}

            if "certificates" in entry:
                hashes = entry["certificates"]
                if not isinstance(hashes, list):
                    hashes = list(subjects)
                for sha1 in hashes:
                    if not isinstance(sha1, str):
                        continue
                    info = subjects.get(sha1, {})
                    certs.append({
                        "sha1": sha1,
                        "cn": cls._cn_from_subject(info),
                        "subject": info,
                    })
            else:
                cn = subjects.get("cn", [])
                if isinstance(cn, list):
                    cn = cn[0] if cn else ""
                certs.append({"sha1": key, "cn": cn, "subject": subjects})

            if len(certs) >= cls._MAX_CERTS:
                break
        return certs[: cls._MAX_CERTS]

    async def _circl_pssl_ip(self, ip: str) -> list:
        """Return certificates CIRCL passive SSL reports for an IP (``[]`` on failure)."""
        data = await self._fetch_json(
            f"{CIRCL_PSSL_URL}/{ip}", CIRCL_TIMEOUT, "CIRCL PSSL", ip
        )
        return self._parse_pssl_response(data)

    @classmethod
    def _vcard_fn(cls, entity: dict) -> str:
        """Return the first ``fn`` (formatted name) of an RDAP entity's vCard, or ""."""
        vcards = entity.get("vcardArray")
        if not (isinstance(vcards, list) and len(vcards) > 1):
            return ""
        properties = vcards[1]
        if not isinstance(properties, list):
            return ""
        for prop in properties:
            if isinstance(prop, list) and len(prop) >= 4 and prop[0] == "fn":
                return cls._text(prop[3])
        return ""

    @classmethod
    def _parse_rdap_ip(cls, data) -> dict:
        """
        Reduce an RDAP IP response to ``org``, ``country``, ``cidr``,
        ``network`` and ``raw_handle``. Returns ``{}`` for a non-dict input.

        When several top-level entities carry a name, the last one wins.
        """
        if not isinstance(data, dict):
            return {}
        result: dict = {}

        for entity in cls._dicts(data.get("entities")):
            name = cls._vcard_fn(entity)
            if name:
                result["org"] = name

        result["country"] = cls._text(data.get("country"))

        cidrs = cls._dicts(data.get("cidr0_cidrs"))
        if cidrs:
            cidr = cidrs[0]
            result["cidr"] = (
                f"{cidr.get('v4prefix', '')}/{cidr.get('length', '')}"
            )

        handle = cls._text(data.get("handle"))
        if handle.startswith("NET-"):
            result["network"] = handle
        result["raw_handle"] = handle

        return result

    @classmethod
    def _parse_rdap_domain(cls, data) -> dict:
        """
        Reduce an RDAP domain response to ``registered``, ``expires``,
        ``updated``, ``nameservers``, ``registrar``, ``registrant`` and
        ``status``. Returns ``{}`` for a non-dict input.
        """
        if not isinstance(data, dict):
            return {}
        result: dict = {}

        event_fields = {
            "registration": "registered",
            "expiration": "expires",
            "last changed": "updated",
        }
        for event in cls._dicts(data.get("events")):
            field = event_fields.get(cls._text(event.get("eventAction")))
            if field:
                result[field] = event.get("eventDate", "")

        # Unnamed nameservers are dropped so they cannot later be mistaken
        # for a nameserver shared between domains.
        nameservers = []
        for ns in cls._dicts(data.get("nameservers")):
            name = cls._text(ns.get("ldhName")).lower()
            if name:
                nameservers.append(name)
        result["nameservers"] = nameservers

        for entity in cls._dicts(data.get("entities")):
            roles = entity.get("roles") or []
            name = cls._vcard_fn(entity)
            if not name:
                continue
            if "registrar" in roles:
                result["registrar"] = name
            if "registrant" in roles:
                result["registrant"] = name

        result["status"] = data.get("status", [])
        return result

    async def _rdap_ip(self, ip: str) -> dict:
        """Return parsed RDAP registration data for an IP (``{}`` on failure)."""
        data = await self._fetch_json(
            RDAP_IP_URL.format(ip=ip), WHOIS_TIMEOUT, "RDAP IP", ip
        )
        return self._parse_rdap_ip(data)

    async def _rdap_domain(self, domain: str) -> dict:
        """Return parsed RDAP registration data for a domain (``{}`` on failure)."""
        data = await self._fetch_json(
            RDAP_DOMAIN_URL.format(domain=domain), WHOIS_TIMEOUT, "RDAP domain", domain
        )
        return self._parse_rdap_domain(data)

    def _detect_infrastructure_clusters(
        self,
        ip_enrichments: dict,
        domain_enrichments: dict,
    ) -> list[dict]:
        """
        Find shared IP and shared nameserver clusters across investigated entities.

        A ``shared_ip`` cluster is an enriched IP whose passive DNS records
        name at least two investigated domains. A ``shared_nameserver``
        cluster is a nameserver listed by at least two distinct investigated
        domains. Names are compared case-insensitively and without a trailing
        dot. ``shared_ip`` domain lists are sorted; nameserver clusters keep
        the order of ``domain_enrichments``.
        """
        clusters = []

        # Normalised name -> key used in domain_enrichments.
        investigated_names = {
            name.rstrip(".").lower(): name
            for name in domain_enrichments
            if isinstance(name, str)
        }

        for ip, data in ip_enrichments.items():
            hosted: set = set()
            for record in data.get("passive_dns", []):
                if not isinstance(record, dict):
                    continue
                rrname = self._text(record.get("rrname")).rstrip(".").lower()
                if rrname and rrname in investigated_names:
                    hosted.add(investigated_names[rrname])
            if len(hosted) >= 2:
                investigated = sorted(hosted)
                clusters.append({
                    "type": "shared_ip",
                    "ip": ip,
                    "domains": investigated,
                    "description": (
                        f"IP {ip} hosts multiple investigated domains: "
                        f"{', '.join(investigated)}"
                    ),
                })

        ns_to_domains: dict = {}
        for domain, data in domain_enrichments.items():
            whois = data.get("whois") or {}
            listed = whois.get("nameservers") or []
            # De-duplicate per domain: a nameserver repeated within one
            # domain's record must not count as being shared.
            own_nameservers = dict.fromkeys(
                self._text(ns).rstrip(".").lower() for ns in listed
            )
            for ns in own_nameservers:
                if ns:
                    ns_to_domains.setdefault(ns, []).append(domain)

        for ns, domains in ns_to_domains.items():
            if len(domains) >= 2:
                clusters.append({
                    "type": "shared_nameserver",
                    "nameserver": ns,
                    "domains": domains,
                    "description": (
                        f"Domains sharing nameserver {ns}: "
                        f"{', '.join(domains)}"
                    ),
                })

        return clusters

    def _is_valid_public_ip(self, value: str) -> bool:
        """
        Return True if ``value`` is a globally routable unicast IPv4 address.

        Surrounding whitespace is ignored. IPv6, private, loopback, multicast,
        reserved and shared-address-space (100.64.0.0/10) addresses, as well
        as non-string or unparsable input, yield False.
        """
        if not value or not isinstance(value, str):
            return False
        try:
            ip = ipaddress.ip_address(value.strip())
        except ValueError:
            return False
        return (
            ip.version == 4
            # is_global additionally excludes 100.64.0.0/10, which is_private
            # does not cover.
            and ip.is_global
            and not ip.is_private
            and not ip.is_loopback
            and not ip.is_multicast
            and not ip.is_reserved
        )

    def _is_valid_domain(self, value: str) -> bool:
        """
        Return True if ``value`` is a syntactically valid, non-``.onion`` hostname.

        Requires 4 to 253 characters, at least two dot-separated labels, and
        an alphabetic or punycode (``xn--``) top-level label. Bare IPv4
        addresses, names with a trailing dot, and non-string input yield False.
        """
        if not isinstance(value, str):
            return False
        if not 4 <= len(value) <= 253:
            return False
        if value.lower().endswith(".onion"):
            return False
        # fullmatch, not match + "$": "$" would also accept a trailing newline.
        return self._DOMAIN_RE.fullmatch(value) is not None
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
async def enrich_with_dns(entities: list[dict]) -> dict:
    """
    Main entry point for DNS/WHOIS enrichment.

    Takes extracted entities and returns enrichment results including new
    entities. Enrichment runs unless the ``DNS_ENRICHMENT_ENABLED``
    environment variable is set to something other than ``1``, ``true``,
    ``yes`` or ``on`` (case-insensitive); it defaults to enabled.

    Returns:
        A dict with keys ``ip_enrichments``, ``domain_enrichments``,
        ``new_entities`` and ``infrastructure_clusters``. All four are empty
        when enrichment is disabled or ``entities`` is empty.
    """
    empty_result = {
        "ip_enrichments": {},
        "domain_enrichments": {},
        "new_entities": [],
        "infrastructure_clusters": [],
    }

    # Accept the common truthy spellings; previously only "true" enabled the
    # feature, so DNS_ENRICHMENT_ENABLED=1 silently turned it off.
    flag = os.getenv("DNS_ENRICHMENT_ENABLED", "true").strip().lower()
    if flag not in ("1", "true", "yes", "on"):
        logger.info("DNS enrichment disabled")
        return empty_result

    if not entities:
        return empty_result

    async with DNSEnrichment() as enricher:
        return await enricher.enrich_entities(entities)
|