voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
sources/enrichment.py
ADDED
|
@@ -0,0 +1,1244 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Threat intelligence enrichment — OTX (AlienVault) and abuse.ch (MalwareBazaar,
|
|
3
|
+
ThreatFox, URLhaus).
|
|
4
|
+
|
|
5
|
+
Returns page-shaped dicts compatible with ``extract_entities_from_pages`` (``url``,
|
|
6
|
+
``text`` / ``content``, plus ``link``, ``status``, ``source`` for traceability).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any, Optional
|
|
16
|
+
from urllib.parse import urlparse
|
|
17
|
+
|
|
18
|
+
import aiohttp
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
OTX_BASE_URL = "https://otx.alienvault.com/api/v1"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def is_onion_url(url: str) -> bool:
    """Return True if *url* points to a .onion hidden service.

    The decision is made on the parsed hostname, so ``.onion`` appearing only
    in the path or query string does not count. Surrounding whitespace is
    ignored and the comparison is case-insensitive.

    Args:
        url: Candidate URL. Empty values and non-strings yield ``False``.

    Returns:
        ``True`` when the hostname ends with ``.onion``. If the URL cannot be
        parsed at all, falls back to a plain substring check for ``.onion``.
    """
    # NOTE(review): this module defines ``is_onion_url`` a second time further
    # down; that later definition is the one bound after import. This copy is
    # kept behaviourally aligned with it — consider deleting one of the two.
    if not url or not isinstance(url, str):
        return False
    try:
        # Uses the module-level ``urlparse`` import; no local re-import needed.
        host = (urlparse(url.strip()).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced brackets).
        return ".onion" in url.lower()
    return host.endswith(".onion")
|
|
36
|
+
MALWAREBAZAAR_URL = "https://mb-api.abuse.ch/api/v1/"
|
|
37
|
+
URLHAUS_URL = "https://urlhaus-api.abuse.ch/v1/"
|
|
38
|
+
THREATFOX_URL = "https://threatfox-api.abuse.ch/api/v1/"
|
|
39
|
+
|
|
40
|
+
# All HTTP calls use at most 30s client timeout (enforced per request).
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _abusech_headers() -> dict[str, str]:
    """Build the auth header dict for abuse.ch API requests.

    Reads ``ABUSECH_API_KEY`` from the environment and trims surrounding
    whitespace.

    Returns:
        ``{"Auth-Key": <key>}`` when a non-blank key is configured, otherwise
        an empty dict.
    """
    raw_key = os.environ.get("ABUSECH_API_KEY")
    api_key = raw_key.strip() if raw_key else ""
    if not api_key:
        return {}
    return {"Auth-Key": api_key}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def is_onion_url(url: str) -> bool:
    """
    Tell whether *url* addresses a Tor hidden service (``.onion`` host).

    Empty values and non-strings are rejected. Otherwise the URL is trimmed,
    parsed, and its lower-cased hostname is tested for a ``.onion`` suffix.
    When parsing itself fails, a case-insensitive substring search for
    ``.onion`` over the raw input is used instead.
    """
    if not (url and isinstance(url, str)):
        return False

    try:
        hostname = urlparse(url.strip()).hostname
    except Exception:
        # Unparseable input: best-effort textual check.
        return ".onion" in url.lower()

    normalized = hostname.lower() if hostname else ""
    return normalized.endswith(".onion")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def fetch_otx_pulses(query: str, api_key: str, limit: int = 20) -> list[dict]:
    """
    Search OTX for threat pulses related to the query.

    Issues one ``/search/pulses`` request, flattens every returned pulse into
    a dict, then fetches the indicators of the first five pulses via
    ``fetch_otx_pulse_indicators``.

    Args:
        query: Search term sent as the ``q`` query parameter.
        api_key: OTX API key. When blank, the lookup is skipped.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        List of dicts with pulse metadata (``source``, ``pulse_id``, ``title``,
        ``description``, ``tags``, ``created``, ``modified``, ``tlp``,
        ``indicator_count``, ``malware_families``, ``attack_ids``) and
        ``indicators`` (filled for at most the first five pulses). Returns an
        empty list when no key is configured or the search answers non-200.
        Timeouts and client errors are logged, not raised; whatever was
        collected up to that point is returned.
    """
    key = (api_key or "").strip()
    if not key:
        logger.debug("OTX skipped — no API key configured")
        return []

    headers = {"X-OTX-API-KEY": key}
    results: list[dict] = []

    try:
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            url = f"{OTX_BASE_URL}/search/pulses"
            params = {"q": query, "limit": limit, "page": 1}

            async with session.get(url, params=params) as resp:
                if resp.status != 200:
                    logger.warning("OTX pulse search returned HTTP %s", resp.status)
                    return []
                data = await resp.json()

            # Tolerate a non-object body or an explicit ``"results": null``.
            pulses = (data.get("results") if isinstance(data, dict) else None) or []
            logger.info("OTX: %d results", len(pulses))

            for pulse in pulses:
                if not isinstance(pulse, dict):
                    continue
                results.append(
                    {
                        "source": "otx_pulse",
                        "pulse_id": pulse.get("id"),
                        "title": pulse.get("name", ""),
                        "description": pulse.get("description", ""),
                        "tags": pulse.get("tags") or [],
                        "created": pulse.get("created"),
                        "modified": pulse.get("modified"),
                        "tlp": pulse.get("tlp", "white"),
                        "indicator_count": pulse.get("indicator_count", 0),
                        # Passed through unchanged (entries may be str or dict).
                        "malware_families": pulse.get("malware_families") or [],
                        # Drop entries without a display name so consumers can
                        # safely ``', '.join(...)`` this list.
                        "attack_ids": [
                            a["display_name"]
                            for a in (pulse.get("attack_ids") or [])
                            if isinstance(a, dict) and a.get("display_name")
                        ],
                        "indicators": [],
                    }
                )

            # Only the first five pulses are enriched with their indicators.
            for pulse_result in results[:5]:
                pulse_id = pulse_result["pulse_id"]
                if not pulse_id:
                    # Without an id the request would target ``/pulses/None``.
                    continue
                pulse_result["indicators"] = await fetch_otx_pulse_indicators(
                    str(pulse_id), key, session
                )

    except asyncio.TimeoutError:
        logger.warning("OTX: Request timed out")
    except aiohttp.ClientError as e:
        logger.warning("OTX: Client error: %s", e)
    except Exception as e:
        logger.warning("OTX: Error fetching pulses: %s", e)

    return results
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
async def fetch_otx_pulse_indicators(
    pulse_id: str, api_key: str, session: aiohttp.ClientSession
) -> list[dict]:
    """Fetch IOCs for a pulse.

    Args:
        pulse_id: Pulse identifier, interpolated into the request path.
        api_key: OTX API key, sent as the ``X-OTX-API-KEY`` request header.
        session: Open client session used for the request.

    Returns:
        One dict per indicator that has a non-empty ``indicator`` value, with
        keys ``type``, ``value``, ``description`` and ``created``. Returns an
        empty list on a non-200 response or on any error (logged at debug
        level, never raised).
    """
    try:
        url = f"{OTX_BASE_URL}/pulses/{pulse_id}/indicators"
        # Strip the key: this per-request header takes precedence over any
        # session-level header, and stray whitespace would make it invalid.
        headers = {"X-OTX-API-KEY": (api_key or "").strip()}

        async with session.get(url, headers=headers) as resp:
            if resp.status != 200:
                logger.debug(
                    "OTX: indicators for pulse %s returned HTTP %s", pulse_id, resp.status
                )
                return []
            data = await resp.json()

        # Tolerate a non-object body or an explicit ``"results": null``.
        indicators = (data.get("results") if isinstance(data, dict) else None) or []

        return [
            {
                "type": ind.get("type"),
                "value": ind.get("indicator"),
                "description": ind.get("description", ""),
                "created": ind.get("created"),
            }
            for ind in indicators
            if isinstance(ind, dict) and ind.get("indicator")
        ]

    except Exception as e:
        logger.debug("OTX: Error fetching indicators for pulse %s: %s", pulse_id, e)
        return []
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def otx_pulse_to_page(pulse: dict) -> dict:
    """Convert an OTX pulse to page-shaped dict for the entity extractor.

    Builds a plain-text report from the pulse's title, description, tags,
    malware families, ATT&CK ids and indicators.

    Args:
        pulse: Pulse dict using the keys ``title``, ``description``, ``tags``,
            ``malware_families``, ``attack_ids``, ``indicators`` and
            ``pulse_id``. All keys are optional.

    Returns:
        Dict with ``link``/``url`` (the OTX pulse URL), ``content``/``text``
        (the report), ``status`` (200), ``source``, ``title`` and ``via``.
    """

    def _labels(items: object) -> list[str]:
        """Return the printable, non-empty labels found in *items*."""
        if isinstance(items, str):
            # A bare string is one label, not an iterable of characters.
            items = [items]
        labels: list[str] = []
        for item in items or []:
            if isinstance(item, dict):
                item = item.get("display_name") or item.get("name")
            if item:
                # str() keeps ', '.join from raising on non-string entries.
                labels.append(str(item))
        return labels

    lines: list[str] = []

    title = pulse.get("title")
    if title:
        lines.append(f"Threat Report: {title}")

    description = pulse.get("description")
    if description:
        lines.append(f"\nDescription: {description}")

    tags = _labels(pulse.get("tags"))
    if tags:
        lines.append(f"\nTags: {', '.join(tags)}")

    families = _labels(pulse.get("malware_families"))
    if families:
        lines.append(f"\nMalware Families: {', '.join(families)}")

    attack_ids = _labels(pulse.get("attack_ids"))
    if attack_ids:
        lines.append(f"\nMITRE ATT&CK: {', '.join(attack_ids)}")

    indicators = pulse.get("indicators") or []
    if indicators:
        lines.append("\nIndicators of Compromise:")
        for ind in indicators:
            if not isinstance(ind, dict):
                continue
            ind_value = ind.get("value", "")
            if not ind_value:
                continue
            ind_type = ind.get("type", "")
            ind_desc = ind.get("description", "")
            extra = f" ({ind_desc})" if ind_desc else ""
            lines.append(f" {ind_type}: {ind_value}{extra}")

    content = "\n".join(lines)
    pid = pulse.get("pulse_id") or ""
    link = f"https://otx.alienvault.com/pulse/{pid}"

    return {
        "link": link,
        "url": link,
        "content": content,
        "text": content,
        "status": 200,
        "source": "alienvault_otx",
        # ``or`` so an empty or None title also falls back to the default.
        "title": title or "OTX Threat Report",
        "via": "otx_api",
    }
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
async def fetch_malwarebazaar(query: str, limit: int = 20) -> list[dict]:
    """Query MalwareBazaar by tag then by signature.

    With a blank *query* the recent-additions feed is fetched instead. With a
    query, a tag lookup runs first and a signature lookup is only attempted
    when the tag lookup produced nothing.

    Args:
        query: Tag / signature name to search for; blank selects the recent feed.
        limit: Sent as the ``limit`` field of tag and signature lookups, and
            used to cap the number of recent-feed samples returned.

    Returns:
        List of sample dicts (``source``, ``sha256``, ``md5``, ``file_name``,
        ``file_type``, ``signature``, ``tags``, ``malware_family``,
        ``first_seen``, ``last_seen``, ``reporter``, ``comment``). Empty on
        HTTP errors, a missing API key, an unsuccessful ``query_status``,
        timeouts or client errors (all logged, none raised).
    """
    q = (query or "").strip()
    headers = _abusech_headers()
    timeout = aiohttp.ClientTimeout(total=30)

    def _map_sample(sample: dict) -> dict:
        """Project one API sample onto the keys used by this module."""
        signature = sample.get("signature")
        return {
            "source": "malwarebazaar",
            "sha256": sample.get("sha256_hash"),
            "md5": sample.get("md5_hash"),
            "file_name": sample.get("file_name"),
            "file_type": sample.get("file_type"),
            "signature": signature,
            "tags": sample.get("tags") or [],
            # ``or ""`` so a null signature does not surface as None here.
            "malware_family": signature or "",
            "first_seen": sample.get("first_seen"),
            "last_seen": sample.get("last_seen"),
            "reporter": sample.get("reporter") or "",
            "comment": sample.get("comment") or "",
        }

    async def _query(
        session: aiohttp.ClientSession, payload: dict, label: str
    ) -> tuple[str, list[dict]]:
        """POST *payload*; return ``(outcome, mapped_samples)``.

        *outcome* is ``"ok"``, ``"no_api_key"``, ``"http_error"`` or the
        API's own ``query_status`` rendered as a string.
        """
        async with session.post(MALWAREBAZAAR_URL, data=payload) as resp:
            if resp.status != 200:
                logger.warning("MalwareBazaar: HTTP %s (%s)", resp.status, label)
                return "http_error", []
            data = await resp.json()

        status = data.get("query_status") if isinstance(data, dict) else None
        if status == "no_api_key":
            logger.warning(
                "MalwareBazaar: no_api_key — set ABUSECH_API_KEY for abuse.ch APIs"
            )
            return "no_api_key", []
        if status != "ok":
            return str(status), []

        samples = [s for s in (data.get("data") or []) if isinstance(s, dict)]
        logger.info("MalwareBazaar: %d samples (%s)", len(samples), label)
        return "ok", [_map_sample(s) for s in samples]

    try:
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            if not q:
                # NOTE(review): per the MalwareBazaar API docs, selector
                # "time" means additions from the recent time window, while
                # selector "100" means the latest 100 samples — confirm which
                # one is intended here.
                _, recent = await _query(
                    session, {"query": "get_recent", "selector": "time"}, "recent"
                )
                return recent[:limit]

            outcome, results = await _query(
                session, {"query": "get_taginfo", "tag": q, "limit": limit}, "tag"
            )
            # A missing key would fail the signature lookup as well; stop early.
            if outcome == "no_api_key" or results:
                return results

            _, results = await _query(
                session,
                {"query": "get_siginfo", "signature": q, "limit": limit},
                "signature",
            )
            return results

    except asyncio.TimeoutError:
        logger.warning("MalwareBazaar: Request timed out")
    except aiohttp.ClientError as e:
        logger.warning("MalwareBazaar: Client error: %s", e)
    except Exception as e:
        logger.warning("MalwareBazaar: Error: %s", e)

    return []
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
async def fetch_threatfox(query: str, limit: int = 50) -> list[dict]:
    """Search ThreatFox IOCs by search term.

    With a blank *query* the IOCs of the last day are requested
    (``get_iocs`` with ``days=1``); otherwise ``search_ioc`` is used.

    Args:
        query: Search term; blank selects the last-day feed.
        limit: Maximum number of IOCs returned (applied client-side).

    Returns:
        List of IOC dicts (``source``, ``ioc_type``, ``ioc_value``,
        ``malware``, ``malware_printable``, ``confidence`` as a 0..1 float,
        ``first_seen``, ``last_seen``, ``tags``, ``comment``, ``reporter``).
        Both request modes produce the same keys. Empty on HTTP errors, a
        missing API key, an unsuccessful ``query_status``, timeouts or client
        errors (all logged, none raised).
    """
    q = (query or "").strip()
    if q:
        payload = {"query": "search_ioc", "search_term": q}
    else:
        # Fetch most recent IOCs (last 24 hours)
        payload = {"query": "get_iocs", "days": 1}

    def _confidence(raw: object) -> float:
        """Scale a 0..100 confidence to 0..1; unusable values become 0.0."""
        try:
            return float(raw) / 100.0  # type: ignore[arg-type]
        except (TypeError, ValueError):
            # One odd value must not discard the whole response.
            return 0.0

    def _map_ioc(ioc: dict) -> dict:
        """Project one API IOC onto the keys used by this module."""
        return {
            "source": "threatfox",
            "ioc_type": ioc.get("ioc_type"),
            "ioc_value": ioc.get("ioc"),
            "malware": ioc.get("malware"),
            "malware_printable": ioc.get("malware_printable"),
            "confidence": _confidence(ioc.get("confidence_level")),
            "first_seen": ioc.get("first_seen"),
            "last_seen": ioc.get("last_seen"),
            "tags": ioc.get("tags") or [],
            "comment": ioc.get("comment") or "",
            "reporter": ioc.get("reporter") or "",
        }

    headers = _abusech_headers()
    timeout = aiohttp.ClientTimeout(total=30)

    try:
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            async with session.post(THREATFOX_URL, json=payload) as resp:
                if resp.status != 200:
                    logger.warning("ThreatFox: HTTP %s", resp.status)
                    return []
                data = await resp.json()

        status = data.get("query_status") if isinstance(data, dict) else None
        if status == "no_api_key":
            logger.warning(
                "ThreatFox: no_api_key — set ABUSECH_API_KEY for abuse.ch APIs"
            )
            return []
        if status != "ok":
            return []

        iocs = data.get("data") or []
        if not isinstance(iocs, list):
            # Guard against a non-list ``data`` payload.
            return []
        logger.info("ThreatFox: %d results", len(iocs))

        return [_map_ioc(ioc) for ioc in iocs[:limit] if isinstance(ioc, dict)]

    except asyncio.TimeoutError:
        logger.warning("ThreatFox: Request timed out")
    except aiohttp.ClientError as e:
        logger.warning("ThreatFox: Client error: %s", e)
    except Exception as e:
        logger.warning("ThreatFox: Error: %s", e)

    return []
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
async def fetch_urlhaus(query: str, limit: int = 20) -> list[dict]:
    """Search URLhaus by tag.

    Args:
        query: Tag to look up; a blank value returns an empty list without
            issuing a request.
        limit: Maximum number of URL entries returned (applied client-side).

    Returns:
        List of dicts with ``source``, ``url``, ``url_status``, ``tags``,
        ``threat``, ``date_added`` and ``reporter``. Empty on HTTP errors, a
        missing API key, an unsuccessful ``query_status``, timeouts or client
        errors (all logged, none raised).
    """
    q = (query or "").strip()
    if not q:
        return []

    headers = _abusech_headers()
    timeout = aiohttp.ClientTimeout(total=30)
    payload = {"tag": q}

    try:
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            async with session.post(f"{URLHAUS_URL}tag/", data=payload) as resp:
                if resp.status != 200:
                    logger.warning("URLhaus: HTTP %s", resp.status)
                    return []
                data = await resp.json()

        status = data.get("query_status") if isinstance(data, dict) else None
        if status == "no_api_key":
            logger.warning(
                "URLhaus: no_api_key — set ABUSECH_API_KEY for abuse.ch APIs"
            )
            return []
        if status != "ok":
            return []

        urls = (data.get("urls") or [])[:limit]
        logger.info("URLhaus: %d results", len(urls))

        return [
            {
                "source": "urlhaus",
                "url": entry.get("url"),
                "url_status": entry.get("url_status"),
                "tags": entry.get("tags") or [],
                "threat": entry.get("threat"),
                # NOTE(review): the tag endpoint appears to spell this field
                # ``dateadded``; accept either spelling — confirm against the
                # URLhaus API docs.
                "date_added": entry.get("date_added") or entry.get("dateadded"),
                "reporter": entry.get("reporter") or "",
            }
            for entry in urls
            if isinstance(entry, dict)
        ]

    except asyncio.TimeoutError:
        logger.warning("URLhaus: Request timed out")
    except aiohttp.ClientError as e:
        logger.warning("URLhaus: Client error: %s", e)
    except Exception as e:
        logger.warning("URLhaus: Error: %s", e)

    return []
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def abusech_to_pages(
    malwarebazaar_results: list[dict],
    threatfox_results: list[dict],
    urlhaus_results: list[dict],
) -> list[dict]:
    """Group Abuse.ch results into page-shaped dicts.

    One page is produced per non-empty feed, in the order MalwareBazaar,
    ThreatFox, URLhaus. At most 20 MalwareBazaar samples, 30 ThreatFox IOCs
    and 20 URLhaus entries are rendered into the page text.

    Args:
        malwarebazaar_results: Sample dicts as returned by ``fetch_malwarebazaar``.
        threatfox_results: IOC dicts as returned by ``fetch_threatfox``.
        urlhaus_results: URL dicts as returned by ``fetch_urlhaus``.

    Returns:
        List of page dicts with ``link``/``url``, ``content``/``text``,
        ``status`` (200), ``source`` and ``via`` (``"abusech_api"``).
    """

    def _csv(values: object) -> str:
        """Comma-join the non-empty items of *values*, coercing each to str."""
        if isinstance(values, str):
            return values
        return ", ".join(str(v) for v in (values or []) if v)

    def _page(link: str, source: str, lines: list[str]) -> dict:
        """Wrap rendered *lines* in the page shape shared by all three feeds."""
        content = "\n".join(lines)
        return {
            "link": link,
            "url": link,
            "content": content,
            "text": content,
            "status": 200,
            "source": source,
            "via": "abusech_api",
        }

    pages: list[dict] = []

    if malwarebazaar_results:
        lines = ["MalwareBazaar Threat Intelligence Report\n"]
        for sample in malwarebazaar_results[:20]:
            if not isinstance(sample, dict):
                continue
            # ``or`` so a null or empty family prints "Unknown", not "None".
            lines.append(f"Malware Family: {sample.get('malware_family') or 'Unknown'}")
            if sample.get("sha256"):
                lines.append(f"SHA256: {sample['sha256']}")
            tags = _csv(sample.get("tags"))
            if tags:
                lines.append(f"Tags: {tags}")
            if sample.get("reporter"):
                lines.append(f"Reporter: {sample['reporter']}")
            if sample.get("first_seen"):
                lines.append(f"First seen: {sample['first_seen']}")
            lines.append("")
        pages.append(_page("https://bazaar.abuse.ch/browse/", "malwarebazaar", lines))

    if threatfox_results:
        lines = ["ThreatFox IOC Intelligence Report\n"]
        for ioc in threatfox_results[:30]:
            if not isinstance(ioc, dict):
                continue
            lines.append(f"IOC Type: {ioc.get('ioc_type') or 'Unknown'}")
            lines.append(f"IOC Value: {ioc.get('ioc_value') or ''}")
            # Some result dicts carry the name only under ``malware``.
            malware = ioc.get("malware_printable") or ioc.get("malware")
            if malware:
                lines.append(f"Malware: {malware}")
            confidence = ioc.get("confidence")
            # Only numbers can take the percent format; bool is excluded
            # because it is an int subclass.
            if (
                isinstance(confidence, (int, float))
                and not isinstance(confidence, bool)
                and confidence
            ):
                lines.append(f"Confidence: {confidence:.0%}")
            tags = _csv(ioc.get("tags"))
            if tags:
                lines.append(f"Tags: {tags}")
            lines.append("")
        pages.append(_page("https://threatfox.abuse.ch/", "threatfox", lines))

    if urlhaus_results:
        lines = ["URLhaus Malicious URL Intelligence Report\n"]
        for url_entry in urlhaus_results[:20]:
            if not isinstance(url_entry, dict):
                continue
            lines.append(f"URL: {url_entry.get('url') or ''}")
            lines.append(f"Threat: {url_entry.get('threat') or 'Unknown'}")
            tags = _csv(url_entry.get("tags"))
            if tags:
                lines.append(f"Tags: {tags}")
            lines.append("")
        pages.append(_page("https://urlhaus.abuse.ch/", "urlhaus", lines))

    return pages
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
_RANSOMWARE_LIVE_BASE = "https://api.ransomware.live/v2"
|
|
541
|
+
_RANSOMWARE_LIVE_HEADERS = {"User-Agent": "VoidAccess-OSINT/1.0", "Accept": "application/json"}
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def _rl_extract_onion_urls(group: dict) -> list[str]:
    """Extract .onion leak-site URLs from a group dict (available sites first).

    Args:
        group: Group dict; its ``locations`` list is read, and each location's
            ``fqdn`` and ``available`` fields are used.

    Returns:
        URLs for every location whose ``fqdn`` contains ``.onion``. Locations
        flagged ``available`` come first; otherwise the input order is kept.
        Values without an ``http://`` or ``https://`` scheme get ``http://``
        prepended. Returns an empty list when ``locations`` is missing or not
        a list.
    """
    locations = group.get("locations") or []
    if not isinstance(locations, list):
        return []

    # Ignore malformed entries instead of failing on ``.get``.
    entries = [loc for loc in locations if isinstance(loc, dict)]
    # available=True sites first, then the rest (the sort is stable).
    entries.sort(key=lambda loc: not loc.get("available", False))

    urls: list[str] = []
    for loc in entries:
        fqdn = loc.get("fqdn")
        if not isinstance(fqdn, str):
            continue
        fqdn = fqdn.strip()
        if not fqdn or ".onion" not in fqdn:
            continue
        # Test for a real scheme prefix: a bare "http" prefix would also match
        # a hostname that merely begins with those letters.
        has_scheme = fqdn.lower().startswith(("http://", "https://"))
        urls.append(fqdn if has_scheme else f"http://{fqdn}")
    return urls
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
async def fetch_ransomware_live(query: str) -> list[dict]:
    """
    Search ransomware.live for threat group profiles, leak-site .onion addresses,
    and recent victim claim URLs.

    Produces three kinds of intelligence:
    1. Group profile + TTPs (text for entity extraction)
    2. Leak-site .onion addresses (scrape seeds — bypass search engine discovery)
    3. Individual victim claim URLs (specific .onion post pages to scrape)

    Free public API — no key required.

    Args:
        query: Case-insensitive substring matched against group names. A
            blank value returns an empty list without issuing a request.

    Returns:
        One dict per matched group (details are fetched for at most the first
        five matches) with keys ``group``, ``description``, ``onion_urls``,
        ``claim_urls`` (max 30), ``victims`` (max 50), ``ttps``, ``tools`` and
        ``victim_count``. Empty when the group index is unavailable or nothing
        matches. Timeouts and client errors are logged, not raised.
    """
    # Function-scope imports: these names are not among the module-level
    # imports of this file.
    import json
    from urllib.parse import quote

    q = (query or "").strip().lower()
    if not q:
        return []

    results: list[dict] = []
    timeout = aiohttp.ClientTimeout(total=25)

    async def _get_json(session: aiohttp.ClientSession, url: str) -> Any:
        """GET *url* and decode a JSON array/object body.

        Returns ``None`` on a non-200 status, a body that does not look like
        JSON, or any request/parse error. Best effort: never raises.
        """
        try:
            async with session.get(url) as r:
                if r.status != 200:
                    logger.debug("ransomware.live: %s returned HTTP %s", url, r.status)
                    return None
                text = await r.text()
            # Parse from text so a wrong or missing Content-Type is tolerated.
            if text.strip()[:1] not in ("[", "{"):
                return None
            return json.loads(text)
        except Exception as exc:
            logger.debug("ransomware.live: request to %s failed: %s", url, exc)
            return None

    try:
        async with aiohttp.ClientSession(
            headers=_RANSOMWARE_LIVE_HEADERS, timeout=timeout
        ) as session:
            # ── 1. Match groups from the full group index ──────────────────────
            async with session.get(f"{_RANSOMWARE_LIVE_BASE}/groups") as resp:
                if resp.status != 200:
                    logger.warning("ransomware.live /groups HTTP %s", resp.status)
                    return []
                all_groups = await resp.json(content_type=None)

            matched_summary: list[dict] = [
                g
                for g in (all_groups if isinstance(all_groups, list) else [])
                if isinstance(g, dict) and q in str(g.get("name") or "").lower()
            ]

            if not matched_summary:
                logger.info("ransomware.live: no groups matched %r", query)
                return []

            logger.info("ransomware.live: %d groups matched %r", len(matched_summary), query)

            # ── 2. Fetch full group detail for each match (has ttps, tools, locations) ──
            top_groups = matched_summary[:5]
            details = await asyncio.gather(
                *(
                    _get_json(
                        session,
                        # Quote the name so characters such as "/" or "?"
                        # cannot alter the request path.
                        f"{_RANSOMWARE_LIVE_BASE}/group/{quote(str(g.get('name', '')), safe='')}",
                    )
                    for g in top_groups
                ),
                return_exceptions=True,
            )

            group_map: dict[str, dict] = {}
            for g, detail in zip(top_groups, details):
                gname = g.get("name", "")
                # Detail fields override the summary; without usable detail
                # the summary entry is kept as-is.
                group_map[gname] = {**g, **detail} if isinstance(detail, dict) else g

            # ── 3. Pull recent victims and filter by matched groups ────────────
            recent_victims: list[dict] = []
            matched_names = {str(g.get("name") or "").lower() for g in matched_summary}
            for endpoint in ("recentvictims", "recentcyberattacks"):
                raw = await _get_json(session, f"{_RANSOMWARE_LIVE_BASE}/{endpoint}")
                if not isinstance(raw, list):
                    continue
                recent_victims.extend(
                    v
                    for v in raw
                    if isinstance(v, dict)
                    and str(v.get("group") or "").lower() in matched_names
                )

            logger.info("ransomware.live: %d recent victims for matched groups", len(recent_victims))

            # ── 4. Assemble results ───────────────────────────────────────────
            for gname, gdata in group_map.items():
                onion_urls = _rl_extract_onion_urls(gdata)

                # Collect victims for this specific group
                wanted = str(gname).lower()
                group_victims = [
                    v for v in recent_victims
                    if str(v.get("group") or "").lower() == wanted
                ]

                # Claim URLs are individual victim post pages on the leak site
                claim_urls = [
                    v["claim_url"]
                    for v in group_victims
                    if isinstance(v.get("claim_url"), str) and ".onion" in v["claim_url"]
                ]

                results.append({
                    "group": gname,
                    "description": gdata.get("description") or "",
                    "onion_urls": onion_urls,
                    "claim_urls": claim_urls[:30],
                    "victims": group_victims[:50],
                    "ttps": gdata.get("ttps") or [],
                    "tools": gdata.get("tools") or [],
                    "victim_count": gdata.get("_victim_count", 0),
                })

    except asyncio.TimeoutError:
        logger.warning("ransomware.live: request timed out")
    except aiohttp.ClientError as exc:
        logger.warning("ransomware.live: client error: %s", exc)
    except Exception as exc:
        logger.warning("ransomware.live: unexpected error: %s", exc)

    return results
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
def _rl_victim_line(victim: dict) -> str:
    """Format one ransomware.live victim record as a single summary bullet."""
    title = victim.get("victim") or victim.get("post_title") or victim.get("website") or ""
    domain = victim.get("domain") or victim.get("website") or ""
    date = victim.get("attackdate") or victim.get("published") or victim.get("date") or ""
    country = victim.get("country") or ""
    activity = victim.get("activity") or ""

    line = f" - {title}"
    # Skip the domain when it merely repeats the title (website used for both).
    if domain and domain != title:
        line += f" ({domain})"
    if country:
        line += f" [{country}]"
    if date:
        line += f" {date}"
    if activity:
        line += f" — {activity}"
    return line


def _rl_seed_page(url: str, stub: str, title: str, via: str) -> dict:
    """Build a stub page dict flagged with ``_scrape_seed`` for a .onion URL."""
    return {
        "link": url,
        "url": url,
        "content": stub,
        "text": stub,
        "status": 200,
        "source": "ransomware_live",
        "title": title,
        "via": via,
        "_scrape_seed": True,
    }


def ransomwarelive_to_pages(groups: list[dict]) -> list[dict]:
    """Convert ransomware.live group data into page-shaped dicts.

    For every group dict this produces, in order:
      1. One text summary page (description, leak-site URLs, up to 40 victims).
      2. One stub page per ``onion_urls`` entry containing ``.onion``.
      3. One stub page per ``.onion`` entry among the first 20 ``claim_urls``.

    Stub pages carry ``"_scrape_seed": True``.

    Missing or ``None`` values for ``group``, ``onion_urls``, ``victims`` and
    ``claim_urls`` are treated as absent instead of raising.

    Args:
        groups: Group dicts with optional keys ``group``, ``description``,
            ``onion_urls``, ``victims`` and ``claim_urls``.

    Returns:
        A flat list of page dicts for all groups.
    """
    pages: list[dict] = []

    for gd in groups:
        gname = gd.get("group") or "Unknown"
        # `.get(key, [])` still yields None when the key is present with a None
        # value, so normalise with `or []`; drop non-string / empty URL entries
        # so the join below cannot fail.
        onion_urls = [
            u for u in (gd.get("onion_urls") or []) if isinstance(u, str) and u
        ]
        victims = gd.get("victims") or []
        claim_urls = gd.get("claim_urls") or []

        lines: list[str] = [f"Ransomware Group Intelligence Report: {gname}"]

        if gd.get("description"):
            lines.append(f"\nDescription: {gd['description']}")

        if onion_urls:
            lines.append(f"\nLeak Site URLs: {', '.join(onion_urls)}")

        if victims:
            lines.append(f"\nKnown Victims ({len(victims)} total):")
            lines.extend(_rl_victim_line(v) for v in victims[:40])

        content = "\n".join(lines)
        base_link = f"https://www.ransomware.live/group/{gname}"

        pages.append({
            "link": base_link,
            "url": base_link,
            "content": content,
            "text": content,
            "status": 200,
            "source": "ransomware_live",
            "title": f"ransomware.live — {gname}",
            "via": "ransomware_live_api",
        })

        # Stub pages for each .onion leak site so the scraper will visit them
        for onion_url in onion_urls:
            if ".onion" in onion_url:
                pages.append(_rl_seed_page(
                    onion_url,
                    f"{gname} ransomware group leak site: {onion_url}",
                    f"{gname} leak site",
                    "ransomware_live_onion_seed",
                ))

        # Stub pages for individual victim claim URLs (specific post pages on leak sites)
        for claim_url in claim_urls[:20]:
            if isinstance(claim_url, str) and ".onion" in claim_url:
                pages.append(_rl_seed_page(
                    claim_url,
                    f"{gname} ransomware victim post: {claim_url}",
                    f"{gname} victim claim",
                    "ransomware_live_claim_seed",
                ))

    return pages
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
async def _enrich_new_sources(query: str, entities: list[dict]) -> list[dict]:
    """
    Run the newer enrichment sources and return page-shaped dicts.

    CISA, Shodan and VirusTotal are queried concurrently under a shared 55 s
    deadline. Afterwards, and only when at least one of those three returned
    results, actor-type entities that none of them enriched are passed to the
    historical-intel fallback. Finally, when ``entities`` contains actor-type
    entries but no CVE / MITRE_TECHNIQUE entries, a MITRE ATT&CK overlay page
    is added per actor for which techniques are found.

    Sources:
      - CISA KEV + advisories (cisa.py)
      - Shodan InternetDB     (shodan.py)
      - VirusTotal            (virustotal.py)
      - Historical intel      (historical_intel.py)

    Args:
        query: The investigation query; forwarded to the CISA source.
        entities: Pre-extracted entity dicts (type under ``type`` or
            ``entity_type``).

    Returns:
        Page dicts from all sources that succeeded; ``[]`` if the concurrent
        stage exceeds its deadline.
    """
    from sources.cisa import enrich_cisa
    from sources.shodan import enrich_shodan
    from sources.virustotal import enrich_virustotal
    from sources.historical_intel import enrich_historical

    async def _gather():
        return await asyncio.gather(
            enrich_cisa(query, entities),
            enrich_shodan(entities),
            enrich_virustotal(entities),
            return_exceptions=True,
        )

    try:
        packed = await asyncio.wait_for(_gather(), timeout=55.0)
    except asyncio.TimeoutError:
        logger.warning("_enrich_new_sources: deadline exceeded")
        return []

    cisa_results, shodan_results, vt_results = packed

    # With return_exceptions=True a failed source shows up as an exception
    # object in its slot; downgrade each to an empty result list.
    if isinstance(cisa_results, Exception):
        logger.warning("CISA enrichment failed: %s", cisa_results)
        cisa_results = []
    if isinstance(shodan_results, Exception):
        logger.warning("Shodan enrichment failed: %s", shodan_results)
        shodan_results = []
    if isinstance(vt_results, Exception):
        logger.warning("VirusTotal enrichment failed: %s", vt_results)
        vt_results = []

    pages: list[dict] = []

    if cisa_results:
        pages.extend(_cisa_results_to_pages(cisa_results, query))
    if shodan_results:
        pages.extend(_shodan_results_to_pages(shodan_results))
    if vt_results:
        pages.extend(_vt_results_to_pages(vt_results))

    # NOTE(review): the fallback is skipped when all three sources came back
    # empty — confirm that gating is intended.
    if cisa_results or shodan_results or vt_results:
        unenriched = _group_unenriched_entities(entities, cisa_results, shodan_results, vt_results)
        if unenriched:
            # Best-effort, like the overlay below: a failure here must not
            # discard the pages already collected above.
            try:
                hist_pages = await enrich_historical(unenriched)
            except Exception as exc:
                logger.warning("Historical enrichment failed: %s", exc)
            else:
                pages.extend(_historical_results_to_pages(hist_pages))

    # Entity-based MITRE overlay: fires when the caller passes pre-extracted entities
    # that contain actors but zero CVE/MITRE_TECHNIQUE results.
    _actor_types = {"THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY"}
    _cve_mitre_types = {"CVE", "MITRE_TECHNIQUE"}
    _actor_ents = [
        e for e in entities
        if (e.get("type") or e.get("entity_type", "")) in _actor_types
    ]
    _has_cve_or_mitre = any(
        (e.get("type") or e.get("entity_type", "")) in _cve_mitre_types
        for e in entities
    )
    if _actor_ents and not _has_cve_or_mitre:
        from sources.historical_intel import get_techniques_for_actor
        for _actor_ent in _actor_ents:
            _actor_name = (
                _actor_ent.get("value")
                or _actor_ent.get("canonical_value")
                or _actor_ent.get("entity_value", "")
            )
            if not _actor_name:
                continue
            try:
                _techniques = await get_techniques_for_actor(_actor_name)
            except Exception as _exc:
                logger.warning("MITRE overlay: failed for '%s': %s", _actor_name, _exc)
                _techniques = []
            if not _techniques:
                continue
            logger.info(
                "MITRE overlay: added %d techniques for actor '%s'",
                len(_techniques), _actor_name,
            )
            _oc = (
                f"MITRE ATT&CK Overlay: Techniques associated with {_actor_name} "
                f"(source: mitre_attack_overlay)\n" + "\n".join(_techniques)
            )
            pages.append({
                "link": "https://attack.mitre.org/",
                "url": "https://attack.mitre.org/",
                "content": _oc,
                "text": _oc,
                "status": 200,
                "source": "mitre_attack_overlay",
                "via": "mitre_overlay",
            })

    return pages
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _cisa_results_to_pages(results: list[dict], query: str) -> list[dict]:
|
|
874
|
+
pages: list[dict] = []
|
|
875
|
+
kev_entries = [r for r in results if r.get("source") == "cisa_kev"]
|
|
876
|
+
adv_entries = [r for r in results if r.get("source") == "cisa_advisory"]
|
|
877
|
+
|
|
878
|
+
if kev_entries:
|
|
879
|
+
lines = ["CISA Known Exploited Vulnerabilities (KEV) Catalog\n"]
|
|
880
|
+
for r in kev_entries:
|
|
881
|
+
lines.append(f"CVE: {r.get('entity_value', '')}")
|
|
882
|
+
if r.get("vendor_project"):
|
|
883
|
+
lines.append(f" Vendor/Project: {r['vendor_project']}")
|
|
884
|
+
if r.get("product"):
|
|
885
|
+
lines.append(f" Product: {r['product']}")
|
|
886
|
+
if r.get("vulnerability_name"):
|
|
887
|
+
lines.append(f" Vulnerability: {r['vulnerability_name']}")
|
|
888
|
+
if r.get("date_added"):
|
|
889
|
+
lines.append(f" Date Added to KEV: {r['date_added']}")
|
|
890
|
+
if r.get("short_description"):
|
|
891
|
+
lines.append(f" Description: {r['short_description']}")
|
|
892
|
+
lines.append("")
|
|
893
|
+
pages.append({
|
|
894
|
+
"link": "https://www.cisa.gov/known-exploited-vulnerabilities-catalog",
|
|
895
|
+
"url": "https://www.cisa.gov/known-exploited-vulnerabilities-catalog",
|
|
896
|
+
"content": "\n".join(lines),
|
|
897
|
+
"text": "\n".join(lines),
|
|
898
|
+
"status": 200,
|
|
899
|
+
"source": "cisa_kev",
|
|
900
|
+
"via": "cisa_feed",
|
|
901
|
+
})
|
|
902
|
+
|
|
903
|
+
if adv_entries:
|
|
904
|
+
lines = ["CISA Cybersecurity Advisories\n"]
|
|
905
|
+
for r in adv_entries:
|
|
906
|
+
lines.append(f"Title: {r.get('advisory_title', '')}")
|
|
907
|
+
if r.get("advisory_url"):
|
|
908
|
+
lines.append(f" URL: {r['advisory_url']}")
|
|
909
|
+
if r.get("advisory_date"):
|
|
910
|
+
lines.append(f" Date: {r['advisory_date']}")
|
|
911
|
+
lines.append("")
|
|
912
|
+
pages.append({
|
|
913
|
+
"link": "https://www.cisa.gov/cybersecurity-advisories",
|
|
914
|
+
"url": "https://www.cisa.gov/cybersecurity-advisories",
|
|
915
|
+
"content": "\n".join(lines),
|
|
916
|
+
"text": "\n".join(lines),
|
|
917
|
+
"status": 200,
|
|
918
|
+
"source": "cisa_advisory",
|
|
919
|
+
"via": "cisa_feed",
|
|
920
|
+
})
|
|
921
|
+
|
|
922
|
+
return pages
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def _shodan_results_to_pages(results: list[dict]) -> list[dict]:
|
|
926
|
+
pages: list[dict] = []
|
|
927
|
+
for r in results:
|
|
928
|
+
lines = [f"Shodan InternetDB: {r.get('entity_value', '')}\n"]
|
|
929
|
+
if r.get("open_ports"):
|
|
930
|
+
lines.append(f"Open Ports: {', '.join(str(p) for p in r['open_ports'])}")
|
|
931
|
+
if r.get("hostnames"):
|
|
932
|
+
lines.append(f"Hostnames: {', '.join(r['hostnames'])}")
|
|
933
|
+
if r.get("tags"):
|
|
934
|
+
lines.append(f"Tags: {', '.join(r['tags'])}")
|
|
935
|
+
if r.get("vulns"):
|
|
936
|
+
lines.append(f"Vulnerabilities: {', '.join(r['vulns'])}")
|
|
937
|
+
if r.get("correlated_cves"):
|
|
938
|
+
lines.append(f"Correlated CVEs (also extracted): {', '.join(r['correlated_cves'])}")
|
|
939
|
+
if r.get("high_confidence_c2"):
|
|
940
|
+
lines.append("** HIGH CONFIDENCE C2 **")
|
|
941
|
+
pages.append({
|
|
942
|
+
"link": f"https://internetdb.shodan.io/{r.get('entity_value', '')}",
|
|
943
|
+
"url": f"https://internetdb.shodan.io/{r.get('entity_value', '')}",
|
|
944
|
+
"content": "\n".join(lines),
|
|
945
|
+
"text": "\n".join(lines),
|
|
946
|
+
"status": 200,
|
|
947
|
+
"source": "shodan_internetdb",
|
|
948
|
+
"via": "shodan_api",
|
|
949
|
+
})
|
|
950
|
+
return pages
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def _vt_results_to_pages(results: list[dict]) -> list[dict]:
|
|
954
|
+
pages: list[dict] = []
|
|
955
|
+
for r in results:
|
|
956
|
+
lines = [f"VirusTotal: {r.get('entity_value', '')}\n"]
|
|
957
|
+
lines.append(f"Detection: {r.get('malicious_count', 0)}/{r.get('total_engines', 0)} ({r.get('detection_ratio', 0):.0%})")
|
|
958
|
+
if r.get("suggested_threat_label"):
|
|
959
|
+
lines.append(f"Threat Label: {r['suggested_threat_label']}")
|
|
960
|
+
if r.get("first_seen"):
|
|
961
|
+
lines.append(f"First Seen: {r['first_seen']}")
|
|
962
|
+
if r.get("last_seen"):
|
|
963
|
+
lines.append(f"Last Seen: {r['last_seen']}")
|
|
964
|
+
if r.get("confirmed_malicious"):
|
|
965
|
+
lines.append("** CONFIRMED MALICIOUS **")
|
|
966
|
+
pages.append({
|
|
967
|
+
"link": f"https://www.virustotal.com/gui/file/{r.get('entity_value', '')}",
|
|
968
|
+
"url": f"https://www.virustotal.com/gui/file/{r.get('entity_value', '')}",
|
|
969
|
+
"content": "\n".join(lines),
|
|
970
|
+
"text": "\n".join(lines),
|
|
971
|
+
"status": 200,
|
|
972
|
+
"source": "virustotal",
|
|
973
|
+
"via": "virustotal_api",
|
|
974
|
+
})
|
|
975
|
+
return pages
|
|
976
|
+
|
|
977
|
+
|
|
978
|
+
def _group_unenriched_entities(
|
|
979
|
+
entities: list[dict],
|
|
980
|
+
cisa_results: list[dict],
|
|
981
|
+
shodan_results: list[dict],
|
|
982
|
+
vt_results: list[dict],
|
|
983
|
+
) -> dict[str, list[dict]]:
|
|
984
|
+
"""
|
|
985
|
+
Determine which THREAT_ACTOR / RANSOMWARE_GROUP / MALWARE_FAMILY entities
|
|
986
|
+
received zero enrichment results from CISA, Shodan, and VT.
|
|
987
|
+
Returns a dict mapping entity type -> list of entities with no enrichment.
|
|
988
|
+
"""
|
|
989
|
+
fallback_types = {"THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY"}
|
|
990
|
+
ent_by_type: dict[str, list[dict]] = {t: [] for t in fallback_types}
|
|
991
|
+
|
|
992
|
+
for e in entities:
|
|
993
|
+
et = e.get("type") or e.get("entity_type", "")
|
|
994
|
+
if et in fallback_types:
|
|
995
|
+
ent_by_type[et].append(e)
|
|
996
|
+
|
|
997
|
+
enriched_values: set[str] = set()
|
|
998
|
+
for r in cisa_results:
|
|
999
|
+
ev = r.get("entity_value", "")
|
|
1000
|
+
if ev:
|
|
1001
|
+
enriched_values.add(ev.lower())
|
|
1002
|
+
for r in shodan_results:
|
|
1003
|
+
ev = r.get("entity_value", "")
|
|
1004
|
+
if ev:
|
|
1005
|
+
enriched_values.add(ev.lower())
|
|
1006
|
+
for r in vt_results:
|
|
1007
|
+
ev = r.get("entity_value", "")
|
|
1008
|
+
if ev:
|
|
1009
|
+
enriched_values.add(ev.lower())
|
|
1010
|
+
|
|
1011
|
+
result: dict[str, list[dict]] = {}
|
|
1012
|
+
for et, ent_list in ent_by_type.items():
|
|
1013
|
+
unenriched = [
|
|
1014
|
+
ent for ent in ent_list
|
|
1015
|
+
if (ent.get("value") or ent.get("entity_value", "")).lower() not in enriched_values
|
|
1016
|
+
]
|
|
1017
|
+
if unenriched:
|
|
1018
|
+
result[et] = unenriched
|
|
1019
|
+
|
|
1020
|
+
return result
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def _historical_results_to_pages(results: list[dict]) -> list[dict]:
|
|
1024
|
+
pages: list[dict] = []
|
|
1025
|
+
for r in results:
|
|
1026
|
+
src = r.get("source", "")
|
|
1027
|
+
lines = [f"Historical Intel: {r.get('entity_value', '')}\n"]
|
|
1028
|
+
if src == "mitre_attack":
|
|
1029
|
+
lines.append(f"MITRE ATT&CK ID: {r.get('mitre_id', '')}")
|
|
1030
|
+
lines.append(f"Name: {r.get('mitre_name', '')}")
|
|
1031
|
+
if r.get("aliases"):
|
|
1032
|
+
lines.append(f"Aliases: {', '.join(r['aliases'])}")
|
|
1033
|
+
if r.get("techniques"):
|
|
1034
|
+
lines.append(f"Techniques: {', '.join(r['techniques'])}")
|
|
1035
|
+
if r.get("description"):
|
|
1036
|
+
lines.append(f"Description: {r['description']}")
|
|
1037
|
+
pages.append({
|
|
1038
|
+
"link": f"https://attack.mitre.org/groups/{r.get('mitre_id', '')}",
|
|
1039
|
+
"url": f"https://attack.mitre.org/groups/{r.get('mitre_id', '')}",
|
|
1040
|
+
"content": "\n".join(lines),
|
|
1041
|
+
"text": "\n".join(lines),
|
|
1042
|
+
"status": 200,
|
|
1043
|
+
"source": "mitre_attack",
|
|
1044
|
+
"via": "mitre_cti",
|
|
1045
|
+
})
|
|
1046
|
+
elif src == "fbi_doj_press":
|
|
1047
|
+
lines.append(f"Title: {r.get('press_title', '')}")
|
|
1048
|
+
lines.append(f"Date: {r.get('press_date', '')}")
|
|
1049
|
+
pages.append({
|
|
1050
|
+
"link": r.get("press_url", ""),
|
|
1051
|
+
"url": r.get("press_url", ""),
|
|
1052
|
+
"content": "\n".join(lines),
|
|
1053
|
+
"text": "\n".join(lines),
|
|
1054
|
+
"status": 200,
|
|
1055
|
+
"source": "fbi_doj_press",
|
|
1056
|
+
"via": "fbi_rss",
|
|
1057
|
+
})
|
|
1058
|
+
elif src == "cisa_advisory_historical":
|
|
1059
|
+
lines.append(f"Title: {r.get('advisory_title', '')}")
|
|
1060
|
+
lines.append(f"URL: {r.get('advisory_url', '')}")
|
|
1061
|
+
lines.append(f"Date: {r.get('advisory_date', '')}")
|
|
1062
|
+
pages.append({
|
|
1063
|
+
"link": r.get("advisory_url", ""),
|
|
1064
|
+
"url": r.get("advisory_url", ""),
|
|
1065
|
+
"content": "\n".join(lines),
|
|
1066
|
+
"text": "\n".join(lines),
|
|
1067
|
+
"status": 200,
|
|
1068
|
+
"source": "cisa_advisory",
|
|
1069
|
+
"via": "cisa_feed",
|
|
1070
|
+
})
|
|
1071
|
+
return pages
|
|
1072
|
+
|
|
1073
|
+
|
|
1074
|
+
async def run_dns_enrichment(extracted_entities: list[dict]) -> dict:
    """
    Delegate DNS/WHOIS enrichment of the extracted entities to
    ``sources.dns_enrichment.enrich_with_dns``.

    On success the delegate's return value is passed through unchanged
    (presumably a dict with the same four keys as the fallback — verify
    against ``enrich_with_dns``). If the import or the call raises, the error
    is logged and an empty result with keys ``ip_enrichments``,
    ``domain_enrichments``, ``new_entities`` and ``infrastructure_clusters``
    is returned instead.
    """
    try:
        # Imported lazily so an import failure is handled like a runtime one.
        from sources.dns_enrichment import enrich_with_dns

        enrichment = await enrich_with_dns(extracted_entities)
    except Exception as exc:
        logger.error("DNS enrichment error: %s", exc)
        enrichment = {
            "ip_enrichments": {},
            "domain_enrichments": {},
            "new_entities": [],
            "infrastructure_clusters": [],
        }
    return enrichment
|
|
1090
|
+
|
|
1091
|
+
|
|
1092
|
+
async def enrich_investigation(
    query: str,
    otx_api_key: Optional[str] = None,
    entities: Optional[list[dict]] = None,
) -> list[dict]:
    """
    Run all threat intel sources in parallel; return page dicts for extraction.

    Sources:
      - OTX (AlienVault)      — requires OTX_API_KEY
      - MalwareBazaar         — free (ABUSECH_API_KEY improves rate limits)
      - ThreatFox             — free
      - URLhaus               — free
      - ransomware.live       — free, no key required
      - CISA KEV + advisories — free, no key required (clearnet)
      - Shodan InternetDB     — free, no key required (clearnet)
      - VirusTotal            — requires VT_API_KEY (clearnet)

    Timing: the parallel fetch is bounded by a 59 s ``asyncio.wait_for``; if
    that deadline is exceeded an empty list is returned. The optional MITRE
    overlay that may follow has its own 20 s budget, so the call can take
    longer than 60 s in total.

    Args:
        query: Investigation query passed to every source.
        otx_api_key: OTX key; an empty string is passed when omitted.
        entities: Pre-extracted entities for the entity-driven sources;
            ``None`` is treated as an empty list.

    Returns:
        Page dicts from every source that succeeded, plus any MITRE overlay
        pages.
    """
    logger.info("Starting threat intel enrichment for: %s", query)

    _entities = entities if entities is not None else []

    async def _gather():
        return await asyncio.gather(
            fetch_otx_pulses(query, otx_api_key or "", limit=20),
            fetch_malwarebazaar(query, limit=20),
            fetch_threatfox(query, limit=50),
            fetch_urlhaus(query, limit=20),
            fetch_ransomware_live(query),
            _enrich_new_sources(query, _entities),
            return_exceptions=True,
        )

    try:
        packed = await asyncio.wait_for(_gather(), timeout=59.0)
    except asyncio.TimeoutError:
        logger.warning("Enrichment: deadline exceeded (59s), returning empty")
        return []

    otx_pulses, mb_results, tf_results, uh_results, rl_groups, new_pages = packed

    # With return_exceptions=True a failed source shows up as an exception
    # object in its slot; downgrade each to an empty result list.
    if isinstance(otx_pulses, Exception):
        logger.warning("OTX failed: %s", otx_pulses)
        otx_pulses = []
    if isinstance(mb_results, Exception):
        logger.warning("MalwareBazaar failed: %s", mb_results)
        mb_results = []
    if isinstance(tf_results, Exception):
        logger.warning("ThreatFox failed: %s", tf_results)
        tf_results = []
    if isinstance(uh_results, Exception):
        logger.warning("URLhaus failed: %s", uh_results)
        uh_results = []
    if isinstance(rl_groups, Exception):
        logger.warning("ransomware.live failed: %s", rl_groups)
        rl_groups = []
    if isinstance(new_pages, Exception):
        logger.warning("New enrichment sources failed: %s", new_pages)
        new_pages = []

    pages: list[dict] = []

    for pulse in otx_pulses:
        page = otx_pulse_to_page(pulse)
        if page.get("content"):
            pages.append(page)

    pages.extend(abusech_to_pages(mb_results, tf_results, uh_results))
    pages.extend(ransomwarelive_to_pages(rl_groups))
    pages.extend(new_pages or [])

    # Page-scan MITRE overlay: extract actor names from ransomware.live / OTX results
    # and inject T-codes when no MITRE techniques appear in any enrichment page.
    # This fires without a pre-extracted entity list, covering the current pipeline.
    _overlay_actor_names: list[str] = []
    for _g in (rl_groups if isinstance(rl_groups, list) else []):
        _gname = _g.get("group", "")
        if _gname and _gname not in _overlay_actor_names:
            _overlay_actor_names.append(_gname)
    for _pulse in (otx_pulses if isinstance(otx_pulses, list) else []):
        for _mf in (_pulse.get("malware_families") or []):
            _mfname = _mf if isinstance(_mf, str) else (_mf.get("display_name") or _mf.get("name", ""))
            if _mfname and _mfname not in _overlay_actor_names:
                _overlay_actor_names.append(_mfname)

    if _overlay_actor_names:
        _t_pattern = re.compile(r'\bT\d{4}(?:\.\d{3})?\b')
        # The trailing `or ""` keeps search() from receiving None when both
        # fields are present but None.
        _t_found = any(
            _t_pattern.search(p.get("content", "") or p.get("text", "") or "")
            for p in pages
        )
        if not _t_found:
            from sources.historical_intel import get_techniques_for_actor

            OVERLAY_TIMEOUT = 20

            _q_lower = query.lower()
            # Prioritise first, then cap: sorted() is stable, so actors named
            # in the query move to the front and cannot be cut by the limit.
            _prioritized = sorted(
                _overlay_actor_names,
                key=lambda a: 0 if a.lower() in _q_lower else 1,
            )[:10]

            # Filled incrementally so that pages built before a timeout are
            # kept; otherwise the prioritisation above would have no effect.
            _overlay_pages: list[dict] = []

            async def _run_overlay():
                for _aname in _prioritized:
                    try:
                        _techs = await get_techniques_for_actor(_aname)
                    except Exception as _oexc:
                        logger.warning("MITRE overlay: failed for '%s': %s", _aname, _oexc)
                        _techs = []
                    if not _techs:
                        continue
                    logger.info(
                        "MITRE overlay: added %d techniques for actor '%s'",
                        len(_techs), _aname,
                    )
                    _ocontent = (
                        f"MITRE ATT&CK Overlay: Techniques associated with {_aname} "
                        f"(source: mitre_attack_overlay)\n" + "\n".join(_techs)
                    )
                    _overlay_pages.append({
                        "link": "https://attack.mitre.org/",
                        "url": "https://attack.mitre.org/",
                        "content": _ocontent,
                        "text": _ocontent,
                        "status": 200,
                        "source": "mitre_attack_overlay",
                        "via": "mitre_overlay",
                    })

            try:
                await asyncio.wait_for(
                    _run_overlay(),
                    timeout=OVERLAY_TIMEOUT,
                )
            except asyncio.TimeoutError:
                logger.warning(
                    "MITRE overlay timed out after %ds — keeping %d partial result(s)",
                    OVERLAY_TIMEOUT,
                    len(_overlay_pages),
                )
            pages.extend(_overlay_pages)

    total_onion_seeds = sum(1 for p in pages if p.get("_scrape_seed"))
    logger.info(
        "Enrichment complete: %s OTX pulses, %s MalwareBazaar, "
        "%s ThreatFox IOCs, %s URLhaus, %s ransomware.live groups "
        "(%s .onion seeds) → %s enrichment pages total",
        len(otx_pulses), len(mb_results), len(tf_results),
        len(uh_results), len(rl_groups), total_onion_seeds, len(pages),
    )

    return pages
|