voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sources/historical_intel.py — Historical threat-actor fallback enrichment.
|
|
3
|
+
|
|
4
|
+
Activated ONLY when:
|
|
5
|
+
a) entity type is THREAT_ACTOR, RANSOMWARE_GROUP, or MALWARE_FAMILY
|
|
6
|
+
b) all other enrichment sources returned 0 results for this entity
|
|
7
|
+
|
|
8
|
+
Queries:
|
|
9
|
+
A. CISA advisories (cache already populated by sources/cisa.py)
|
|
10
|
+
B. MITRE ATT&CK STIX data (7-day cache, ~50MB)
|
|
11
|
+
C. FBI/DOJ press releases RSS (12-hour cache)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import asyncio
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
import aiohttp
|
|
23
|
+
|
|
24
|
+
from sources.cache import CachedFeed
|
|
25
|
+
|
|
26
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Cache file paths passed to the CachedFeed instances defined further down.
# NOTE(review): both live under a hard-coded /tmp — presumably assumes a
# POSIX host with a writable /tmp; confirm.
_MITRE_CACHE = "/tmp/voidaccess_mitre_attack.json"
_FBI_CACHE = "/tmp/voidaccess_fbi_press.json"
|
|
30
|
+
|
|
31
|
+
# Lower-cased actor / malware name -> group name that
# get_techniques_for_actor() searches for instead of the raw input.
# A value of None marks a known name with no group mapping;
# get_techniques_for_actor() returns [] for those names.
# NOTE(review): some attributions look questionable (e.g. "cobalt strike"
# -> "Cobalt Group", "revil"/"gandcrab" -> "Wizard Spider") — confirm each
# against the current MITRE ATT&CK group pages before relying on them.
ACTOR_ALIASES = {
    "revil": "Wizard Spider",
    "sodinokibi": "Wizard Spider",
    "gandcrab": "Wizard Spider",
    "lockbit": None,
    "conti": "Wizard Spider",
    "ryuk": "Wizard Spider",
    "trickbot": "Wizard Spider",
    "darkside": "FIN7",
    "blackmatter": "FIN7",
    "alphv": None,
    "blackcat": None,
    "hive": None,
    "cl0p": "TA505",
    "ta505": "TA505",
    "cobalt strike": "Cobalt Group",
    "apt28": "APT28",
    "fancy bear": "APT28",
    "lazarus": "Lazarus Group",
    "apt38": "Lazarus Group",
    "fin7": "FIN7",
    "carbanak": "FIN7",
}
|
|
54
|
+
|
|
55
|
+
# MITRE ATT&CK enterprise STIX bundle, cached at _MITRE_CACHE.
# ttl_seconds=604800 is 7 days.
_mitre_feed = CachedFeed(
    "https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json",
    _MITRE_CACHE,
    ttl_seconds=604800,
)
# justice.gov press-release RSS feed, cached at _FBI_CACHE.
# ttl_seconds=43200 is 12 hours.
_fbi_feed = CachedFeed(
    "https://www.justice.gov/news/press-releases/rss",
    _FBI_CACHE,
    ttl_seconds=43200)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def _fetch_mitre_index() -> dict:
    """Build a lookup of intrusion-set objects keyed by lower-cased name and alias.

    Fetches the data behind ``_mitre_feed`` and keeps only objects whose
    ``type`` is ``"intrusion-set"``. Each one is registered under its
    lower-cased ``name`` and under every lower-cased entry of ``aliases``.
    Empty keys are skipped; when two objects share a key, the later one wins.

    Returns:
        The key -> object mapping, or ``{}`` when the feed returns ``None``.
    """
    payload = await _mitre_feed.fetch()
    if payload is None:
        return {}

    # The payload may be a bare list of objects or a bundle with "objects".
    stix_objects = payload if isinstance(payload, list) else payload.get("objects", [])
    groups = (item for item in stix_objects if item.get("type") == "intrusion-set")

    lookup: dict = {}
    for group in groups:
        labels = [group.get("name") or "", *(group.get("aliases") or [])]
        keys = [label.lower() for label in labels]
        lookup.update({key: group for key in keys if key})
    return lookup
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
async def _fetch_fbi_results(entity_value: str) -> list[dict]:
    """Return press-release entries whose title contains *entity_value*.

    The comparison is a case-insensitive substring test against each
    entry's ``title``. Entries come from ``_fbi_feed``; a ``None`` payload
    or any payload that is not a list yields no results.

    Returns:
        One dict per matching entry with the keys ``source``
        (always ``"fbi_doj_press"``), ``entity_value``, ``press_title``,
        ``press_url`` and ``press_date``.
    """
    feed_items = await _fbi_feed.fetch()
    if feed_items is None:
        return []

    needle = entity_value.lower()
    if not isinstance(feed_items, list):
        return []

    return [
        {
            "source": "fbi_doj_press",
            "entity_value": entity_value,
            "press_title": item.get("title", ""),
            "press_url": item.get("link", ""),
            "press_date": item.get("published", ""),
        }
        for item in feed_items
        if needle in (item.get("title") or "").lower()
    ]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def get_techniques_for_actor(actor_name: str) -> list[str]:
    """Return MITRE ATT&CK T-codes used by a threat group.

    The name is trimmed, lower-cased and first resolved through
    ``ACTOR_ALIASES``. The resulting query is matched against intrusion-set
    names and aliases in the data served by ``_mitre_feed``: an exact match
    is preferred, otherwise the first case-insensitive partial match is
    used. ``uses`` relationships from that group to attack-pattern objects
    are then followed to collect T-codes.

    Args:
        actor_name: Group, alias or malware name to look up.

    Returns:
        De-duplicated T-codes in the order their relationships appear.
        ``[]`` when the name is empty, is listed in ``ACTOR_ALIASES`` with
        no mapping, is not found, or the feed is unavailable.
    """
    query = (actor_name or "").strip().lower()
    if not query:
        # An empty needle is a substring of every name and would otherwise
        # select whichever intrusion set happens to come first.
        return []

    # Resolve aliases before fetching so known-unmapped names cost nothing.
    if query in ACTOR_ALIASES:
        mapped = ACTOR_ALIASES[query]
        if mapped is None:
            return []
        query = mapped.lower()

    data = await _mitre_feed.fetch()
    if data is None:
        return []

    objects = data if isinstance(data, list) else data.get("objects", [])

    # Locate the intrusion-set STIX ID: exact name/alias match wins,
    # otherwise fall back to the first partial match.
    exact_id: Optional[str] = None
    partial_id: Optional[str] = None
    for obj in objects:
        if obj.get("type") != "intrusion-set":
            continue
        labels = [(obj.get("name") or "").lower()]
        labels.extend(a.lower() for a in (obj.get("aliases") or []))
        if query in labels:
            exact_id = obj.get("id")
            break
        if partial_id is None and any(query in label for label in labels):
            partial_id = obj.get("id")

    group_stix_id = exact_id or partial_id
    if not group_stix_id:
        return []

    # Build attack-pattern stix_id -> T-code lookup.
    technique_map: dict[str, str] = {}
    for obj in objects:
        if obj.get("type") != "attack-pattern":
            continue
        for ref in (obj.get("external_references") or []):
            if ref.get("source_name") == "mitre-attack":
                ext_id = ref.get("external_id", "")
                if ext_id.startswith("T"):
                    technique_map[obj.get("id", "")] = ext_id
                    break

    # Collect T-codes via "uses" relationships from this group. Targets
    # that are not attack-patterns are absent from technique_map and skipped.
    t_codes: list[str] = []
    seen: set[str] = set()
    for obj in objects:
        if (
            obj.get("type") == "relationship"
            and obj.get("relationship_type") == "uses"
            and obj.get("source_ref") == group_stix_id
        ):
            t_code = technique_map.get(obj.get("target_ref", ""))
            if t_code and t_code not in seen:
                seen.add(t_code)
                t_codes.append(t_code)

    return t_codes
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
async def enrich_historical(entities_by_type: dict[str, list[dict]]) -> list[dict]:
    """Run the historical fallback enrichment.

    Only entities filed under THREAT_ACTOR, RANSOMWARE_GROUP or
    MALWARE_FAMILY are processed, in that order. For each entity with a
    non-empty value the function looks up the MITRE index (exact,
    lower-cased name/alias), the press-release feed and the CISA
    advisories, then sleeps 0.5 s before the next entity.

    Args:
        entities_by_type: Maps an entity type string to the list of entity
            dicts that had no enrichment results. Each dict's value is read
            from ``value`` or ``entity_value`` and its type from ``type``
            or ``entity_type``.

    Returns:
        A flat list of result dicts from all three sources; ``[]`` when
        there is nothing relevant to process.
    """
    # A tuple (not a set) keeps the result order stable across runs.
    fallback_types = ("THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY")
    relevant_entities = [
        ent
        for et in fallback_types
        for ent in (entities_by_type.get(et) or [])
    ]
    if not relevant_entities:
        return []

    results: list[dict] = []
    # None means "not loaded yet"; an empty dict from a failed fetch is kept
    # so the feed is not re-fetched for every entity.
    mitre_index: Optional[dict] = None

    for ent in relevant_entities:
        ev = ent.get("value") or ent.get("entity_value", "")
        if not ev:
            continue
        entity_type = ent.get("type") or ent.get("entity_type", "")

        if mitre_index is None:
            mitre_index = await _fetch_mitre_index()

        mitre_match = mitre_index.get(ev.lower())
        if mitre_match:
            # Tolerate a missing, None or empty external_references list.
            external_refs = mitre_match.get("external_references") or []
            first_ref = external_refs[0] if external_refs else {}
            results.append({
                "source": "mitre_attack",
                "entity_type": entity_type,
                "entity_value": ev,
                "mitre_id": first_ref.get("external_id", ""),
                "mitre_name": mitre_match.get("name", ""),
                "aliases": mitre_match.get("aliases", []),
                "description": mitre_match.get("description", ""),
                # NOTE(review): these IDs come from the intrusion-set's own
                # external references, so they are presumably group IDs
                # rather than technique T-codes — confirm intended content.
                "techniques": [
                    ref.get("external_id", "")
                    for ref in external_refs
                    if ref.get("source_name") == "mitre-attack"
                ],
            })

        results.extend(await _fetch_fbi_results(ev))

        cisa_adv = await _cisa_advisory_for_entity(ev, entity_type)
        if cisa_adv:
            results.append(cisa_adv)

        await asyncio.sleep(0.5)

    return results
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
async def _cisa_advisory_for_entity(entity_value: str, entity_type: str) -> Optional[dict]:
    """Return the first CISA advisory that mentions *entity_value*.

    The match is a case-insensitive substring test against each advisory's
    ``title`` and its space-joined ``tags``.

    Args:
        entity_value: Name to search for; an empty value never matches.
        entity_type: Copied unchanged into the result.

    Returns:
        A dict with ``source`` (``"cisa_advisory_historical"``),
        ``entity_type``, ``entity_value``, ``advisory_title``,
        ``advisory_url`` and ``advisory_date``; ``None`` when nothing
        matches, the feed yields ``None``, or ``sources.cisa`` cannot be
        imported.
    """
    q = (entity_value or "").lower()
    if not q:
        # An empty needle is a substring of every title and would otherwise
        # return the first advisory in the feed.
        return None

    # Imported lazily and best-effort: a failure here disables this lookup
    # instead of raising, but is logged rather than silently dropped.
    try:
        from sources.cisa import _adv_feed
    except Exception:
        logger.debug(
            "sources.cisa unavailable; skipping CISA advisory lookup",
            exc_info=True,
        )
        return None

    data = await _adv_feed.fetch()
    if data is None:
        return None

    advisories = data if isinstance(data, list) else data.get("items", [])

    for adv in advisories:
        title = (adv.get("title") or "").lower()
        tags = " ".join(adv.get("tags") or []).lower()
        if q in title or q in tags:
            return {
                "source": "cisa_advisory_historical",
                "entity_type": entity_type,
                "entity_value": entity_value,
                "advisory_title": adv.get("title", ""),
                "advisory_url": adv.get("url", ""),
                "advisory_date": adv.get("datePublished", ""),
            }
    return None
|