voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
export/stix.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
"""
|
|
2
|
+
export/stix.py — Converts VoidAccess entities and investigations into STIX 2.1 bundles.
|
|
3
|
+
|
|
4
|
+
Uses the stix2 Python library throughout; no manual JSON construction.
|
|
5
|
+
|
|
6
|
+
Public interface
|
|
7
|
+
----------------
|
|
8
|
+
entity_to_stix_indicator(entity) → stix2.Indicator | None
|
|
9
|
+
entity_to_stix_malware(entity) → stix2.Malware | None
|
|
10
|
+
entity_to_stix_threat_actor(entity) → stix2.ThreatActor | None
|
|
11
|
+
investigation_to_stix_bundle(investigation_id, include_relationships, entity_ids) → stix2.Bundle
|
|
12
|
+
bundle_to_json(bundle) → str
|
|
13
|
+
bundle_to_dict(bundle) → dict
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
from typing import Any, Optional, Union
|
|
22
|
+
import uuid
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Graceful import of stix2
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
# Optional dependency: start from the "missing" state and upgrade it only when
# the import succeeds, so the rest of the module can branch on the flag.
stix2 = None  # type: ignore
_STIX2_AVAILABLE = False
try:
    import stix2  # type: ignore
except ImportError:
    logger.warning(
        "stix2 not installed — export/stix.py functions will return None / empty Bundle"
    )
else:
    _STIX2_AVAILABLE = True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# STIX pattern templates per entity type
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
# Maps a VoidAccess entity type to a STIX pattern template. The ``{value}``
# placeholder is filled in with ``str.format`` by the indicator converter.
# Entity types absent from this table produce no Indicator.
_STIX_PATTERNS: dict[str, str] = {
    # Every supported cryptocurrency shares the same wallet-address template.
    **dict.fromkeys(
        ("BITCOIN_ADDRESS", "ETHEREUM_ADDRESS", "MONERO_ADDRESS"),
        "[cryptocurrency-wallet:address = '{value}']",
    ),
    "EMAIL_ADDRESS": "[email-message:from_ref.value = '{value}']",
    "ONION_URL": "[url:value = '{value}']",
    "IP_ADDRESS": "[ipv4-addr:value = '{value}']",
    "CVE_NUMBER": "[vulnerability:name = '{value}']",
    # Malware families and ransomware groups are both matched by name.
    **dict.fromkeys(
        ("MALWARE_FAMILY", "RANSOMWARE_GROUP"),
        "[malware:name = '{value}']",
    ),
}

# Entity types that are additionally exported as STIX Malware objects.
_MALWARE_TYPES = frozenset(("MALWARE_FAMILY", "RANSOMWARE_GROUP"))
|
|
59
|
+
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
# Confidence mapping: VoidAccess float → STIX integer (0-100)
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _to_stix_confidence(confidence: float) -> int:
    """Scale *confidence* by 100, round to an integer and clamp it to 0..100."""
    scaled = int(round(confidence * 100))
    if scaled < 0:
        return 0
    if scaled > 100:
        return 100
    return scaled
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Public conversion functions
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def entity_to_stix_indicator(entity: Any) -> Optional[Any]:
    """
    Convert a single NormalizedEntity to a STIX 2.1 Indicator object.

    The entity's ``entity_type`` selects a pattern template from
    ``_STIX_PATTERNS``; the entity's ``value`` is escaped and substituted into it.

    Returns None when:
      - stix2 is not installed
      - the entity type has no entry in ``_STIX_PATTERNS``
      - constructing the Indicator raises (the failure is logged as a warning)
    """
    if not _STIX2_AVAILABLE:
        return None

    etype = entity.entity_type
    pattern_template = _STIX_PATTERNS.get(etype)
    if pattern_template is None:
        return None

    # Inside a STIX pattern string literal the backslash is the escape
    # character, so backslashes must be doubled BEFORE quotes are escaped.
    # Escaping only the quote would let a value containing "\" corrupt the
    # literal (e.g. a trailing backslash would swallow the closing quote).
    safe_value = entity.value.replace("\\", "\\\\").replace("'", "\\'")
    pattern = pattern_template.format(value=safe_value)

    # Determine indicator_types from entity_type.
    if etype == "CVE_NUMBER":
        indicator_types = ["compromised"]
    elif etype in (
        "MALWARE_FAMILY",
        "RANSOMWARE_GROUP",
        "BITCOIN_ADDRESS",
        "ETHEREUM_ADDRESS",
        "MONERO_ADDRESS",
        "IP_ADDRESS",
        "ONION_URL",
    ):
        indicator_types = ["malicious-activity"]
    else:
        indicator_types = ["unknown"]

    try:
        return stix2.Indicator(
            # Long values are truncated in the display name only; the pattern
            # always carries the full value.
            name=f"{etype}: {entity.value[:80]}",
            pattern=pattern,
            pattern_type="stix",
            indicator_types=indicator_types,
            confidence=_to_stix_confidence(entity.confidence),
            external_references=(
                [{"source_name": "voidaccess", "url": entity.source_url}]
                if entity.source_url
                else []
            ),
        )
    except Exception as exc:
        # Best-effort export: one bad entity must not abort the whole bundle.
        logger.warning("entity_to_stix_indicator failed for %r: %s", entity.value, exc)
        return None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def entity_to_stix_malware(entity: Any) -> Optional[Any]:
    """
    Build a STIX 2.1 Malware object from a malware-type entity.

    Only entity types listed in ``_MALWARE_TYPES`` are converted; every other
    type yields None. None is also returned when stix2 is not installed or
    when constructing the object raises (the failure is logged as a warning).
    """
    if not _STIX2_AVAILABLE or entity.entity_type not in _MALWARE_TYPES:
        return None

    try:
        # A source URL, when present, is attached as a single external reference.
        references = []
        if entity.source_url:
            references.append({"source_name": "voidaccess", "url": entity.source_url})

        return stix2.Malware(
            name=entity.value,
            is_family=True,
            confidence=_to_stix_confidence(entity.confidence),
            external_references=references,
        )
    except Exception as exc:
        logger.warning("entity_to_stix_malware failed for %r: %s", entity.value, exc)
        return None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def entity_to_stix_threat_actor(entity: Any) -> Optional[Any]:
    """
    Build a STIX 2.1 ThreatActor object from a THREAT_ACTOR_HANDLE entity.

    The handle is used both as the actor's name and as its only alias.
    Returns None for every other entity type, when stix2 is not installed, or
    when constructing the object raises (the failure is logged as a warning).
    """
    if not _STIX2_AVAILABLE or entity.entity_type != "THREAT_ACTOR_HANDLE":
        return None

    try:
        # A source URL, when present, is attached as a single external reference.
        references = []
        if entity.source_url:
            references.append({"source_name": "voidaccess", "url": entity.source_url})

        return stix2.ThreatActor(
            name=entity.value,
            aliases=[entity.value],
            confidence=_to_stix_confidence(entity.confidence),
            external_references=references,
        )
    except Exception as exc:
        logger.warning(
            "entity_to_stix_threat_actor failed for %r: %s", entity.value, exc
        )
        return None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def investigation_to_stix_bundle(
    investigation_id: Any,
    include_relationships: bool = True,
    entity_ids: Optional[list[str]] = None,
) -> Any:
    """
    Load an investigation's entities and wrap their STIX objects in a Bundle.

    Each entity is offered to the indicator, malware and threat-actor
    converters; every object they return is added to the bundle.

    Args:
        investigation_id: Identifier handed to the entity loader.
        include_relationships: When true (and at least one object was built),
            the objects returned by ``_build_stix_relationships`` are appended.
        entity_ids: Optional list of entity ids to restrict the export to.
            Entries that do not parse as UUIDs are ignored. A falsy value
            (None or an empty list) applies no filter.

    Returns:
        A stix2.Bundle. The result of ``_empty_bundle()`` is returned instead
        when stix2 is not installed (in which case that helper returns None),
        when ``entity_ids`` contains no parseable UUID, when no entities are
        loaded, or when Bundle construction fails.
    """
    if not _STIX2_AVAILABLE:
        return _empty_bundle()

    # Parse the optional id filter, dropping anything that is not a UUID.
    filter_uuids: Optional[list[uuid.UUID]] = None
    if entity_ids:
        parsed: list[uuid.UUID] = []
        for candidate in entity_ids:
            try:
                parsed.append(uuid.UUID(str(candidate)))
            except (ValueError, AttributeError):
                pass
        if not parsed:
            # A filter was requested but nothing in it was usable.
            return _empty_bundle()
        filter_uuids = parsed

    entities = _load_entities_for_investigation(investigation_id, entity_ids=filter_uuids)
    if not entities:
        return _empty_bundle()

    stix_objects: list[Any] = []
    stix_id_map: dict[str, str] = {}  # entity.value → stix_object.id

    for entity in entities:
        # The indicator id always takes the map slot for this value ...
        indicator = entity_to_stix_indicator(entity)
        if indicator:
            stix_objects.append(indicator)
            stix_id_map[entity.value] = indicator.id

        # ... while Malware / ThreatActor ids only fill a slot that is still empty.
        for convert in (entity_to_stix_malware, entity_to_stix_threat_actor):
            converted = convert(entity)
            if converted:
                stix_objects.append(converted)
                stix_id_map.setdefault(entity.value, converted.id)

    if include_relationships and stix_objects:
        stix_objects.extend(_build_stix_relationships(investigation_id, stix_id_map))

    try:
        return stix2.Bundle(*stix_objects, allow_custom=True)
    except Exception as exc:
        logger.warning("investigation_to_stix_bundle: Bundle construction failed: %s", exc)
        return _empty_bundle()
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def bundle_to_json(bundle: Any) -> str:
    """
    Serialize a STIX bundle to a pretty-printed JSON string.

    Returns the literal string "{}" when stix2 is not installed, when *bundle*
    is None, or when serialization raises (the failure is logged as a warning).

    NOTE(review): ``indent=2`` is requested, but stix2's pretty mode appears to
    apply its own indent — confirm the actual indentation against the
    installed stix2 version.
    """
    empty_json = "{}"
    if bundle is None or not _STIX2_AVAILABLE:
        return empty_json

    try:
        serialized = bundle.serialize(pretty=True, indent=2)
    except Exception as exc:
        logger.warning("bundle_to_json failed: %s", exc)
        return empty_json
    return serialized
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def bundle_to_dict(bundle: Any) -> dict:
    """
    Return the bundle as plain Python data by round-tripping through JSON.

    The bundle is serialized with ``bundle_to_json`` and parsed back with
    ``json.loads``. An empty dict is returned when stix2 is not installed,
    when *bundle* is None, or when either step raises (logged as a warning).
    """
    if bundle is None or not _STIX2_AVAILABLE:
        return {}

    try:
        return json.loads(bundle_to_json(bundle))
    except Exception as exc:
        logger.warning("bundle_to_dict failed: %s", exc)
        return {}
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ---------------------------------------------------------------------------
|
|
269
|
+
# Internal helpers
|
|
270
|
+
# ---------------------------------------------------------------------------
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _empty_bundle() -> Any:
    """Return an empty STIX Bundle, or None when stix2 is not installed."""
    if _STIX2_AVAILABLE:
        try:
            bundle = stix2.Bundle(allow_custom=True)
        except Exception:
            # Retry without the keyword if that construction is rejected.
            bundle = stix2.Bundle()
        return bundle
    return None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _load_entities_for_investigation(
    investigation_id: Any,
    entity_ids: Optional[list[uuid.UUID]] = None,
) -> list[Any]:
    """
    Fetch an investigation's entities from the DB as NormalizedEntity objects.

    Both entities whose ``investigation_id`` matches the investigation and
    entities referenced through ``InvestigationEntityLink`` rows are included.
    When *entity_ids* is not None, only rows whose id is in it are kept.

    Returns [] when DATABASE_URL is unset, the id cannot be coerced to a UUID,
    the investigation is not found, or any exception occurs (logged as a
    warning).
    """
    if not os.getenv("DATABASE_URL"):
        return []

    try:
        # Project imports are deferred until a database is known to be configured.
        from db.session import get_session  # noqa: PLC0415
        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415
        from db.models import Entity, InvestigationEntityLink  # noqa: PLC0415
        from extractor.normalizer import NormalizedEntity  # noqa: PLC0415

        investigation_uuid = _coerce_uuid(investigation_id)
        if investigation_uuid is None:
            return []

        with get_session() as session:
            investigation = get_investigation_by_id_or_run(session, investigation_uuid)
            if investigation is None:
                return []

            # Ids of entities attached to this investigation via the link table.
            linked_entity_ids = (
                session.query(InvestigationEntityLink.entity_id)
                .filter(InvestigationEntityLink.investigation_id == investigation.id)
                .subquery()
            )
            owned_or_linked = (Entity.investigation_id == investigation.id) | Entity.id.in_(
                linked_entity_ids
            )
            rows = session.query(Entity).filter(owned_or_linked).all()

            if entity_ids is not None:
                wanted = frozenset(entity_ids)
                rows = [row for row in rows if row.id in wanted]

            normalized: list[NormalizedEntity] = []
            for row in rows:
                # Resolving the page URL is best-effort; any failure leaves it blank.
                try:
                    page_url = (row.page.url or "") if row.page else ""
                except Exception:
                    page_url = ""

                normalized.append(
                    NormalizedEntity(
                        entity_type=row.entity_type,
                        # Prefer the canonical value when one is stored.
                        value=row.canonical_value or row.value,
                        confidence=row.confidence,
                        source_url=page_url,
                        page_id=row.page_id,
                        context_snippet=row.context_snippet or "",
                        extraction_method="db",
                    )
                )
            return normalized

    except Exception as exc:
        logger.warning("_load_entities_for_investigation failed: %s", exc)
        return []
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _build_stix_relationships(
    investigation_id: Any,
    stix_id_map: dict[str, str],
) -> list[Any]:
    """
    Build STIX Relationship objects from graph edges for the investigation.

    The graph is obtained from ``graph.builder.build_graph_from_db``. An edge
    is exported only when both of its endpoints are keys of *stix_id_map*.

    Args:
        investigation_id: Identifier of the investigation; coerced to a UUID.
        stix_id_map: Maps a graph node to the id of the STIX object built for it.

    Returns:
        The list of Relationship objects. [] is returned when stix2 is not
        installed, when the id cannot be coerced to a UUID, or on any error
        (logged as a warning).
    """
    if not _STIX2_AVAILABLE:
        return []
    try:
        from graph.builder import build_graph_from_db  # noqa: PLC0415

        inv_uuid = _coerce_uuid(investigation_id)
        if inv_uuid is None:
            # Mirror the entity loader: never query the graph with an unknown
            # investigation instead of a concrete one.
            return []
        graph = build_graph_from_db(investigation_id=inv_uuid)

        relationships: list[Any] = []
        for source_node, target_node, data in graph.edges(data=True):
            src_stix_id = stix_id_map.get(source_node)
            tgt_stix_id = stix_id_map.get(target_node)
            if not src_stix_id or not tgt_stix_id:
                # At least one endpoint has no exported STIX object.
                continue
            # Map VoidAccess edge types to STIX relationship types
            rel_type = _edge_type_to_stix(data.get("edge_type", "related-to"))
            try:
                relationships.append(
                    stix2.Relationship(
                        relationship_type=rel_type,
                        source_ref=src_stix_id,
                        target_ref=tgt_stix_id,
                    )
                )
            except Exception as exc:
                # One invalid edge must not abort the export, but leave a trace
                # so dropped relationships can be diagnosed.
                logger.debug(
                    "_build_stix_relationships: skipped %s edge %r -> %r: %s",
                    rel_type,
                    source_node,
                    target_node,
                    exc,
                )
        return relationships
    except Exception as exc:
        logger.warning("_build_stix_relationships failed: %s", exc)
        return []
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _edge_type_to_stix(edge_type: str) -> str:
    """
    Translate a VoidAccess graph edge type into a STIX relationship type string.

    Edge types without a specific mapping — including CO_APPEARED_ON,
    LINKED_TO, PAID_TO, LIKELY_SAME_ACTOR, CONFIRMED_SAME_ACTOR, FUNDED_BY and
    any unrecognised value — become the generic "related-to".
    """
    # Only the edge types that map to something other than "related-to".
    specific = {
        "POSTED_BY": "attributed-to",
        "CLAIMED": "attributed-to",
        "MEMBER_OF": "member-of",
        "USED": "uses",
    }
    try:
        return specific[edge_type]
    except KeyError:
        return "related-to"
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _coerce_uuid(value: Any) -> Optional[uuid.UUID]:
    """
    Return *value* as a uuid.UUID, or None when it cannot be converted.

    UUID instances are returned unchanged; anything else is parsed from its
    string form.
    """
    if not isinstance(value, uuid.UUID):
        try:
            value = uuid.UUID(str(value))
        except (ValueError, AttributeError):
            return None
    return value
|
extractor/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
extractor — Phase 2 entity extraction pipeline.
|
|
3
|
+
|
|
4
|
+
Public exports
|
|
5
|
+
--------------
|
|
6
|
+
ExtractionResult — dataclass returned by extraction functions
|
|
7
|
+
extract_entities_from_page — extract entities from a single page (async)
|
|
8
|
+
extract_entities_from_pages — extract entities from multiple pages concurrently (async)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from extractor.pipeline import (
|
|
12
|
+
ExtractionResult,
|
|
13
|
+
extract_entities_from_page,
|
|
14
|
+
extract_entities_from_pages,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ExtractionResult",
|
|
19
|
+
"extract_entities_from_page",
|
|
20
|
+
"extract_entities_from_pages",
|
|
21
|
+
]
|