voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
db/session.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLAlchemy engine and session factory.
|
|
3
|
+
|
|
4
|
+
Usage (application code)
|
|
5
|
+
------------------------
|
|
6
|
+
from db.session import get_session
|
|
7
|
+
|
|
8
|
+
with get_session() as session:
|
|
9
|
+
session.add(some_object)
|
|
10
|
+
# commits on exit, rolls back on exception
|
|
11
|
+
|
|
12
|
+
For async code, prefer get_async_session() with async with:
|
|
13
|
+
------------------------
|
|
14
|
+
from db.session import get_async_session
|
|
15
|
+
|
|
16
|
+
async with get_async_session() as session:
|
|
17
|
+
session.add(some_object)  # add() is synchronous on AsyncSession; do not await it
|
|
18
|
+
await session.commit()
|
|
19
|
+
|
|
20
|
+
For short-lived async operations, use async_session_scope():
|
|
21
|
+
------------------------
|
|
22
|
+
from db.session import async_session_scope
|
|
23
|
+
|
|
24
|
+
async with async_session_scope() as session:
|
|
25
|
+
# session is auto-committed on exit, rolled back on exception
|
|
26
|
+
await session.execute(...)
|
|
27
|
+
|
|
28
|
+
Usage (testing — pass an explicit URL to avoid needing DATABASE_URL in env)
|
|
29
|
+
---------------------------------------------------------------------------
|
|
30
|
+
from db.session import get_engine, get_session_factory
|
|
31
|
+
from db.models import Base
|
|
32
|
+
|
|
33
|
+
engine = get_engine("sqlite:///:memory:")
|
|
34
|
+
Base.metadata.create_all(engine)
|
|
35
|
+
Session = get_session_factory("sqlite:///:memory:")
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
from functools import lru_cache
|
|
41
|
+
|
|
42
|
+
from contextlib import asynccontextmanager, contextmanager
|
|
43
|
+
from typing import AsyncGenerator, Generator, Optional
|
|
44
|
+
|
|
45
|
+
from sqlalchemy import create_engine, Engine
|
|
46
|
+
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
|
47
|
+
from sqlalchemy.orm import sessionmaker, Session
|
|
48
|
+
|
|
49
|
+
import config
|
|
50
|
+
|
|
51
|
+
# Async engines memoized by the database URL as originally requested (before any
# driver rewrite). Filled by get_async_engine() and emptied one entry at a time
# by release_async_engine().
_async_engine_cache: dict[str, "AsyncEngine"] = {}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@lru_cache(maxsize=8)
def _get_engine_cached(target_url: str) -> Engine:
    """
    Build the sync Engine for *target_url*, memoized per URL (at most 8 entries).

    URLs starting with "sqlite" get ``check_same_thread=False`` and no pool
    sizing arguments; every other URL gets explicit pool size, overflow,
    timeout and recycle settings. ``pool_pre_ping`` is enabled in both cases.
    """
    if target_url.startswith("sqlite"):
        return create_engine(
            target_url,
            pool_pre_ping=True,
            connect_args={"check_same_thread": False},
        )

    return create_engine(
        target_url,
        pool_pre_ping=True,
        pool_size=20,
        max_overflow=40,
        pool_timeout=30,
        pool_recycle=1800,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_engine(url: Optional[str] = None) -> Engine:
    """
    Return the cached SQLAlchemy Engine for *url*.

    Falls back to ``config.DATABASE_URL`` when *url* is empty or None. Engine
    construction and caching are delegated to ``_get_engine_cached``.

    Raises:
        RuntimeError: if neither *url* nor ``config.DATABASE_URL`` is set.
    """
    target_url = url or config.DATABASE_URL
    if target_url:
        return _get_engine_cached(target_url)

    raise RuntimeError(
        "DATABASE_URL is not configured.\n"
        "Add it to your .env file, e.g.:\n"
        " DATABASE_URL=postgresql://voidaccess:voidaccess@localhost:5433/voidaccess"
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def release_engine(url: Optional[str] = None) -> None:
    """
    Dispose the engine for *url* and reset the sync engine cache.

    The engine is looked up through ``get_engine`` and its ``dispose()`` is
    called; any error during that step is deliberately ignored (best-effort
    teardown). Afterwards the *entire* ``_get_engine_cached`` cache is cleared,
    so engines cached for other URLs are dropped as well (without an explicit
    dispose). The cache is cleared even when no URL is configured.
    """
    target_url = url or config.DATABASE_URL
    if target_url:
        try:
            get_engine(target_url).dispose()
        except Exception:
            # Best-effort: teardown must not fail because disposal failed.
            pass
    _get_engine_cached.cache_clear()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def get_async_engine(url: Optional[str] = None) -> "AsyncEngine":
    """
    Return an async SQLAlchemy AsyncEngine for *url*.

    Falls back to ``config.DATABASE_URL`` when *url* is empty or None.
    ``postgresql://`` is rewritten to ``postgresql+asyncpg://`` and
    ``sqlite://`` to ``sqlite+aiosqlite://``; any other URL is used unchanged.
    Engines are memoized in ``_async_engine_cache`` under the URL as originally
    requested (before the rewrite).

    Raises:
        RuntimeError: if neither *url* nor ``config.DATABASE_URL`` is set.
    """
    from sqlalchemy.ext.asyncio import AsyncEngine  # noqa: F401  (names the return type)

    target_url = url or config.DATABASE_URL
    if not target_url:
        raise RuntimeError(
            "DATABASE_URL is not configured.\n"
            "Add it to your .env file, e.g.:\n"
            " DATABASE_URL=postgresql://voidaccess:voidaccess@localhost:5433/voidaccess"
        )

    cached = _async_engine_cache.get(target_url)
    if cached is not None:
        return cached

    if target_url.startswith("postgresql://"):
        async_url = target_url.replace("postgresql://", "postgresql+asyncpg://", 1)
    elif target_url.startswith("sqlite://"):
        async_url = target_url.replace("sqlite://", "sqlite+aiosqlite://", 1)
    else:
        async_url = target_url

    # Decide by URL scheme prefix (as the sync engine does), not by substring:
    # a non-SQLite URL whose host, user or database name merely contains
    # "sqlite" must not receive SQLite-only connect arguments.
    if async_url.startswith("sqlite"):
        engine = create_async_engine(
            async_url,
            pool_pre_ping=True,
            connect_args={"check_same_thread": False},
        )
    else:
        engine = create_async_engine(
            async_url,
            pool_pre_ping=True,
            pool_size=20,
            max_overflow=40,
            pool_timeout=30,
            pool_recycle=1800,
        )

    _async_engine_cache[target_url] = engine
    return engine
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def release_async_engine(url: Optional[str] = None) -> None:
    """
    Remove the async engine for *url* from the cache and dispose it.

    Falls back to ``config.DATABASE_URL`` when *url* is empty or None. Does
    nothing when no engine is cached for that URL.

    ``AsyncEngine.dispose()`` is a coroutine, so it has to be driven by an
    event loop; merely calling it would create a coroutine that is never
    awaited and would leave the pool open. Two situations are handled:

    * no event loop is running in this thread: the disposal is run to
      completion before returning;
    * an event loop is already running (the function was called from async
      code): the disposal is scheduled on that loop and finishes shortly
      after this function returns.
    """
    import asyncio  # local import: only needed for this teardown helper

    target_url = url or config.DATABASE_URL
    engine = _async_engine_cache.pop(target_url, None)
    if engine is None:
        return

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in this thread, so blocking here is safe.
        asyncio.run(engine.dispose())
        return

    # Inside a running loop we cannot block. Schedule the disposal and keep a
    # strong reference to the task until it is done, because the event loop
    # itself only holds weak references to tasks.
    pending = release_async_engine.__dict__.setdefault("_pending_disposals", set())
    task = loop.create_task(engine.dispose())
    pending.add(task)
    task.add_done_callback(pending.discard)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_session_factory(url: Optional[str] = None) -> sessionmaker:
    """
    Build a sync ``sessionmaker`` bound to the engine returned by ``get_engine(url)``.

    Sessions produced by the factory have autoflush and autocommit disabled.
    A new factory object is created on every call.
    """
    return sessionmaker(bind=get_engine(url), autoflush=False, autocommit=False)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def get_async_session_factory(url: Optional[str] = None) -> async_sessionmaker:
    """
    Build an ``async_sessionmaker`` bound to the engine from ``get_async_engine(url)``.

    Sessions produced by the factory have autoflush disabled and keep loaded
    attributes after commit (``expire_on_commit=False``).
    """
    return async_sessionmaker(
        bind=get_async_engine(url),
        autoflush=False,
        expire_on_commit=False,
    )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@contextmanager
def get_session(url: Optional[str] = None) -> Generator[Session, None, None]:
    """
    Yield a sync Session inside a commit-or-rollback scope.

    On a clean exit of the ``with`` body the session is committed. If the body
    (or the commit itself) raises an ``Exception``, the session is rolled back
    and the exception is re-raised. The session is closed in every case.

    Example::

        with get_session() as session:
            session.add(entity)
        # committed here
    """
    db_session: Session = get_session_factory(url)()
    try:
        yield db_session
        # Commit stays inside the try so that a failing commit is rolled back too.
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def get_db(url: Optional[str] = None) -> Generator[Session, None, None]:
    """
    Generator dependency that yields one database session and closes it afterwards.

    Intended for FastAPI: ``db: Session = Depends(get_db)``. Unlike
    ``get_session`` this does not commit or roll back; it only guarantees that
    the session is closed once the consumer is finished.
    """
    request_session = get_session_factory(url)()
    try:
        yield request_session
    finally:
        request_session.close()
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
@asynccontextmanager
async def get_async_session(url: Optional[str] = None) -> AsyncGenerator[AsyncSession, None]:
    """
    Async context manager that yields an AsyncSession.

    Nothing is committed automatically; the caller commits explicitly.

    Usage::

        async with get_async_session() as session:
            session.add(entity)
            await session.commit()

    The session is closed when the ``async with`` block exits.
    """
    make_session = get_async_session_factory(url)
    async with make_session() as db_session:
        yield db_session
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@asynccontextmanager
async def async_session_scope(
    url: Optional[str] = None,
) -> AsyncGenerator[AsyncSession, None]:
    """
    Async context manager for a short-lived, self-committing session.

    The session is committed when the block exits cleanly. If the block (or
    the commit) raises an ``Exception``, the session is rolled back and the
    exception is re-raised. The session is closed when the scope ends.

    Example::

        async with async_session_scope() as session:
            result = await session.execute(select(Investigation))
        # committed and closed here
    """
    make_session = get_async_session_factory(url)
    async with make_session() as db_session:
        try:
            yield db_session
            # Kept inside the try so that a failing commit is rolled back too.
            await db_session.commit()
        except Exception:
            await db_session.rollback()
            raise
|
export/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
export — Phase 5 intelligence export module.
|
|
3
|
+
|
|
4
|
+
Re-exports the public API from stix, misp, and sigma sub-modules.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from export.stix import (
|
|
8
|
+
bundle_to_dict,
|
|
9
|
+
bundle_to_json,
|
|
10
|
+
investigation_to_stix_bundle,
|
|
11
|
+
)
|
|
12
|
+
from export.misp import (
|
|
13
|
+
investigation_to_misp_event,
|
|
14
|
+
misp_event_to_json,
|
|
15
|
+
)
|
|
16
|
+
from export.sigma import (
|
|
17
|
+
entities_to_sigma_rules,
|
|
18
|
+
export_sigma_rules,
|
|
19
|
+
sigma_rule_to_yaml,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
# stix
|
|
24
|
+
"investigation_to_stix_bundle",
|
|
25
|
+
"bundle_to_json",
|
|
26
|
+
"bundle_to_dict",
|
|
27
|
+
# misp
|
|
28
|
+
"investigation_to_misp_event",
|
|
29
|
+
"misp_event_to_json",
|
|
30
|
+
# sigma
|
|
31
|
+
"entities_to_sigma_rules",
|
|
32
|
+
"sigma_rule_to_yaml",
|
|
33
|
+
"export_sigma_rules",
|
|
34
|
+
]
|
export/misp.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""
|
|
2
|
+
export/misp.py — Generates MISP event JSON from a VoidAccess investigation.
|
|
3
|
+
|
|
4
|
+
MISP format is constructed directly as a dict — no MISP library required.
|
|
5
|
+
The format follows the MISP standard event structure as documented at
|
|
6
|
+
https://www.misp-standard.org/rfc/misp-core-format.html
|
|
7
|
+
|
|
8
|
+
Public interface
|
|
9
|
+
----------------
|
|
10
|
+
investigation_to_misp_event(investigation_id) → dict
|
|
11
|
+
misp_event_to_json(event) → str
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from typing import Any, Optional
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Entity type → MISP attribute mapping
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
# Lookup table: entity type name -> MISP attribute fields ("type", "category")
# plus the "to_ids" flag copied onto each exported attribute. Entity types that
# are not listed here are skipped by the exporter.
_MISP_ATTR_MAP: dict[str, dict] = {
    "BITCOIN_ADDRESS": {"type": "btc", "category": "Financial fraud", "to_ids": True},
    "ETHEREUM_ADDRESS": {"type": "other", "category": "Financial fraud", "to_ids": True},
    "MONERO_ADDRESS": {"type": "other", "category": "Financial fraud", "to_ids": True},
    "EMAIL_ADDRESS": {"type": "email-src", "category": "Network activity", "to_ids": False},
    "ONION_URL": {"type": "url", "category": "Network activity", "to_ids": True},
    "IP_ADDRESS": {"type": "ip-dst", "category": "Network activity", "to_ids": True},
    "CVE_NUMBER": {"type": "vulnerability", "category": "External analysis", "to_ids": False},
    "MALWARE_FAMILY": {"type": "malware-type", "category": "Antivirus detection", "to_ids": False},
    "RANSOMWARE_GROUP": {"type": "malware-type", "category": "Antivirus detection", "to_ids": False},
    "THREAT_ACTOR_HANDLE": {"type": "threat-actor", "category": "Attribution", "to_ids": False},
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
# Public interface
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def investigation_to_misp_event(
    investigation_id: Any,
    entity_ids: Optional[list[str]] = None,
) -> dict:
    """
    Build a MISP-style event dict for one investigation.

    Args:
        investigation_id: identifier passed to the loader, which coerces it to a UUID.
        entity_ids: optional list of entity ids; when given, only those
            entities are exported.

    Returns:
        ``{"Event": {...}}``. When the investigation cannot be loaded the event
        contains only ``info == "Not found"`` and an empty ``Attribute`` list.
        Otherwise it carries the query in ``info``, the creation date, fixed
        threat level / analysis / distribution values, and one attribute per
        entity whose type appears in ``_MISP_ATTR_MAP``.
    """
    record, found_entities = _load_investigation_and_entities(
        investigation_id, entity_ids=entity_ids
    )
    if record is None:
        return {"Event": {"info": "Not found", "Attribute": []}}

    event_date = _utc_date_str(record.created_at)
    search_query = getattr(record, "query", "") or ""

    def _as_attribute(item):
        """Translate one entity into a MISP attribute, or None if its type is unmapped."""
        spec = _MISP_ATTR_MAP.get(item.entity_type)
        if spec is None:
            return None
        if item.source_url:
            note = f"Source: {item.source_url}"
        else:
            note = "Source: unknown"
        return {
            "type": spec["type"],
            "category": spec["category"],
            "value": item.value,
            "comment": note,
            "to_ids": spec["to_ids"],
        }

    attributes: list[dict] = [
        attribute
        for attribute in map(_as_attribute, found_entities)
        if attribute is not None
    ]

    event_body = {
        "info": f"VoidAccess Investigation: {search_query}",
        "date": event_date,
        "threat_level_id": "2",  # Medium
        "analysis": "2",  # Completed
        "distribution": "0",  # Your organisation only
        "Attribute": attributes,
    }
    return {"Event": event_body}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def misp_event_to_json(event: dict) -> str:
    """
    Serialise a MISP event dict to pretty-printed JSON (2-space indent).

    Values JSON cannot encode natively are converted with ``str``. If
    serialisation still fails, a warning is logged and the JSON of a minimal
    "Not found" event is returned instead of raising.
    """
    try:
        rendered = json.dumps(event, indent=2, default=str)
    except Exception as exc:
        logger.warning("misp_event_to_json failed: %s", exc)
        fallback = {"Event": {"info": "Not found", "Attribute": []}}
        return json.dumps(fallback, indent=2)
    return rendered
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Internal helpers
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _load_investigation_and_entities(
    investigation_id: Any,
    entity_ids: Optional[list[str]] = None,
):
    """
    Fetch an investigation and its entities from the database.

    Entities are those whose ``investigation_id`` matches the investigation
    plus those referenced through ``InvestigationEntityLink``. When
    *entity_ids* is non-empty, the result is narrowed to entities whose id is
    among the values that parse as UUIDs (unparseable values are ignored).

    Returns:
        ``(investigation, entities)`` where entities are ``NormalizedEntity``
        objects, or ``(None, [])`` when ``DATABASE_URL`` is not set in the
        environment, the id is not a valid UUID, the investigation does not
        exist, or any error occurs (errors are logged as warnings).
    """
    import uuid as _uuid

    if not os.getenv("DATABASE_URL"):
        return None, []

    try:
        from db.session import get_session  # noqa: PLC0415
        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415
        from db.models import Entity, InvestigationEntityLink  # noqa: PLC0415
        from extractor.normalizer import NormalizedEntity  # noqa: PLC0415

        inv_uuid = _coerce_uuid(investigation_id)
        if inv_uuid is None:
            return None, []

        # None means "no filtering"; a list (possibly empty) means "filter".
        wanted_ids = None
        if entity_ids:
            wanted_ids = []
            for raw_id in entity_ids:
                try:
                    parsed = _uuid.UUID(str(raw_id))
                except (ValueError, AttributeError):
                    continue
                wanted_ids.append(parsed)

        def _page_url(row):
            """Return the URL of the entity's page, or "" if unavailable."""
            try:
                if row.page:
                    return row.page.url or ""
            except Exception:
                pass
            return ""

        with get_session() as session:
            investigation = get_investigation_by_id_or_run(session, inv_uuid)
            if investigation is None:
                return None, []

            linked_ids_subq = (
                session.query(InvestigationEntityLink.entity_id)
                .filter(InvestigationEntityLink.investigation_id == investigation.id)
                .subquery()
            )
            owned_or_linked = (
                Entity.investigation_id == investigation.id
            ) | Entity.id.in_(linked_ids_subq)
            rows = session.query(Entity).filter(owned_or_linked).all()

            if wanted_ids is not None:
                keep = frozenset(wanted_ids)
                rows = [row for row in rows if row.id in keep]

            normalized = [
                NormalizedEntity(
                    entity_type=row.entity_type,
                    value=row.canonical_value or row.value,
                    confidence=row.confidence,
                    source_url=_page_url(row),
                    page_id=row.page_id,
                    context_snippet=row.context_snippet or "",
                )
                for row in rows
            ]

            # Detach loaded objects before the session scope ends so the
            # investigation can still be read by the caller.
            session.expunge_all()
            return investigation, normalized

    except Exception as exc:
        logger.warning("_load_investigation_and_entities failed: %s", exc)
        return None, []
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _coerce_uuid(value: Any):
    """
    Convert *value* to a ``uuid.UUID``.

    A UUID instance is returned as-is; anything else is converted to ``str``
    and parsed. Returns None when the text is not a valid UUID.
    """
    import uuid as _uuid

    if isinstance(value, _uuid.UUID):
        return value
    try:
        parsed = _uuid.UUID(str(value))
    except (ValueError, AttributeError):
        return None
    return parsed
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _utc_date_str(dt: Optional[Any]) -> str:
    """
    Format *dt* as a ``YYYY-MM-DD`` string, expressed in UTC.

    Args:
        dt: usually a ``datetime``. A timezone-aware datetime is converted to
            UTC first, so the calendar date does not depend on the offset it
            happens to carry. Naive datetimes (and any other object offering
            ``strftime``) are formatted as they are.

    Returns:
        The formatted date. Today's UTC date is returned when *dt* is None or
        cannot be formatted.
    """
    if dt is None:
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")
    try:
        # Only aware datetimes can be normalised; naive values carry no offset
        # and are left untouched to preserve the previous behaviour.
        if isinstance(dt, datetime) and dt.utcoffset() is not None:
            dt = dt.astimezone(timezone.utc)
        return dt.strftime("%Y-%m-%d")
    except Exception:
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")
|