voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
sources/blockchain.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Blockchain transaction lookup for extracted wallet addresses.
|
|
3
|
+
Queries free APIs to get transaction history and connected addresses.
|
|
4
|
+
|
|
5
|
+
Supports: Bitcoin (BlockCypher), Ethereum (Etherscan)
|
|
6
|
+
Monero: privacy coin, no public lookup possible
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import aiohttp
|
|
11
|
+
import logging
|
|
12
|
+
import uuid
|
|
13
|
+
from typing import Optional, List, Dict, Any
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
|
|
16
|
+
from db.models import Entity, EntityRelationship, RelationshipType
|
|
17
|
+
from db.queries import upsert_entity_canonical
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
BLOCKCYPHER_BASE = "https://api.blockcypher.com/v1/btc/main"
|
|
22
|
+
ETHERSCAN_BASE = "https://api.etherscan.io/api"
|
|
23
|
+
|
|
24
|
+
# Reasonable caps to avoid hammering free APIs
|
|
25
|
+
MAX_TRANSACTIONS_PER_WALLET = 50
|
|
26
|
+
MAX_CONNECTED_ADDRESSES = 10 # How many counterparty addresses to extract
|
|
27
|
+
|
|
28
|
+
# Entity type constants to match extractor/regex_patterns.py
|
|
29
|
+
BITCOIN_ADDRESS = "BITCOIN_ADDRESS"
|
|
30
|
+
ETHEREUM_ADDRESS = "ETHEREUM_ADDRESS"
|
|
31
|
+
MONERO_ADDRESS = "MONERO_ADDRESS"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def detect_wallet_type(address: str) -> Optional[str]:
    """
    Detect cryptocurrency type from address format.

    The check is purely syntactic (prefix, length and character set); no
    checksum is verified.

    Args:
        address: Candidate wallet address. Surrounding whitespace is ignored.
            Non-string or empty input is treated as unrecognized.

    Returns: BITCOIN_ADDRESS, ETHEREUM_ADDRESS, MONERO_ADDRESS, or None
    """
    if not isinstance(address, str):
        return None

    address = address.strip()
    if not address:
        return None

    # Bitcoin legacy: starts with 1 or 3 and uses only the Base58 alphabet.
    # Base58 has no 0, O, I or l (and no whitespace), so ordinary text that
    # merely begins with a digit is not mistaken for an address.
    base58_alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
    if (
        address.startswith(("1", "3"))
        and 25 <= len(address) <= 34
        and all(ch in base58_alphabet for ch in address)
    ):
        return BITCOIN_ADDRESS

    # Bitcoin SegWit: starts with bc1
    if address.startswith("bc1") and len(address) >= 42:
        return BITCOIN_ADDRESS

    # Ethereum: 0x prefix followed by exactly 40 hexadecimal digits
    if (
        address.startswith("0x")
        and len(address) == 42
        and all(ch in "0123456789abcdefABCDEF" for ch in address[2:])
    ):
        return ETHEREUM_ADDRESS

    # Monero: starts with 4, 95-106 chars
    if address.startswith("4") and 95 <= len(address) <= 106:
        return MONERO_ADDRESS

    return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def lookup_bitcoin_address(
    address: str,
    api_token: str = "",
) -> dict:
    """
    Look up Bitcoin address via BlockCypher API.

    Args:
        address: Bitcoin address to query.
        api_token: Optional BlockCypher token; sent as the ``token`` query
            parameter when non-empty.

    Returns:
        dict with financial metadata (``total_received_btc``,
        ``total_sent_btc``, ``balance_btc``, ``transaction_count``,
        ``first_seen``, ``last_seen``), ``connected_addresses``,
        ``lookup_successful`` and ``error``. Failures are reported through
        ``error`` ("rate_limited", "no_transactions", "http_<status>",
        "timeout" or a truncated exception message) instead of being raised.
    """
    result = {
        "address": address,
        "wallet_type": BITCOIN_ADDRESS,
        "total_received_btc": 0.0,
        "total_sent_btc": 0.0,
        "balance_btc": 0.0,
        "transaction_count": 0,
        "first_seen": None,
        "last_seen": None,
        "connected_addresses": [],
        "lookup_successful": False,
        "error": None,
    }

    satoshis_per_btc = 100_000_000

    def to_btc(satoshis: Any) -> float:
        # True division gives the correctly rounded float; multiplying by a
        # pre-computed reciprocal rounds twice (3 sat -> 3.0000000000000004e-08).
        # A missing or null amount counts as zero.
        return (satoshis or 0) / satoshis_per_btc

    params = {"limit": MAX_TRANSACTIONS_PER_WALLET}
    if api_token:
        params["token"] = api_token

    try:
        connector = aiohttp.TCPConnector(ssl=True)
        timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
            url = f"{BLOCKCYPHER_BASE}/addrs/{address}"
            async with session.get(url, params=params) as resp:

                if resp.status == 429:
                    logger.warning("BlockCypher rate limited for %s...", address[:12])
                    result["error"] = "rate_limited"
                    return result

                if resp.status == 404:
                    # Valid address with no transactions
                    result["lookup_successful"] = True
                    result["error"] = "no_transactions"
                    return result

                if resp.status != 200:
                    result["error"] = f"http_{resp.status}"
                    return result

                data = await resp.json()

        result["total_received_btc"] = to_btc(data.get("total_received"))
        result["total_sent_btc"] = to_btc(data.get("total_sent"))
        result["balance_btc"] = to_btc(data.get("final_balance"))
        result["transaction_count"] = data.get("n_tx") or 0
        result["lookup_successful"] = True

        # Confirmed and unconfirmed transaction refs; a null field counts as empty
        txrefs = (data.get("txrefs") or []) + (data.get("unconfirmed_txrefs") or [])

        # first/last seen come from the confirmation times of confirmed refs
        times = [t["confirmed"] for t in txrefs if t.get("confirmed")]
        if times:
            result["first_seen"] = min(times)
            result["last_seen"] = max(times)

        # BlockCypher address endpoint gives txrefs but not full tx details,
        # so entries are keyed by transaction hash and carry no counterparty
        # address. Getting actual counterparty addresses requires fetching
        # the full transaction.
        connected = {}
        for tx in txrefs[:MAX_TRANSACTIONS_PER_WALLET]:
            value_btc = to_btc(tx.get("value"))
            tx_hash = tx.get("tx_hash") or ""

            # tx_input_n >= 0 means this address was an input (sending)
            input_index = tx.get("tx_input_n")
            if input_index is not None and input_index >= 0:
                direction = "sent"  # We sent to someone
            else:
                direction = "received"  # Someone sent to us

            if value_btc > 0.001 and tx_hash:  # Only significant transactions
                connected[tx_hash] = {
                    "direction": direction,
                    "amount": round(value_btc, 8),
                    "tx_hash": tx_hash,
                    "confirmed": tx.get("confirmed", ""),
                }

        result["connected_addresses"] = list(connected.values())[:MAX_CONNECTED_ADDRESSES]

    except asyncio.TimeoutError:
        result["error"] = "timeout"
        logger.warning("BlockCypher timeout for %s...", address[:12])
    except Exception as e:
        # Best-effort enrichment: report the failure in the result instead of raising
        result["error"] = str(e)[:100]
        logger.warning("BlockCypher lookup failed for %s...: %s", address[:12], e)

    return result
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
async def lookup_ethereum_address(
    address: str,
    api_key: str = "",
) -> dict:
    """
    Look up Ethereum address via Etherscan API.

    Two requests are made: the current balance (best effort) and the newest
    page of normal transactions (at most MAX_TRANSACTIONS_PER_WALLET rows).

    Args:
        address: Ethereum address to query.
        api_key: Etherscan API key; without one no request is made.

    Returns:
        dict with ``balance_eth``, ``transaction_count`` (rows in the fetched
        page), ``first_seen``, ``last_seen``, ``connected_addresses``,
        ``lookup_successful`` and ``error``. Failures are reported through
        ``error`` ("no_api_key", "no_transactions", "http_<status>",
        "timeout", an API message or a truncated exception message) instead
        of being raised.
    """
    result = {
        "address": address,
        "wallet_type": ETHEREUM_ADDRESS,
        "balance_eth": 0.0,
        "transaction_count": 0,
        "first_seen": None,
        "last_seen": None,
        "connected_addresses": [],
        "lookup_successful": False,
        "error": None,
    }

    if not api_key:
        result["error"] = "no_api_key"
        return result

    # Divide by this instead of multiplying by a reciprocal: int / int is
    # correctly rounded, reciprocal multiplication rounds twice.
    wei_per_eth = 10**18
    own_address = address.lower()

    try:
        connector = aiohttp.TCPConnector(ssl=True)
        timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:

            # Get balance (a failure here does not fail the whole lookup)
            async with session.get(
                ETHERSCAN_BASE,
                params={
                    "module": "account",
                    "action": "balance",
                    "address": address,
                    "tag": "latest",
                    "apikey": api_key,
                },
            ) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    if data.get("status") == "1":
                        result["balance_eth"] = int(data.get("result") or 0) / wei_per_eth

            # Get transactions
            async with session.get(
                ETHERSCAN_BASE,
                params={
                    "module": "account",
                    "action": "txlist",
                    "address": address,
                    "startblock": 0,
                    "endblock": 99999999,
                    "page": 1,
                    "offset": MAX_TRANSACTIONS_PER_WALLET,
                    "sort": "desc",
                    "apikey": api_key,
                },
            ) as resp:
                if resp.status != 200:
                    result["error"] = f"http_{resp.status}"
                    return result
                data = await resp.json()

        if data.get("status") != "1":
            if data.get("message") == "No transactions found":
                # Valid address with no transactions (mirrors the Bitcoin lookup)
                result["lookup_successful"] = True
                result["error"] = "no_transactions"
            else:
                # On errors Etherscan puts the detail in "result" as a string
                detail = data.get("result")
                if not isinstance(detail, str) or not detail:
                    detail = data.get("message") or "api_error"
                result["error"] = str(detail)[:100]
            return result

        txs = data.get("result") or []
        result["transaction_count"] = len(txs)
        result["lookup_successful"] = True

        connected = []
        for tx in txs:
            recipient = tx.get("to") or ""
            is_incoming = recipient.lower() == own_address
            counterparty = tx.get("from") if is_incoming else recipient
            value_eth = int(tx.get("value") or 0) / wei_per_eth

            if counterparty and counterparty.lower() != own_address and value_eth > 0.001:
                connected.append({
                    "address": counterparty,
                    "direction": "received" if is_incoming else "sent",
                    "amount": round(value_eth, 6),
                    "tx_hash": tx.get("hash", ""),
                    "confirmed": tx.get("timeStamp", ""),
                })

        result["connected_addresses"] = connected[:MAX_CONNECTED_ADDRESSES]

        if txs:
            # Sorted desc, so first is newest
            result["last_seen"] = txs[0].get("timeStamp", "")
            # Only the newest page is fetched; its oldest row is the wallet's
            # first transaction only when the page is not full.
            if len(txs) < MAX_TRANSACTIONS_PER_WALLET:
                result["first_seen"] = txs[-1].get("timeStamp", "")

    except asyncio.TimeoutError:
        result["error"] = "timeout"
        logger.warning("Etherscan timeout for %s...", address[:12])
    except Exception as e:
        # Best-effort enrichment: report the failure in the result instead of raising
        result["error"] = str(e)[:100]
        logger.warning("Etherscan lookup failed for %s...: %s", address[:12], e)

    return result
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def lookup_wallet(
    address: str,
    blockcypher_token: str = "",
    etherscan_key: str = "",
) -> dict:
    """
    Unified wallet lookup: classify *address* and dispatch to the matching API.

    Bitcoin goes to lookup_bitcoin_address (with *blockcypher_token*) and
    Ethereum to lookup_ethereum_address (with *etherscan_key*). Monero and
    unrecognized addresses trigger no request and yield a failure dict.
    """
    kind = detect_wallet_type(address)

    if kind == BITCOIN_ADDRESS:
        return await lookup_bitcoin_address(address, blockcypher_token)

    if kind == ETHEREUM_ADDRESS:
        return await lookup_ethereum_address(address, etherscan_key)

    if kind == MONERO_ADDRESS:
        # Privacy coin: there is nothing to query
        return {
            "address": address,
            "wallet_type": MONERO_ADDRESS,
            "lookup_successful": False,
            "error": "monero_privacy_coin",
            "note": "Monero transactions are private and cannot be looked up without view key",
        }

    return {
        "address": address,
        "wallet_type": "unknown",
        "lookup_successful": False,
        "error": "unrecognized_format",
    }
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _parse_chain_timestamp(raw: Any) -> Optional[datetime]:
    """Convert an epoch-seconds value or ISO-8601 string to a datetime; None if unparseable."""
    try:
        if isinstance(raw, int) or (isinstance(raw, str) and raw.isdigit()):
            # Epoch seconds (Etherscan "timeStamp" arrives as a digit string)
            return datetime.fromtimestamp(int(raw), tz=timezone.utc)
        if isinstance(raw, str):
            # BlockCypher ISO format
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except (ValueError, OverflowError, OSError):
        return None
    return None


async def enrich_wallets_for_investigation(
    investigation_id: uuid.UUID,
    session: Any,
    blockcypher_token: str = "",
    etherscan_key: str = "",
    max_wallets: int = 10,
) -> dict:
    """
    For all crypto wallet entities in an investigation:
    1. Look up transaction data from blockchain APIs
    2. Store a balance/transaction summary on the entity
    3. Create connected address entities and PAID_TO relationships

    Relationships are always stored as PAID_TO; direction is encoded by which
    entity is the source (payer) and which is the target (payee). Connected
    entries without an ``address`` key are skipped.

    Args:
        investigation_id: Investigation whose wallet entities are enriched.
        session: Database session used for querying, flushing and the final
            commit.
        blockcypher_token: Passed through to the Bitcoin lookup.
        etherscan_key: Passed through to the Ethereum lookup.
        max_wallets: Maximum number of wallet entities to process.

    Returns:
        dict of counters: ``wallets_looked_up``, ``successful_lookups``,
        ``edges_created``, ``connected_wallets_found``, ``errors``.
    """
    stats = {
        "wallets_looked_up": 0,
        "successful_lookups": 0,
        "edges_created": 0,
        "connected_wallets_found": 0,
        "errors": 0,
    }

    # Get all wallet entities for this investigation
    wallets = (
        session.query(Entity)
        .filter(
            Entity.investigation_id == investigation_id,
            Entity.entity_type.in_([BITCOIN_ADDRESS, ETHEREUM_ADDRESS, MONERO_ADDRESS]),
            Entity.value.isnot(None),
        )
        .limit(max_wallets)
        .all()
    )

    if not wallets:
        return stats

    logger.warning(f"Blockchain enrichment: {len(wallets)} wallets to process")

    for wallet_entity in wallets:
        address = wallet_entity.value.strip()
        stats["wallets_looked_up"] += 1

        try:
            lookup_result = await lookup_wallet(
                address=address,
                blockcypher_token=blockcypher_token,
                etherscan_key=etherscan_key,
            )

            if not lookup_result.get("lookup_successful"):
                # Monero is expected to be unresolvable and is not an error
                if lookup_result.get("error") != "monero_privacy_coin":
                    stats["errors"] += 1
                continue

            stats["successful_lookups"] += 1

            # Update entity historical_context with financial summary
            wallet_type = lookup_result.get("wallet_type", "")
            tx_count = lookup_result.get("transaction_count", 0)

            if wallet_type == BITCOIN_ADDRESS:
                balance = lookup_result.get("balance_btc", 0)
                summary = f"BTC Balance: {balance:.4f} BTC, Transactions: {tx_count}"
            else:
                balance = lookup_result.get("balance_eth", 0)
                summary = f"ETH Balance: {balance:.4f} ETH, Transactions: {tx_count}"

            # Never overwrite context that is already present
            if not wallet_entity.historical_context:
                wallet_entity.historical_context = summary

            # Update first_seen if available
            first_seen_val = lookup_result.get("first_seen")
            if first_seen_val and not wallet_entity.first_seen:
                parsed_first_seen = _parse_chain_timestamp(first_seen_val)
                if parsed_first_seen is not None:
                    wallet_entity.first_seen = parsed_first_seen
                else:
                    logger.debug(
                        "Unparseable first_seen %r for %s", first_seen_val, address[:12]
                    )

            # Process connected addresses
            connected = lookup_result.get("connected_addresses", [])
            for conn in connected:
                conn_address = conn.get("address")
                if not conn_address or conn_address.lower() == address.lower():
                    continue

                # Detect type for counterparty; fall back to the wallet's own type
                conn_type = detect_wallet_type(conn_address) or wallet_type

                # Create counterparty entity
                conn_entity, _ = upsert_entity_canonical(
                    session=session,
                    investigation_id=investigation_id,
                    entity_type=conn_type,
                    entity_value=conn_address,
                    confidence=0.95,
                    context_snippet=f"Related to {address[:12]} via blockchain transaction",
                    extraction_method="blockchain_api",
                )
                stats["connected_wallets_found"] += 1

                # Build Relationship
                direction = conn.get("direction", "sent")
                if direction == "received":
                    # conn PAID_TO us
                    source_id = conn_entity.id
                    target_id = wallet_entity.id
                else:
                    # us PAID_TO conn
                    source_id = wallet_entity.id
                    target_id = conn_entity.id

                # Check duplication
                existing = session.query(EntityRelationship).filter_by(
                    entity_a_id=source_id,
                    entity_b_id=target_id,
                    relationship_type=RelationshipType.PAID_TO.value
                ).first()

                if not existing:
                    rel_kwargs = {
                        "entity_a_id": source_id,
                        "entity_b_id": target_id,
                        "relationship_type": RelationshipType.PAID_TO.value,
                        "confidence": 0.95,
                    }
                    # Pass metadata_json only when the model has it: the
                    # SQLAlchemy default constructor rejects an unknown
                    # keyword even when its value is None.
                    if hasattr(EntityRelationship, "metadata_json"):
                        rel_kwargs["metadata_json"] = {
                            "amount": conn.get("amount"),
                            "currency": "BTC" if wallet_type == BITCOIN_ADDRESS else "ETH",
                            "tx_hash": conn.get("tx_hash"),
                        }
                    session.add(EntityRelationship(**rel_kwargs))
                    stats["edges_created"] += 1

            session.flush()

        except Exception as e:
            # One failing wallet must not abort the remaining ones
            stats["errors"] += 1
            logger.warning(f"Wallet enrichment failed for {address[:12]}: {e}")

        # Respect rate limits
        await asyncio.sleep(0.4)

    session.commit()
    return stats
|
sources/cache.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sources/cache.py — Simple file-based TTL cache for external feeds.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
_memory_cache: dict[str, tuple[Any, float]] = {}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CachedFeed:
    """
    Fetch a remote JSON feed and cache it to *cache_path* for *ttl_seconds*.

    On every call to :meth:`fetch`:
    - If a fresh in-memory entry or cache file exists, return cached data.
    - If stale or missing: fetch from *url*, save, and return.
    - If fetch fails but stale data exists (memory first, then file): return
      it with a warning.
    - If fetch fails and no cache exists: log and return None.
    """

    def __init__(self, url: str, cache_path: str, ttl_seconds: int):
        self.url = url
        self.cache_path = Path(cache_path)
        self.ttl_seconds = ttl_seconds

    def _is_fresh(self) -> bool:
        """Return True when the cache file exists and is younger than the TTL."""
        try:
            mtime = self.cache_path.stat().st_mtime
        except OSError:
            # Missing (or unreadable) file; also covers deletion between calls
            return False
        return (time.time() - mtime) < self.ttl_seconds

    async def fetch(self) -> Optional[dict | list]:
        """Return the feed content from memory, disk or the network, or None."""
        now = time.time()
        cache_key = self.url

        entry = _memory_cache.get(cache_key)
        if entry is not None and (now - entry[1]) < self.ttl_seconds:
            return entry[0]

        if self._is_fresh():
            try:
                with self.cache_path.open("r", encoding="utf-8") as f:
                    data = json.load(f)
                # Stamp the entry with the file's age, not "now": otherwise
                # data read just before expiry is served for another full TTL.
                _memory_cache[cache_key] = (data, self.cache_path.stat().st_mtime)
                return data
            except (OSError, ValueError) as e:
                # Corrupt or vanished file: fall through and refetch
                logger.warning(
                    "CachedFeed: unreadable cache %s (%s); refetching", self.cache_path, e
                )

        # Imported only when the network is actually needed
        import aiohttp

        try:
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self.url) as resp:
                    if resp.status != 200:
                        logger.warning("CachedFeed: HTTP %s for %s", resp.status, self.url)
                        return self._fallback()
                    # content_type=None: decode JSON regardless of the Content-Type header
                    data = await resp.json(content_type=None)
        except Exception as e:
            logger.warning("CachedFeed: fetch failed for %s: %s", self.url, e)
            return self._fallback()

        _memory_cache[cache_key] = (data, now)
        self._write_cache(data)
        return data

    def _write_cache(self, data: Any) -> None:
        """Persist *data* to the cache file; a write failure is logged, not raised."""
        try:
            self.cache_path.parent.mkdir(parents=True, exist_ok=True)
            with self.cache_path.open("w", encoding="utf-8") as f:
                json.dump(data, f)
        except OSError as e:
            # The fetched data is still valid even if it cannot be saved
            logger.warning("CachedFeed: could not write cache %s: %s", self.cache_path, e)

    def _fallback(self) -> Optional[dict | list]:
        """Return expired data after a failed fetch: memory first, then the cache file."""
        entry = _memory_cache.get(self.url)
        if entry is not None:
            logger.warning("CachedFeed: returning stale memory cache for %s", self.url)
            return entry[0]
        return self._stale_cache()

    def _stale_cache(self) -> Optional[dict | list]:
        """Load the cache file regardless of age; None if it is missing or unreadable."""
        try:
            with self.cache_path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError) as e:
            logger.warning("CachedFeed: unreadable stale cache %s: %s", self.cache_path, e)
            return None
        logger.warning("CachedFeed: falling back to stale cache %s", self.cache_path)
        return data
|
sources/cisa.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sources/cisa.py — CISA KEV catalog + CISA advisories feed enrichment.
|
|
3
|
+
|
|
4
|
+
Fetches two CISA feeds (clearnet, not through Tor):
|
|
5
|
+
1. Known Exploited Vulnerabilities (KEV) catalog — 24-hour TTL cache
|
|
6
|
+
2. Cybersecurity advisories index — 6-hour TTL cache
|
|
7
|
+
|
|
8
|
+
For CVE entities: checks if they appear in the KEV catalog and marks them
|
|
9
|
+
as actively exploited.
|
|
10
|
+
For THREAT_ACTOR / RANSOMWARE_GROUP / MALWARE_FAMILY entities: searches
|
|
11
|
+
advisory titles and tags for name matches.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from sources.cache import CachedFeed
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
CISA_KEV_URL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json"
|
|
24
|
+
CISA_ADVISORIES_URL = "https://www.cisa.gov/cybersecurity-advisories/all.json"
|
|
25
|
+
|
|
26
|
+
_KEV_CACHE = "/tmp/voidaccess_cisa_kev.json"
|
|
27
|
+
_ADVISORIES_CACHE = "/tmp/voidaccess_cisa_advisories.json"
|
|
28
|
+
|
|
29
|
+
_kev_feed = CachedFeed(CISA_KEV_URL, _KEV_CACHE, ttl_seconds=86400)
|
|
30
|
+
_adv_feed = CachedFeed(CISA_ADVISORIES_URL, _ADVISORIES_CACHE, ttl_seconds=21600)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def enrich_cisa_cve(cve_id: str) -> list[dict]:
    """
    Look *cve_id* up in the CISA KEV catalog (case-insensitive).

    Returns a one-element list holding the enrichment dict for the first
    matching catalog entry, or an empty list when the feed is unavailable or
    the CVE is not listed.
    """
    catalog = await _kev_feed.fetch()
    if catalog is None:
        return []

    # The feed is either a bare list or an object wrapping "vulnerabilities"
    if isinstance(catalog, list):
        entries = catalog
    else:
        entries = catalog.get("vulnerabilities", [])

    wanted = cve_id.upper()
    hit = next(
        (item for item in entries if (item.get("cveID") or "").upper() == wanted),
        None,
    )
    if hit is None:
        return []

    return [{
        "source": "cisa_kev",
        "entity_type": "CVE_NUMBER",
        "entity_value": cve_id,
        "is_actively_exploited": True,
        "vendor_project": hit.get("vendorProject", ""),
        "product": hit.get("product", ""),
        "vulnerability_name": hit.get("vulnerabilityName", ""),
        "date_added": hit.get("dateAdded", ""),
        "short_description": hit.get("shortDescription", ""),
    }]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def enrich_cisa_advisories(entity_value: str, entity_type: str) -> list[dict]:
    """
    Search CISA advisories for *entity_value* matching THREAT_ACTOR,
    RANSOMWARE_GROUP, or MALWARE_FAMILY.

    Matching is a case-insensitive substring test against each advisory's
    title and tags.

    Args:
        entity_value: Name to search for; surrounding whitespace is ignored.
        entity_type: Entity type; any type other than the three above yields
            no results.

    Returns:
        One dict per matching advisory; empty when the type is unsupported,
        the value is blank, or the feed is unavailable.
    """
    if entity_type not in ("THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY"):
        return []

    q = (entity_value or "").strip().lower()
    if not q:
        # "" is a substring of every string, so a blank value would
        # otherwise match every advisory in the feed.
        return []

    data = await _adv_feed.fetch()
    if data is None:
        return []

    # The feed is either a bare list or an object wrapping "items"
    advisories = data if isinstance(data, list) else data.get("items", [])
    results = []

    for adv in advisories:
        title = (adv.get("title") or "").lower()
        raw_tags = adv.get("tags") or []
        if isinstance(raw_tags, str):
            # A single tag given as a string must not be joined per character
            raw_tags = [raw_tags]
        tags = " ".join(str(tag) for tag in raw_tags).lower()
        if q in title or q in tags:
            results.append({
                "source": "cisa_advisory",
                "entity_type": entity_type,
                "entity_value": entity_value,
                "advisory_title": adv.get("title", ""),
                "advisory_url": adv.get("url", ""),
                "advisory_date": adv.get("datePublished", ""),
            })
    return results
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def enrich_cisa(query: str, entities: list[dict]) -> list[dict]:
    """
    Main entry point for CISA enrichment.

    CVE entities are checked against the KEV catalog; THREAT_ACTOR /
    RANSOMWARE_GROUP / MALWARE_FAMILY entities are searched for in the
    advisories feed. Entities may use either ``type``/``value`` or
    ``entity_type``/``entity_value`` keys; entities with an empty value or
    any other type are ignored.
    """
    advisory_types = ("THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY")
    collected: list[dict] = []

    for entity in entities:
        kind = entity.get("type") or entity.get("entity_type", "")
        value = entity.get("value") or entity.get("entity_value", "")

        if not value:
            continue

        if kind == "CVE_NUMBER":
            collected += await enrich_cisa_cve(value)
        elif kind in advisory_types:
            collected += await enrich_cisa_advisories(value, kind)

    return collected
|