voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
sources/ip_reputation.py
ADDED
|
@@ -0,0 +1,521 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sources/ip_reputation.py — IP reputation enrichment.
|
|
3
|
+
|
|
4
|
+
Checks extracted IP addresses against four sources:
|
|
5
|
+
- Feodo Tracker (abuse.ch): confirmed C2 IPs for banking trojans/ransomware loaders
|
|
6
|
+
- C2IntelFeeds (montysecurity/C2-Tracker): framework-specific C2 IPs
|
|
7
|
+
- AbuseIPDB: community abuse reports (requires ABUSEIPDB_API_KEY)
|
|
8
|
+
- GreyNoise: scanner classification (requires GREYNOISE_API_KEY)
|
|
9
|
+
|
|
10
|
+
GreyNoise "benign" IPs (known legitimate scanners) are SUPPRESSED from results.
|
|
11
|
+
Feodo and C2IntelFeeds are fully public and need no API key; the AbuseIPDB and GreyNoise checks are skipped when their keys are not set.
|
|
12
|
+
|
|
13
|
+
Public interface
|
|
14
|
+
----------------
|
|
15
|
+
async load_feodo_feed() → dict[ip, malware_family]
|
|
16
|
+
async load_c2_feeds() → dict[framework, set[ip]]
|
|
17
|
+
async check_ip_reputation(ip, base_confidence) → dict with suppress/tags/threat_confidence
|
|
18
|
+
async enrich_ip_entities(extraction_results, investigation_id) → (results, stats)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
import csv
|
|
25
|
+
import ipaddress
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
import os
|
|
29
|
+
import time
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import aiohttp
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
# Upper bound on the number of unique IPs checked per enrichment run.
MAX_IPS = 50

# Feodo Tracker (abuse.ch) IP blocklist in CSV form.
FEODO_CSV_URL = "https://feodotracker.abuse.ch/downloads/ipblocklist.csv"

# Per-framework plain-text IP lists published in montysecurity/C2-Tracker.
# File names are kept percent-encoded exactly as they are requested.
_C2_TRACKER_DATA_URL = "https://raw.githubusercontent.com/montysecurity/C2-Tracker/main/data"
C2_FEED_URLS: dict[str, str] = {
    framework: f"{_C2_TRACKER_DATA_URL}/{filename}"
    for framework, filename in (
        ("cobalt_strike", "Cobalt%20Strike%20C2%20IPs.txt"),
        ("sliver", "Sliver%20C2%20IPs.txt"),
        ("metasploit", "Metasploit%20Framework%20C2%20IPs.txt"),
        ("brute_ratel", "Brute%20Ratel%20C4%20IPs.txt"),
        ("posh_c2", "Posh%20C2%20IPs.txt"),
        ("havoc", "Havoc%20C2%20IPs.txt"),
    )
}

# In-memory feed caches (module-level singletons, refreshed on TTL expiry).
# Each entry holds the parsed feed under "ips" and the epoch time of the last
# successful load under "loaded_at" (0.0 means "never loaded").
_feed_cache: dict[str, dict] = {
    feed_name: {"ips": {}, "loaded_at": 0.0}
    for feed_name in ("feodo", "c2feeds")
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Helpers
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
def _feed_ttl_seconds() -> float:
|
|
61
|
+
try:
|
|
62
|
+
hours = float(os.getenv("C2_FEED_CACHE_TTL", "24"))
|
|
63
|
+
except ValueError:
|
|
64
|
+
hours = 24.0
|
|
65
|
+
return hours * 3600.0
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def is_private_ip(ip: str) -> bool:
    """Return True if *ip* is a non-routable or special-purpose address.

    Covers private, loopback, reserved, link-local, multicast and unspecified
    addresses. Surrounding whitespace is ignored. A value that does not parse
    as an IPv4/IPv6 address yields False.
    """
    try:
        parsed = ipaddress.ip_address(ip.strip())
    except ValueError:
        return False
    return any(
        (
            parsed.is_private,
            parsed.is_loopback,
            parsed.is_reserved,
            parsed.is_link_local,
            parsed.is_multicast,
            parsed.is_unspecified,
        )
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _parse_feodo_csv(csv_text: str) -> dict[str, str]:
|
|
85
|
+
"""Parse Feodo Tracker ipblocklist.csv → {ip: malware_family}."""
|
|
86
|
+
result: dict[str, str] = {}
|
|
87
|
+
# Strip comment lines; the first non-comment line is the CSV header
|
|
88
|
+
lines = [
|
|
89
|
+
line for line in csv_text.splitlines()
|
|
90
|
+
if line.strip() and not line.strip().startswith("#")
|
|
91
|
+
]
|
|
92
|
+
if not lines:
|
|
93
|
+
return result
|
|
94
|
+
try:
|
|
95
|
+
reader = csv.DictReader(lines)
|
|
96
|
+
for row in reader:
|
|
97
|
+
ip = (row.get("dst_ip") or row.get("ip_address") or "").strip()
|
|
98
|
+
malware = (row.get("malware") or row.get("malware_family") or "").strip()
|
|
99
|
+
if ip:
|
|
100
|
+
result[ip] = malware or "unknown"
|
|
101
|
+
except Exception as exc:
|
|
102
|
+
logger.warning("ip_reputation: Feodo CSV parse error: %s", exc)
|
|
103
|
+
return result
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _parse_c2_txt(text: str) -> set[str]:
|
|
107
|
+
"""Parse a plain-text C2 IP list (one entry per line, optional comments)."""
|
|
108
|
+
ips: set[str] = set()
|
|
109
|
+
for line in text.splitlines():
|
|
110
|
+
line = line.strip()
|
|
111
|
+
if not line or line.startswith("#"):
|
|
112
|
+
continue
|
|
113
|
+
# Strip port suffix (1.2.3.4:8080 → 1.2.3.4) and CIDR (/32)
|
|
114
|
+
ip = line.split(":")[0].split("/")[0].strip()
|
|
115
|
+
if ip:
|
|
116
|
+
ips.add(ip)
|
|
117
|
+
return ips
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
# Feed loaders (cached, refreshed on TTL expiry)
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
async def load_feodo_feed() -> dict[str, str]:
    """Fetch and cache Feodo Tracker blocklist. Returns {ip: malware_family}.

    The parsed feed is kept in the module-level cache and reused until the
    TTL from ``_feed_ttl_seconds()`` has elapsed since the last successful
    download. A successful download is cached even when it contains no
    entries, so an empty blocklist is not re-downloaded on every call.

    On a non-200 response or any fetch error a copy of the previously cached
    data (possibly empty) is returned and the cache timestamp is left
    untouched, so the next call retries.
    """
    cache = _feed_cache["feodo"]
    loaded_at = cache["loaded_at"]
    # "loaded_at > 0" means at least one download succeeded; freshness no
    # longer depends on the feed being non-empty.
    if loaded_at > 0 and time.time() - loaded_at < _feed_ttl_seconds():
        return cache["ips"]  # type: ignore[return-value]

    logger.info("ip_reputation: Refreshing Feodo Tracker feed")
    try:
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(FEODO_CSV_URL) as resp:
                if resp.status != 200:
                    logger.warning("ip_reputation: Feodo returned HTTP %s", resp.status)
                    return dict(cache["ips"])
                text = await resp.text()

        parsed = _parse_feodo_csv(text)
        cache["ips"] = parsed
        cache["loaded_at"] = time.time()
        logger.info("ip_reputation: Feodo Tracker: %d C2 IPs loaded", len(parsed))
        return parsed
    except Exception as exc:
        logger.warning("ip_reputation: Feodo fetch failed: %s", exc)
        return dict(cache["ips"])
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
async def load_c2_feeds() -> dict[str, set[str]]:
    """Fetch and cache all C2IntelFeeds. Returns {framework: set_of_ips}.

    All feeds in ``C2_FEED_URLS`` are downloaded concurrently over one shared
    HTTP session (20 s timeout per request). The result is cached until the
    TTL from ``_feed_ttl_seconds()`` expires.

    A feed whose download fails keeps the IPs cached for it earlier (or an
    empty set if there are none) instead of being wiped. The cache is stamped
    fresh only when at least one feed was downloaded successfully, so a total
    outage is retried on the next call.
    """
    cache = _feed_cache["c2feeds"]
    if time.time() - cache["loaded_at"] < _feed_ttl_seconds() and cache["ips"]:
        return cache["ips"]  # type: ignore[return-value]

    logger.info("ip_reputation: Refreshing C2IntelFeeds")

    async def _fetch_one(session: Any, framework: str, url: str) -> tuple[str, Any]:
        """Return (framework, parsed IP set), or (framework, None) on failure."""
        try:
            async with session.get(url) as resp:
                if resp.status != 200:
                    logger.debug("ip_reputation: C2Feed %s → HTTP %s", framework, resp.status)
                    return framework, None
                text = await resp.text()
            return framework, _parse_c2_txt(text)
        except Exception as exc:
            logger.debug("ip_reputation: C2Feed %s fetch failed: %s", framework, exc)
            return framework, None

    timeout = aiohttp.ClientTimeout(total=20)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        fetched = await asyncio.gather(
            *[_fetch_one(session, fw, url) for fw, url in C2_FEED_URLS.items()]
        )

    previous: dict = cache["ips"]
    results: dict[str, set[str]] = {}
    any_succeeded = False
    for framework, ips in fetched:
        if ips is None:
            # Download failed: fall back to what was cached for this feed.
            ips = set(previous.get(framework, ()))
        else:
            any_succeeded = True
        results[framework] = ips
        logger.info("ip_reputation: C2Feed %-14s %d IPs", framework, len(ips))

    cache["ips"] = results
    if any_succeeded:
        cache["loaded_at"] = time.time()
    return results
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# External API checks
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
async def _check_abuseipdb(ip: str, api_key: str) -> dict:
    """Query AbuseIPDB v2 /check. Returns parsed response or {}.

    Reports from the last 90 days are requested, with a 15 s overall timeout.
    Any non-200 status or raised error is logged at debug level and results
    in an empty dict.
    """
    endpoint = "https://api.abuseipdb.com/api/v2/check"
    try:
        request_headers = {"Key": api_key, "Accept": "application/json"}
        query = {"ipAddress": ip, "maxAgeInDays": 90}
        client_timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(timeout=client_timeout) as session:
            async with session.get(endpoint, headers=request_headers, params=query) as resp:
                if resp.status == 200:
                    return await resp.json()
                logger.debug("ip_reputation: AbuseIPDB → HTTP %s for %s", resp.status, ip)
                return {}
    except Exception as exc:
        logger.debug("ip_reputation: AbuseIPDB check failed for %s: %s", ip, exc)
        return {}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def _check_greynoise(ip: str, api_key: str) -> dict:
    """Query GreyNoise community API. Returns parsed response or {}.

    A 404 is mapped to ``{"classification": "unknown"}``. Any other non-200
    status or raised error is logged at debug level and results in an empty
    dict. The request uses a 15 s overall timeout.
    """
    from urllib.parse import quote

    try:
        # *ip* ultimately comes from scraped text. Percent-encode it so it can
        # only ever fill one path segment of the URL (":" is kept so IPv6
        # literals pass through unchanged).
        url = f"https://api.greynoise.io/v3/community/{quote(ip, safe=':')}"
        headers = {"key": api_key}
        timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers) as resp:
                if resp.status == 404:
                    return {"classification": "unknown"}
                if resp.status != 200:
                    logger.debug("ip_reputation: GreyNoise → HTTP %s for %s", resp.status, ip)
                    return {}
                return await resp.json()
    except Exception as exc:
        logger.debug("ip_reputation: GreyNoise check failed for %s: %s", ip, exc)
        return {}
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
# Core reputation check
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
|
|
234
|
+
async def check_ip_reputation(
    ip: str,
    base_confidence: float = 1.0,
) -> dict[str, Any]:
    """
    Run all four reputation checks for a single IP address.

    Values that do not parse as an IPv4/IPv6 address, and addresses for which
    ``is_private_ip`` is true, get the default result back without any feed
    lookup or external API call. AbuseIPDB and GreyNoise are queried only when
    ABUSEIPDB_API_KEY / GREYNOISE_API_KEY are set.

    Args:
        ip: Candidate IP address (surrounding whitespace is ignored for the
            lookups; the ``ip`` key of the result keeps the value as given).
        base_confidence: Starting confidence. Hits add to it
            (+0.15 Feodo, +0.15 C2 feed, +0.10 AbuseIPDB score > 80 or
            +0.05 for score >= 50, +0.10 GreyNoise malicious), capped at 1.0.

    Returns a dict with keys:
        ip, feodo_hit, feodo_malware, c2feed_hit, c2feed_framework,
        abuseipdb_score, abuseipdb_categories, greynoise_classification,
        suppress, tags, threat_confidence

    ``suppress`` is True only when GreyNoise classifies the IP as "benign";
    the function then returns immediately, leaving ``threat_confidence`` at
    *base_confidence*.
    """
    result: dict[str, Any] = {
        "ip": ip,
        "feodo_hit": False,
        "feodo_malware": None,
        "c2feed_hit": False,
        "c2feed_framework": None,
        "abuseipdb_score": None,
        "abuseipdb_categories": [],
        "greynoise_classification": None,
        "suppress": False,
        "tags": [],
        "threat_confidence": base_confidence,
    }

    # Entity values come from scraped text. Never look up, or send to a
    # third-party API, anything that is not a syntactically valid address.
    try:
        lookup_ip = ip.strip()
        ipaddress.ip_address(lookup_ip)
    except (AttributeError, ValueError):
        return result

    if is_private_ip(lookup_ip):
        return result

    abuseipdb_key = (os.getenv("ABUSEIPDB_API_KEY") or "").strip()
    greynoise_key = (os.getenv("GREYNOISE_API_KEY") or "").strip()

    # Load local feeds (both are cached — near-instant after first load)
    feodo_data, c2feeds_data = await asyncio.gather(
        load_feodo_feed(),
        load_c2_feeds(),
    )

    # --- Feodo Tracker check ---
    if lookup_ip in feodo_data:
        malware = feodo_data[lookup_ip]
        result["feodo_hit"] = True
        result["feodo_malware"] = malware
        result["tags"].append("confirmed_c2")
        if malware and malware.lower() != "unknown":
            slug = malware.lower().replace(" ", "_").replace("-", "_")
            result["tags"].append(f"confirmed_c2_{slug}")

    # --- C2IntelFeeds check (first matching framework wins) ---
    for framework, ips in c2feeds_data.items():
        if lookup_ip in ips:
            result["c2feed_hit"] = True
            result["c2feed_framework"] = framework
            if "confirmed_c2" not in result["tags"]:
                result["tags"].append("confirmed_c2")
            result["tags"].append(f"confirmed_c2_{framework}")
            break

    # --- AbuseIPDB check ---
    if abuseipdb_key:
        abuse_resp = await _check_abuseipdb(lookup_ip, abuseipdb_key)
        data = abuse_resp.get("data") if isinstance(abuse_resp, dict) else None
        if isinstance(data, dict):
            score = data.get("abuseConfidenceScore")
            # Ignore a missing or non-numeric score instead of raising on
            # the comparisons below.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                score = None
            result["abuseipdb_score"] = score
            # usageType is a string; categories come from individual reports
            usage = data.get("usageType")
            result["abuseipdb_categories"] = [usage] if usage else []
            if score is not None and score > 50:
                result["tags"].append("abuse_confirmed")
    else:
        logger.debug("ip_reputation: AbuseIPDB skipped — no API key")

    # --- GreyNoise check ---
    if greynoise_key:
        gn_resp = await _check_greynoise(lookup_ip, greynoise_key)
        if gn_resp and isinstance(gn_resp, dict):
            classification = gn_resp.get("classification", "unknown")
            result["greynoise_classification"] = classification

            if classification == "benign":
                result["suppress"] = True
                logger.info("IP %s suppressed — GreyNoise benign scanner", ip)
                return result

            if classification == "malicious":
                result["tags"].append("greynoise_malicious")
                for gn_tag in gn_resp.get("tags") or []:
                    slug = str(gn_tag).lower().replace(" ", "_")
                    result["tags"].append(f"greynoise_{slug}")
    else:
        logger.debug("ip_reputation: GreyNoise skipped — no API key")

    # --- Threat confidence calculation ---
    conf = base_confidence
    if result["feodo_hit"]:
        conf += 0.15
    if result["c2feed_hit"]:
        conf += 0.15
    score = result["abuseipdb_score"]
    if score is not None:
        if score > 80:
            conf += 0.10
        # NOTE(review): a score of exactly 50 earns this boost but not the
        # "abuse_confirmed" tag above (which needs > 50) — confirm intent.
        elif score >= 50:
            conf += 0.05
    if "greynoise_malicious" in result["tags"]:
        conf += 0.10
    result["threat_confidence"] = min(conf, 1.0)

    return result
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# ---------------------------------------------------------------------------
|
|
347
|
+
# DB helpers (sync — called via asyncio.to_thread or direct from sync context)
|
|
348
|
+
# ---------------------------------------------------------------------------
|
|
349
|
+
|
|
350
|
+
def _suppress_entities_in_db(suppressed_ips: set[str], investigation_id: Any) -> None:
|
|
351
|
+
"""Remove suppressed IPs from an investigation's entity pool in the DB."""
|
|
352
|
+
if not os.getenv("DATABASE_URL") or not suppressed_ips:
|
|
353
|
+
return
|
|
354
|
+
try:
|
|
355
|
+
from db.session import get_session
|
|
356
|
+
from db.models import Entity, InvestigationEntityLink
|
|
357
|
+
|
|
358
|
+
with get_session() as session:
|
|
359
|
+
entity_ids = [
|
|
360
|
+
row[0]
|
|
361
|
+
for row in session.query(Entity.id).filter(
|
|
362
|
+
Entity.entity_type == "IP_ADDRESS",
|
|
363
|
+
Entity.value.in_(suppressed_ips),
|
|
364
|
+
).all()
|
|
365
|
+
]
|
|
366
|
+
if not entity_ids:
|
|
367
|
+
return
|
|
368
|
+
|
|
369
|
+
session.query(InvestigationEntityLink).filter(
|
|
370
|
+
InvestigationEntityLink.investigation_id == investigation_id,
|
|
371
|
+
InvestigationEntityLink.entity_id.in_(entity_ids),
|
|
372
|
+
).delete(synchronize_session=False)
|
|
373
|
+
|
|
374
|
+
session.query(Entity).filter(
|
|
375
|
+
Entity.investigation_id == investigation_id,
|
|
376
|
+
Entity.id.in_(entity_ids),
|
|
377
|
+
).update({"investigation_id": None}, synchronize_session=False)
|
|
378
|
+
|
|
379
|
+
session.commit()
|
|
380
|
+
logger.info(
|
|
381
|
+
"ip_reputation: Suppressed %d IP(s) from investigation %s",
|
|
382
|
+
len(entity_ids),
|
|
383
|
+
investigation_id,
|
|
384
|
+
)
|
|
385
|
+
except Exception as exc:
|
|
386
|
+
logger.warning("ip_reputation: DB suppression failed: %s", exc)
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _update_entity_reputations(
|
|
390
|
+
updates: list[tuple[str, float, list[str]]],
|
|
391
|
+
) -> None:
|
|
392
|
+
"""
|
|
393
|
+
Update confidence and corroborating_sources for non-suppressed IP entities.
|
|
394
|
+
|
|
395
|
+
*updates* is a list of (ip_value, new_confidence, tags).
|
|
396
|
+
"""
|
|
397
|
+
if not os.getenv("DATABASE_URL") or not updates:
|
|
398
|
+
return
|
|
399
|
+
try:
|
|
400
|
+
from db.session import get_session
|
|
401
|
+
from db.models import Entity
|
|
402
|
+
|
|
403
|
+
with get_session() as session:
|
|
404
|
+
for ip_val, confidence, tags in updates:
|
|
405
|
+
db_entity = session.query(Entity).filter(
|
|
406
|
+
Entity.entity_type == "IP_ADDRESS",
|
|
407
|
+
Entity.value == ip_val,
|
|
408
|
+
).first()
|
|
409
|
+
if db_entity is None:
|
|
410
|
+
continue
|
|
411
|
+
if confidence > (db_entity.confidence or 0.0):
|
|
412
|
+
db_entity.confidence = confidence
|
|
413
|
+
if tags:
|
|
414
|
+
existing: list = json.loads(db_entity.corroborating_sources or "[]")
|
|
415
|
+
for tag in tags:
|
|
416
|
+
if tag not in existing:
|
|
417
|
+
existing.append(tag)
|
|
418
|
+
db_entity.corroborating_sources = json.dumps(existing)
|
|
419
|
+
session.commit()
|
|
420
|
+
except Exception as exc:
|
|
421
|
+
logger.warning("ip_reputation: DB reputation update failed: %s", exc)
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
# ---------------------------------------------------------------------------
|
|
425
|
+
# Pipeline integration
|
|
426
|
+
# ---------------------------------------------------------------------------
|
|
427
|
+
|
|
428
|
+
async def enrich_ip_entities(
    extraction_results: list,
    investigation_id: Any,
) -> tuple[list, dict]:
    """
    Post-extraction IP reputation enrichment step.

    - Collects IP_ADDRESS entities from *extraction_results*
    - Limits to MAX_IPS unique IPs per investigation
    - Loads the local feeds once, then runs the per-IP checks concurrently
    - Suppresses benign scanner IPs (GreyNoise benign): removes from results + DB
    - Updates confidence and corroborating_sources for remaining IPs

    Results whose ``entities`` attribute is missing or None are treated as
    having no entities; an entity without a confidence (missing or None) is
    checked with a base confidence of 1.0. A check that raises is logged and
    skipped without affecting the other IPs.

    Returns (filtered_extraction_results, stats_dict). The list is the same
    object that was passed in; suppression edits its items in place.
    """
    # Collect unique IPs → confidence of the first entity seen with that value
    seen: dict[str, float] = {}
    for exr in extraction_results:
        for entity in getattr(exr, "entities", None) or []:
            if getattr(entity, "entity_type", "") != "IP_ADDRESS":
                continue
            ip = entity.value
            if ip not in seen:
                confidence = getattr(entity, "confidence", None)
                seen[ip] = 1.0 if confidence is None else confidence

    unique_ips = list(seen)
    if not unique_ips:
        return extraction_results, {"ip_reputation": "ok_0_ips"}

    if len(unique_ips) > MAX_IPS:
        logger.info(
            "ip_reputation: capping to %d of %d unique IPs",
            MAX_IPS, len(unique_ips),
        )
        unique_ips = unique_ips[:MAX_IPS]

    logger.info("ip_reputation: checking %d unique IP(s)", len(unique_ips))

    # Warm the feed caches once up front; otherwise every concurrent check
    # below would find a cold cache and start its own download of each feed.
    await asyncio.gather(load_feodo_feed(), load_c2_feeds(), return_exceptions=True)

    # Run all checks concurrently
    rep_list = await asyncio.gather(
        *[check_ip_reputation(ip, base_confidence=seen[ip]) for ip in unique_ips],
        return_exceptions=True,
    )

    suppressed_ips: set[str] = set()
    db_updates: list[tuple[str, float, list[str]]] = []
    stats = {
        "checked": len(unique_ips),
        "suppressed": 0,
        "c2_confirmed": 0,
        "abuse_confirmed": 0,
    }

    for ip, rep in zip(unique_ips, rep_list):
        # BaseException, not Exception: gather() can also hand back a
        # CancelledError, which must not be indexed like a result dict.
        if isinstance(rep, BaseException):
            logger.debug("ip_reputation: check raised for %s: %s", ip, rep)
            continue
        if rep["suppress"]:
            suppressed_ips.add(ip)
            stats["suppressed"] += 1
            continue
        if rep["c2feed_hit"] or rep["feodo_hit"]:
            stats["c2_confirmed"] += 1
        if (rep["abuseipdb_score"] or 0) > 50:
            stats["abuse_confirmed"] += 1
        if rep["tags"] or rep["threat_confidence"] > seen[ip]:
            db_updates.append((ip, rep["threat_confidence"], rep["tags"]))

    # Apply suppression to in-memory extraction results
    if suppressed_ips:
        for exr in extraction_results:
            exr.entities = [
                e for e in getattr(exr, "entities", None) or []
                if not (
                    getattr(e, "entity_type", "") == "IP_ADDRESS"
                    and e.value in suppressed_ips
                )
            ]
            exr.entity_count = len(exr.entities)
        await asyncio.to_thread(_suppress_entities_in_db, suppressed_ips, investigation_id)

    # Update DB for non-suppressed IPs
    if db_updates:
        await asyncio.to_thread(_update_entity_reputations, db_updates)

    checked = stats["checked"]
    sup = stats["suppressed"]
    status = f"ok_{checked}_ips" + (f"_{sup}_suppressed" if sup else "")

    logger.info(
        "ip_reputation: done — %d checked, %d suppressed, %d C2, %d abuse",
        checked, sup, stats["c2_confirmed"], stats["abuse_confirmed"],
    )

    return extraction_results, {"ip_reputation": status, **stats}
|