voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
sources/virustotal.py ADDED
@@ -0,0 +1,113 @@
1
+ """
2
+ sources/virustotal.py — VirusTotal hash enrichment (file hash lookup).
3
+
4
+ Requires VT_API_KEY in config. Free tier: 4 requests/minute.
5
+ Max 20 hashes per investigation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import logging
12
+ from typing import Optional
13
+
14
+ import aiohttp
15
+
16
+ from config import VT_API_KEY
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ _VT_BASE = "https://www.virustotal.com/api/v3"
21
+ _VT_HASH_LIMIT = 20
22
+ _VT_RATE_LIMIT_DELAY = 15.0
23
+
24
+
25
def _is_enabled() -> bool:
    """Return True when a non-blank VirusTotal API key is configured.

    A ``VT_API_KEY`` value that has no ``strip`` method (for example
    ``None``) is treated as "not configured" instead of raising.
    """
    stripped = VT_API_KEY.strip() if hasattr(VT_API_KEY, "strip") else ""
    return bool(stripped)
28
+
29
+
30
async def _fetch_hash(hash_value: str, session: aiohttp.ClientSession) -> Optional[dict]:
    """Fetch the VirusTotal file report for one hash.

    Sends ``GET {_VT_BASE}/files/{hash_value}`` with the configured API key
    in the ``x-apikey`` header and a 15 second total timeout.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None``. A 401 or 429
        response, a timeout and any other exception are logged as warnings;
        only the first 16 characters of the hash are ever logged.
    """
    try:
        async with session.get(
            f"{_VT_BASE}/files/{hash_value}",
            headers={"x-apikey": VT_API_KEY.strip()},
            timeout=aiohttp.ClientTimeout(total=15),
        ) as response:
            status = response.status
            if status == 200:
                return await response.json()
            if status == 401:
                logger.warning("VirusTotal: invalid API key")
            elif status == 429:
                logger.warning("VirusTotal: rate limited")
            # 404 and every other non-200 status: no report, nothing to log.
            return None
    except asyncio.TimeoutError:
        logger.warning("VirusTotal: timeout for hash %s", hash_value[:16])
    except Exception as e:
        logger.warning("VirusTotal: error for hash %s: %s", hash_value[:16], e)
    return None
54
+
55
+
56
def _select_hashes(
    entities: list[dict],
    known_types: dict[str, str],
    limit: int,
) -> list[tuple[str, str]]:
    """Return up to *limit* unique, well-formed ``(hash, entity type)`` pairs."""
    hex_digits = set("0123456789abcdefABCDEF")
    picked: list[tuple[str, str]] = []
    seen: set[str] = set()
    for entity in entities:
        if len(picked) >= limit:
            break
        entity_type = entity.get("type") or entity.get("entity_type", "")
        value = entity.get("value") or entity.get("entity_value", "")
        if entity_type not in known_types or not isinstance(value, str):
            continue
        # MD5 / SHA-1 / SHA-256 hex digests are 32 / 40 / 64 characters long.
        # Anything else cannot be a valid file id, so do not spend a
        # rate-limited request (or build a URL) from it.
        if len(value) not in (32, 40, 64) or not set(value) <= hex_digits:
            continue
        # Hex digests are case-insensitive: query each distinct hash once.
        fingerprint = value.lower()
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        picked.append((value, entity_type))
    return picked


async def enrich_virustotal(entities: list[dict]) -> list[dict]:
    """
    For each FILE_HASH_MD5 / FILE_HASH_SHA1 / FILE_HASH_SHA256 entity,
    query VirusTotal and return detection stats.

    Entities are dicts; the type is read from ``type`` or ``entity_type`` and
    the hash from ``value`` or ``entity_value``. At most ``_VT_HASH_LIMIT``
    distinct, well-formed hashes are queried, with ``_VT_RATE_LIMIT_DELAY``
    seconds between consecutive requests (no delay after the last one).

    Returns:
        One dict per hash that VirusTotal returned a report for, with keys
        ``source``, ``entity_type``, ``entity_value``, ``malicious_count``,
        ``total_engines``, ``detection_ratio``, ``suggested_threat_label``,
        ``first_seen``, ``last_seen`` and ``confirmed_malicious``. An empty
        list when no API key is configured.
    """
    if not _is_enabled():
        logger.debug("VirusTotal skipped — no API key configured")
        return []

    hash_type_map = {
        "FILE_HASH_MD5": "md5",
        "FILE_HASH_SHA1": "sha1",
        "FILE_HASH_SHA256": "sha256",
    }

    hashes_to_query = _select_hashes(entities, hash_type_map, _VT_HASH_LIMIT)

    results: list[dict] = []
    async with aiohttp.ClientSession() as session:
        for position, (hash_val, hash_type) in enumerate(hashes_to_query):
            # Space out requests, but never sleep before the first request
            # or after the final one.
            if position:
                await asyncio.sleep(_VT_RATE_LIMIT_DELAY)

            data = await _fetch_hash(hash_val, session)
            if not isinstance(data, dict):
                continue

            # Tolerate missing or null sections in the response body.
            attr = (data.get("data") or {}).get("attributes") or {}
            stats = attr.get("last_analysis_stats") or {}
            mal = stats.get("malicious", 0)
            total = sum(stats.values())
            detection_ratio = mal / total if total > 0 else 0.0
            classification = attr.get("popular_threat_classification") or {}

            results.append({
                "source": "virustotal",
                # NOTE(review): this emits the short algorithm name
                # ("md5"/"sha1"/"sha256"), not the FILE_HASH_* entity type —
                # confirm that is what consumers expect.
                "entity_type": hash_type_map.get(hash_type, "FILE_HASH"),
                "entity_value": hash_val,
                "malicious_count": mal,
                "total_engines": total,
                "detection_ratio": detection_ratio,
                "suggested_threat_label": classification.get("suggested_threat_label", ""),
                # NOTE(review): "creation_date" may not mean "first seen by
                # VirusTotal" — check against the VT file-object docs.
                "first_seen": attr.get("creation_date", ""),
                "last_seen": attr.get("last_analysis_date", ""),
                "confirmed_malicious": detection_ratio > 0.5,
            })

    if results:
        logger.info("VirusTotal: %d results", len(results))
    return results
utils/__init__.py ADDED
File without changes
utils/async_utils.py ADDED
@@ -0,0 +1,89 @@
1
+ """
2
+ Async utilities for safely running coroutines in various contexts.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import logging
9
+ import threading
10
+ from concurrent.futures import Future, ThreadPoolExecutor
11
+ from typing import Any, Coroutine, TypeVar
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _T = TypeVar("_T")
16
+
17
# Process-wide worker pool, created on first use by _get_executor().
_executor: ThreadPoolExecutor | None = None
_executor_lock = threading.Lock()


def _get_executor() -> ThreadPoolExecutor:
    """Return the shared thread pool, creating it on first call.

    The pool has 4 workers whose thread names start with ``async_utils_``.
    Creation happens under ``_executor_lock`` and the global is re-checked
    inside the lock, so only one pool is ever built.
    """
    global _executor
    existing = _executor
    if existing is not None:
        return existing
    with _executor_lock:
        if _executor is None:
            _executor = ThreadPoolExecutor(
                thread_name_prefix="async_utils_",
                max_workers=4,
            )
        return _executor
28
+
29
+
30
def run_async(coro: Coroutine[Any, Any, _T]) -> _T:
    """
    Run a coroutine to completion from synchronous code and return its result.

    - If an event loop is already running in the calling thread, the
      coroutine is handed to ``_run_in_thread`` so it executes on a separate
      loop in a worker thread while this thread blocks for the result.
    - Otherwise it is executed with ``asyncio.run``.

    Args:
        coro: The coroutine to run

    Returns:
        The result of the coroutine

    Raises:
        Whatever the coroutine raises; the exception is propagated unchanged.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run() can own one.
        # A coroutine object can only be awaited once, so there is no retry:
        # any exception raised here is the coroutine's own.
        return asyncio.run(coro)

    return _run_in_thread(coro)
61
+
62
+
63
def _run_in_thread(coro: Coroutine[Any, Any, _T]) -> _T:
    """
    Run a coroutine on a fresh event loop in a pool worker thread.

    The calling thread blocks until the coroutine finishes. The result, or
    any exception the coroutine raised, comes back through the future
    returned by ``executor.submit``. That future is completed for every
    outcome, so the caller cannot be left waiting on a future nobody sets.
    """

    def _runner() -> _T:
        local_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(local_loop)
        try:
            return local_loop.run_until_complete(coro)
        finally:
            try:
                local_loop.close()
            finally:
                # Pool threads are reused; do not leave a closed loop
                # registered as this thread's current loop.
                asyncio.set_event_loop(None)

    # NOTE(review): the shared pool is bounded, and each call occupies a
    # worker while its caller blocks; deeply nested use could exhaust it.
    return _get_executor().submit(_runner).result()
83
+
84
+
85
def run_async_optional(coro: Coroutine[Any, Any, _T] | None) -> _T | None:
    """Return ``run_async(coro)``, or ``None`` when no coroutine is given."""
    return None if coro is None else run_async(coro)
utils/content_safety.py ADDED
@@ -0,0 +1,193 @@
1
+ """
2
+ utils/content_safety.py — Mandatory content safety filters for VoidAccess.
3
+
4
+ Operates at multiple layers: query intake, URL filtering, content scanning,
5
+ and post-extraction entity value filtering.
6
+ Never logs actual prohibited content — only event metadata.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import logging
13
+ import re
14
+ from typing import Optional
15
+
16
+ _logger = logging.getLogger(__name__)
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Blocklists
20
+ # ---------------------------------------------------------------------------
21
+
22
+ BLOCKED_TERMS = [
23
+ # CSAM related
24
+ "child porn", "cp porn", "lolita", "pedo",
25
+ "pedophil", "childporn", "child sex", "minor sex",
26
+ "underage sex", "jailbait", "preteen sex",
27
+ "child abuse material", "csam", "child model",
28
+ "hurtcore", "daisy's destruction",
29
+ # Gore related
30
+ "gore site", "gore video", "snuff film",
31
+ "murder video", "execution video", "beheading video",
32
+ "torture video", "bestgore", "livegore",
33
+ "watchpeopledie", "realsnuff",
34
+ ]
35
+
36
+ BLOCKED_PATTERNS = [
37
+ r'\bcp\b.{0,20}\bonion\b', # "cp" near "onion"
38
+ r'\bchild.{0,10}\bnaked\b',
39
+ r'\bminor.{0,10}\bnaked\b',
40
+ r'\bkid.{0,10}\bporn\b',
41
+ r'\bteen.{0,10}\bporn\b',
42
+ ]
43
+
44
+ BLOCKED_URL_TERMS = [
45
+ "pedo", "loli", "jailbait", "childporn",
46
+ "hurtcore", "csam", "bestgore", "livegore",
47
+ "watchpeople", "realsnuff", "daisy",
48
+ ]
49
+
50
+ CONTENT_BLOCKLIST = [
51
+ "child pornography", "child porn",
52
+ "child sexual abuse", "csam",
53
+ "snuff film", "murder porn",
54
+ ]
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Entity value blocklist — applied after extraction, before DB storage
58
+ # Only checked against text-based entity types (not technical IOCs)
59
+ # ---------------------------------------------------------------------------
60
+
61
+ ENTITY_VALUE_BLOCKLIST: list[str] = [
62
+ # Adult content categories
63
+ "porn", "blowjob", "bdsm", "hardcore",
64
+ "xxx", "nude", "nudes", "naked", "escort",
65
+ "onlyfans", "cam girl", "sex tape",
66
+ "adult content", "adult site",
67
+ # Gore/violence
68
+ "snuff", "gore", "murder video",
69
+ "execution video", "beheading",
70
+ # Exploitation
71
+ "jailbait", "pedo", "csam",
72
+ "child", "minor",
73
+ ]
74
+
75
+ # Entity types where prohibited content can appear as names/labels.
76
+ # Technical IOC types (hashes, IPs, CVEs, wallets, onion URLs) are
77
+ # intentionally omitted — they cannot contain prohibited content.
78
+ _TEXT_ENTITY_TYPES: frozenset[str] = frozenset({
79
+ "ORGANIZATION_NAME",
80
+ "THREAT_ACTOR_HANDLE",
81
+ "PERSON_NAME",
82
+ "MALWARE_FAMILY",
83
+ })
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # Public API
88
+ # ---------------------------------------------------------------------------
89
+
90
def is_blocked_query(query: str) -> tuple[bool, str]:
    """
    Decide whether a search query must be rejected.

    The lower-cased query is blocked when it contains any BLOCKED_TERMS
    entry as a substring, or when any BLOCKED_PATTERNS regex matches it.

    Returns (is_blocked, reason); reason is "" when the query is allowed.
    The query text itself is never logged here.
    """
    lowered = query.lower()

    has_term = any(term in lowered for term in BLOCKED_TERMS)
    if has_term or any(
        re.search(pattern, lowered, re.IGNORECASE) for pattern in BLOCKED_PATTERNS
    ):
        return True, "Query contains prohibited content"

    return False, ""
107
+
108
+
109
def is_blocked_entity_value(entity_type: str, value: str) -> bool:
    """
    Return True if an extracted entity value should be dropped before storage.

    Only entity types listed in _TEXT_ENTITY_TYPES (ORGANIZATION_NAME,
    THREAT_ACTOR_HANDLE, PERSON_NAME, MALWARE_FAMILY) are checked. Every
    other type, including technical IOC types, always returns False.

    For checked types the value is lower-cased and tested for any
    ENTITY_VALUE_BLOCKLIST term as a substring. A missing or empty value is
    never blocked. The value itself is never logged.
    """
    if entity_type not in _TEXT_ENTITY_TYPES:
        return False

    haystack = (value or "").lower()
    return any(term in haystack for term in ENTITY_VALUE_BLOCKLIST)
133
+
134
+
135
def is_blocked_url(url: str) -> tuple[bool, str]:
    """
    Decide whether a URL must be skipped by the scraper.

    The lower-cased URL is blocked when it contains any BLOCKED_URL_TERMS
    entry as a substring.

    Returns (is_blocked, reason); reason is "" when the URL is allowed.
    """
    lowered = url.lower()
    if any(term in lowered for term in BLOCKED_URL_TERMS):
        return True, "URL blocked — prohibited content"
    return False, ""
145
+
146
+
147
def sanitize_content(text: str) -> tuple[str, bool]:
    """
    Scan scraped text against CONTENT_BLOCKLIST.

    Returns (sanitized_text, was_flagged). When any blocklist term occurs in
    the lower-cased text, the result is ("", True) so the original text is
    discarded. Otherwise the text is returned unchanged with False. Empty or
    missing input is returned as-is and never flagged.
    """
    if not text:
        return text, False

    lowered = text.lower()
    flagged = any(term in lowered for term in CONTENT_BLOCKLIST)
    return ("", True) if flagged else (text, False)
162
+
163
+
164
def log_content_safety_event(
    event_type: str,
    content_hash: Optional[str] = None,
    user_id: Optional[int] = None,
) -> None:
    """
    Record a content safety block event in the database for operator review.

    Does nothing when the DATABASE_URL environment variable is unset. Any
    exception, including a failed import of the db modules, is caught and
    logged at debug level so the calling pipeline is never interrupted.

    event_type: one of "query_blocked", "url_blocked", "content_blocked"
    content_hash: SHA-256 hex prefix (≤16 chars) of the blocked item, for correlation only.
    user_id: stored on the event as given; may be None.
    """
    try:
        import os

        if os.getenv("DATABASE_URL"):
            # Imported lazily so this module loads without the db layer.
            from db.session import get_session
            from db.models import ContentSafetyEvent
            from datetime import datetime, timezone

            with get_session() as db:
                db.add(
                    ContentSafetyEvent(
                        event_type=event_type,
                        user_id=user_id,
                        content_hash=content_hash,
                        timestamp=datetime.now(timezone.utc),
                    )
                )
                db.commit()
    except Exception as exc:
        _logger.debug("content_safety: DB log failed (non-critical): %s", exc)
utils/defang.py ADDED
@@ -0,0 +1,94 @@
1
+ import re
2
+
3
+
4
def defang_url(url: str) -> str:
    """
    Defang a URL or bare domain for safe sharing in reports.

    ``http://``, ``https://`` and ``ftp://`` become ``hxxp://``, ``hxxps://``
    and ``fxp://``, and every dot in the host part becomes ``[.]``. The host
    is everything after a leading scheme (or a leading ``//``) up to the
    first ``/``. The path after the host is left untouched.

    >>> defang_url("http://example.com/path")
    'hxxp://example[.]com/path'
    >>> defang_url("example.com/a/b")
    'example[.]com/a/b'

    Empty or missing input is returned unchanged.
    """
    if not url:
        return url

    url = (
        url.replace("http://", "hxxp://")
        .replace("https://", "hxxps://")
        .replace("ftp://", "fxp://")
    )

    # Find where the host starts. Only a scheme at the very beginning counts;
    # "://" later in the string belongs to the path or query.
    scheme = re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", url)
    if scheme:
        prefix = scheme.group()
    elif url.startswith("//"):
        prefix = "//"
    else:
        prefix = ""

    host, slash, remainder = url[len(prefix):].partition("/")
    return f"{prefix}{host.replace('.', '[.]')}{slash}{remainder}"
21
+
22
+
23
def defang_ip(ip: str) -> str:
    """
    Defang an IP address by bracketing its last dot.

    1.2.3.4 -> 1.2.3[.]4

    Input without any dot, and empty or missing input, is returned unchanged.
    """
    if not ip:
        return ip
    head, dot, tail = ip.rpartition(".")
    return f"{head}[.]{tail}" if dot else ip
34
+
35
+
36
def defang_email(email: str) -> str:
    """
    Defang an email address.

    user@example.com -> user[@]example[.]com

    Every ``@`` becomes ``[@]``. Dots are bracketed only after the first
    ``@``, so the local part keeps its dots. Input without an ``@``, and
    empty or missing input, is returned unchanged.
    """
    if not email:
        return email
    local, at_sign, domain = email.partition("@")
    if not at_sign:
        return email
    safe_domain = domain.replace("@", "[@]").replace(".", "[.]")
    return "[@]".join((local, safe_domain))
49
+
50
+
51
def defang_value(entity_type: str, value: str) -> str:
    """
    Defang an entity value for display, choosing the rule by entity type.

    ONION_URL and DOMAIN use defang_url, IP_ADDRESS uses defang_ip and
    EMAIL_ADDRESS uses defang_email. Values of any other type are returned
    unchanged.
    """
    if entity_type == "IP_ADDRESS":
        return defang_ip(value)
    if entity_type == "EMAIL_ADDRESS":
        return defang_email(value)
    if entity_type in ("ONION_URL", "DOMAIN"):
        return defang_url(value)
    return value
67
+
68
+
69
def defang_text(text: str) -> str:
    """
    Defang http(s) URL schemes and dotted-quad IPs found in free text.

    ``http://`` / ``https://`` become ``hxxp://`` / ``hxxps://``, and the
    last dot of anything shaped like ``n.n.n.n`` (1-3 digits each) becomes
    ``[.]``. Dots inside URL hostnames are not changed. Use for report
    summaries and context snippets. Empty or missing input is returned
    unchanged.
    """
    if not text:
        return text

    def _neuter_scheme(match):
        # The match is exactly "http://" or "https://".
        found = match.group()
        return found.replace("http://", "hxxp://").replace("https://", "hxxps://")

    without_schemes = re.sub(r'https?://', _neuter_scheme, text)

    return re.sub(
        r'\b(\d{1,3})\.(\d{1,3})\.(\d{1,3})'
        r'\.(\d{1,3})\b',
        r'\1.\2.\3[.]\4',
        without_schemes,
    )
utils/encryption.py ADDED
@@ -0,0 +1,34 @@
1
+ """
2
+ Encryption utilities for user API keys.
3
+
4
+ Uses Fernet (AES-128-CBC) with a key derived from JWT_SECRET so that
5
+ no new secret needs to be distributed — only the existing JWT_SECRET
6
+ is required.
7
+ """
8
+
9
+ import base64
10
+ import hashlib
11
+
12
+ from cryptography.fernet import Fernet
13
+
14
+
15
def _get_fernet() -> Fernet:
    """Build a Fernet instance keyed from the configured JWT_SECRET.

    The key is the URL-safe base64 encoding of the SHA-256 digest of
    JWT_SECRET. The derivation must stay stable: values encrypted under one
    JWT_SECRET can only be decrypted with the same secret.
    """
    # Imported here rather than at module top, as in the original code.
    from config import JWT_SECRET

    digest = hashlib.sha256(JWT_SECRET.encode()).digest()
    return Fernet(base64.urlsafe_b64encode(digest))
20
+
21
+
22
def encrypt_api_key(plaintext: str) -> str:
    """Encrypt an API key and return the Fernet token as text.

    Empty or missing input yields "" without touching the cipher.
    """
    if plaintext:
        token = _get_fernet().encrypt(plaintext.encode())
        return token.decode()
    return ""
26
+
27
+
28
def decrypt_api_key(ciphertext: str) -> str:
    """Decrypt a token produced by ``encrypt_api_key``.

    Returns:
        The decrypted key, or "" when *ciphertext* is empty or cannot be
        decrypted. Failures never raise; they are logged as a warning that
        carries only the exception type, never the token or the key.
    """
    if not ciphertext:
        return ""

    # Local import: this module has no module-level logger.
    import logging

    try:
        return _get_fernet().decrypt(ciphertext.encode()).decode()
    except Exception as exc:
        # Best-effort by design: callers treat "" as "no usable key".
        logging.getLogger(__name__).warning(
            "decrypt_api_key: stored key could not be decrypted (%s)",
            type(exc).__name__,
        )
        return ""
utils/ioc_freshness.py ADDED
@@ -0,0 +1,124 @@
1
+ from datetime import datetime, timedelta, timezone
2
+ from enum import Enum
3
+
4
+
5
class FreshnessTag(str, Enum):
    """Freshness bucket assigned to an entity by get_freshness_tag.

    Members are also ``str`` instances equal to their lower-case value.
    """

    FRESH = "fresh"
    AGING = "aging"
    STALE = "stale"
    EXPIRED = "expired"
    # Used when no last-seen date is available.
    UNKNOWN = "unknown"
11
+
12
+
13
# Per-entity-type age limits, in days since last seen. An entity is "fresh"
# up to the first limit, "aging" up to the second and "stale" up to the
# third. Types not listed here use the "DEFAULT" entry.
FRESHNESS_THRESHOLDS = {
    "IP_ADDRESS": {"fresh": 14, "aging": 30, "stale": 90},
    "DOMAIN": {"fresh": 30, "aging": 90, "stale": 180},
    "ONION_URL": {"fresh": 60, "aging": 180, "stale": 365},
    "FILE_HASH_MD5": {"fresh": 365, "aging": 730, "stale": 1825},
    # SHA-1 hashes are an entity type elsewhere in the package; without this
    # entry they would fall back to the much shorter DEFAULT limits.
    "FILE_HASH_SHA1": {"fresh": 365, "aging": 730, "stale": 1825},
    "FILE_HASH_SHA256": {"fresh": 365, "aging": 730, "stale": 1825},
    "CVE": {"fresh": 365, "aging": 730, "stale": 1825},
    "BITCOIN_ADDRESS": {"fresh": 90, "aging": 180, "stale": 365},
    "THREAT_ACTOR": {"fresh": 90, "aging": 365, "stale": 730},
    # Same limits under the entity-type name used by the content filters.
    "THREAT_ACTOR_HANDLE": {"fresh": 90, "aging": 365, "stale": 730},
    "DEFAULT": {"fresh": 30, "aging": 90, "stale": 180},
}
60
+
61
+
62
def get_freshness_tag(
    entity_type: str,
    last_seen_at: datetime | None,
    first_seen_at: datetime | None = None,
) -> FreshnessTag:
    """
    Classify an entity's freshness from its type and last-seen time.

    The age is the number of whole days between now (UTC) and
    ``last_seen_at``; a naive datetime is taken to be UTC. The age is
    compared with the limits for ``entity_type`` in FRESHNESS_THRESHOLDS,
    falling back to the "DEFAULT" entry for unlisted types.

    Returns UNKNOWN when ``last_seen_at`` is missing. ``first_seen_at`` is
    accepted but not used.
    """
    if not last_seen_at:
        return FreshnessTag.UNKNOWN

    limits = FRESHNESS_THRESHOLDS.get(entity_type, FRESHNESS_THRESHOLDS["DEFAULT"])

    # Naive and aware datetimes cannot be subtracted from each other.
    seen = last_seen_at
    if seen.tzinfo is None:
        seen = seen.replace(tzinfo=timezone.utc)
    age_days = (datetime.now(timezone.utc) - seen).days

    ladder = (
        ("fresh", FreshnessTag.FRESH),
        ("aging", FreshnessTag.AGING),
        ("stale", FreshnessTag.STALE),
    )
    for limit_name, tag in ladder:
        if age_days <= limits[limit_name]:
            return tag
    return FreshnessTag.EXPIRED
92
+
93
+
94
def get_freshness_display(tag: FreshnessTag) -> dict:
    """
    Get display config for a freshness tag.

    Returns a new dict with ``label``, ``color`` and ``description`` keys.
    A tag that is not recognised gets the same entry as
    FreshnessTag.UNKNOWN, so every result has all three keys.
    """
    # NOTE(review): the month ranges in these descriptions match only the
    # 30/90/180-day limits in FRESHNESS_THRESHOLDS; types with other limits
    # get a tag whose description does not reflect their real age.
    display = {
        FreshnessTag.FRESH: {
            "label": "Fresh",
            "color": "green",
            "description": "Recently observed",
        },
        FreshnessTag.AGING: {
            "label": "Aging",
            "color": "yellow",
            "description": "Observed 1-3 months ago",
        },
        FreshnessTag.STALE: {
            "label": "Stale",
            "color": "orange",
            "description": "Observed 3-6 months ago — verify before use",
        },
        FreshnessTag.EXPIRED: {
            "label": "Expired",
            "color": "red",
            "description": "Observed over 6 months ago — likely inactive",
        },
        FreshnessTag.UNKNOWN: {
            "label": "Unknown",
            "color": "gray",
            "description": "No date information available",
        },
    }
    return display.get(tag, display[FreshnessTag.UNKNOWN])
utils/user_keys.py ADDED
@@ -0,0 +1,33 @@
1
+ """
2
+ Per-user API key resolution with fallback chain.
3
+
4
+ resolve_api_key checks the user's personal key first, then falls back to
5
+ the server-level environment variable in config.py.
6
+ """
7
+
8
+ from sqlalchemy import select as sa_select
9
+ from sqlalchemy.ext.asyncio import AsyncSession
10
+
11
+ from db.models import UserApiKey
12
+ from utils.encryption import decrypt_api_key
13
+ import config as _config
14
+
15
+
16
async def get_user_key(user_id: int, key_name: str, session: AsyncSession) -> str | None:
    """Look up and decrypt one user's stored API key.

    Selects the UserApiKey row matching ``user_id`` and ``key_name``.

    Returns:
        The result of ``decrypt_api_key`` on the stored value, or ``None``
        when the user has no row for that key name.
    """
    lookup = sa_select(UserApiKey).where(
        UserApiKey.user_id == user_id,
        UserApiKey.key_name == key_name,
    )
    record = (await session.execute(lookup)).scalar_one_or_none()
    return decrypt_api_key(record.encrypted_value) if record else None
27
+
28
+
29
async def resolve_api_key(user_id: int, key_name: str, session: AsyncSession) -> str:
    """Resolve an API key: the user's own key first, then the server config.

    Returns the user's personal key when ``get_user_key`` yields a non-empty
    value. Otherwise returns the attribute named ``key_name`` on the
    ``config`` module, or "" when that attribute is missing or falsy.
    """
    personal = await get_user_key(user_id, key_name, session)
    if not personal:
        # NOTE(review): key_name is used as an attribute name on the config
        # module with no allow-list — confirm callers never pass a
        # user-controlled name, since any config attribute could be read.
        return getattr(_config, key_name, "") or ""
    return personal