voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
@@ -0,0 +1,253 @@
1
+ """
2
+ sources/historical_intel.py — Historical threat-actor fallback enrichment.
3
+
4
+ Activated ONLY when:
5
+ a) entity type is THREAT_ACTOR, RANSOMWARE_GROUP, or MALWARE_FAMILY
6
+ b) all other enrichment sources returned 0 results for this entity
7
+
8
+ Queries:
9
+ A. CISA advisories (cache already populated by sources/cisa.py)
10
+ B. MITRE ATT&CK STIX data (7-day cache, ~50MB)
11
+ C. FBI/DOJ press releases RSS (12-hour cache)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import logging
18
+ import os
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ import aiohttp
23
+
24
+ from sources.cache import CachedFeed
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# On-disk cache files handed to CachedFeed for the two remote feeds.
# NOTE(review): hard-coded /tmp paths are POSIX-only and live in a
# world-writable directory — consider a configurable cache directory.
_MITRE_CACHE = "/tmp/voidaccess_mitre_attack.json"
_FBI_CACHE = "/tmp/voidaccess_fbi_press.json"

# Lower-cased malware / ransomware / actor name -> MITRE ATT&CK group name.
# get_techniques_for_actor() swaps the query for the mapped group name before
# searching intrusion-sets; a value of None makes it return [] for that name
# without searching at all.
ACTOR_ALIASES = {
    # REvil (a.k.a. Sodinokibi) and its predecessor GandCrab are attributed by
    # MITRE ATT&CK to GOLD SOUTHFIELD (G0115), not to Wizard Spider.
    "revil": "GOLD SOUTHFIELD",
    "sodinokibi": "GOLD SOUTHFIELD",
    "gandcrab": "GOLD SOUTHFIELD",
    "lockbit": None,
    # Wizard Spider (G0102): TrickBot / Ryuk / Conti operators.
    "conti": "Wizard Spider",
    "ryuk": "Wizard Spider",
    "trickbot": "Wizard Spider",
    "darkside": "FIN7",
    "blackmatter": "FIN7",
    "alphv": None,
    "blackcat": None,
    "hive": None,
    "cl0p": "TA505",
    "ta505": "TA505",
    # NOTE(review): Cobalt Strike is a tool used by many actors; mapping it to
    # a single group looks questionable — confirm intent.
    "cobalt strike": "Cobalt Group",
    "apt28": "APT28",
    "fancy bear": "APT28",
    "lazarus": "Lazarus Group",
    # NOTE(review): ATT&CK tracks APT38 and Carbanak as groups of their own;
    # these two entries fold them into a related group — confirm intent.
    "apt38": "Lazarus Group",
    "fin7": "FIN7",
    "carbanak": "FIN7",
}
54
+
55
# MITRE ATT&CK enterprise STIX bundle, cached at _MITRE_CACHE with a
# 7-day ttl_seconds.
_mitre_feed = CachedFeed(
    "https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json",
    _MITRE_CACHE,
    ttl_seconds=7 * 24 * 60 * 60,
)

# justice.gov press-release RSS feed, cached at _FBI_CACHE with a
# 12-hour ttl_seconds.
_fbi_feed = CachedFeed(
    "https://www.justice.gov/news/press-releases/rss",
    _FBI_CACHE,
    ttl_seconds=12 * 60 * 60,
)
64
+
65
+
66
async def _fetch_mitre_index() -> dict:
    """Build a lookup of ATT&CK intrusion-sets keyed by lower-cased name/alias.

    The payload from ``_mitre_feed`` may be a bare list of STIX objects or a
    bundle dict carrying them under ``"objects"``.

    Returns:
        ``{lower-cased name or alias: intrusion-set object}``. Empty when the
        feed is unavailable or has an unexpected shape. When two groups share
        a key, the one appearing later in the payload wins.
    """
    data = await _mitre_feed.fetch()
    if data is None:
        return {}

    if isinstance(data, list):
        objects = data
    elif isinstance(data, dict):
        # ``or []`` also covers an explicit ``"objects": null``.
        objects = data.get("objects") or []
    else:
        return {}

    index: dict = {}
    for obj in objects:
        if not isinstance(obj, dict) or obj.get("type") != "intrusion-set":
            continue
        # Revoked / deprecated groups are superseded entries in the ATT&CK
        # bundle; indexing them could shadow the live group of the same name.
        if obj.get("revoked") or obj.get("x_mitre_deprecated"):
            continue
        labels = [obj.get("name"), *(obj.get("aliases") or [])]
        for label in labels:
            # Ignore missing, empty, or non-string names instead of crashing.
            if isinstance(label, str) and label:
                index[label.lower()] = obj
    return index
82
+
83
+
84
async def _fetch_fbi_results(entity_value: str) -> list[dict]:
    """Return press-release entries whose title mentions *entity_value*.

    Matching is a case-insensitive substring test against each entry's
    ``title``. Only a list payload from ``_fbi_feed`` is searched; any other
    shape yields no results.

    Args:
        entity_value: Name to look for. Blank values never match.

    Returns:
        One dict per matching entry with keys ``source``, ``entity_value``,
        ``press_title``, ``press_url`` and ``press_date``.
    """
    needle = (entity_value or "").strip().lower()
    if not needle:
        # "" is a substring of every title, so a blank query would otherwise
        # return the entire feed.
        return []

    data = await _fbi_feed.fetch()
    if not isinstance(data, list):
        return []

    results: list[dict] = []
    for entry in data:
        if not isinstance(entry, dict):
            continue
        title = entry.get("title")
        if not isinstance(title, str) or needle not in title.lower():
            continue
        results.append({
            "source": "fbi_doj_press",
            "entity_value": entity_value,
            "press_title": title,
            # ``or ""`` keeps these fields strings even when the feed stores
            # an explicit null.
            "press_url": entry.get("link") or "",
            "press_date": entry.get("published") or "",
        })
    return results
104
+
105
+
106
async def get_techniques_for_actor(actor_name: str) -> list[str]:
    """Return MITRE ATT&CK T-codes used by a threat group.

    The name is lower-cased and first translated through ``ACTOR_ALIASES``;
    an alias mapped to ``None`` yields ``[]`` immediately. The group is then
    located among intrusion-set names and aliases — an exact match wins,
    otherwise the first case-insensitive partial match is used — and its
    ``uses`` relationships are followed to attack-pattern objects.

    Args:
        actor_name: Group, malware or ransomware name. Blank values yield [].

    Returns:
        De-duplicated T-codes in the order their relationships appear in the
        bundle; ``[]`` when the actor is not found or the feed is unavailable.
    """
    q = (actor_name or "").strip().lower()
    if not q:
        # "" is a substring of every name and would match the first group.
        return []

    # Resolve the alias before fetching so names known to have no group do
    # not trigger a load of the STIX bundle.
    if q in ACTOR_ALIASES:
        mapped = ACTOR_ALIASES[q]
        if mapped is None:
            return []
        q = mapped.lower()

    data = await _mitre_feed.fetch()
    if data is None:
        return []
    if isinstance(data, list):
        objects = data
    elif isinstance(data, dict):
        objects = data.get("objects") or []
    else:
        return []
    objects = [obj for obj in objects if isinstance(obj, dict)]

    # Locate the intrusion-set STIX ID: exact name/alias match beats partial.
    group_stix_id: Optional[str] = None
    for obj in objects:
        if obj.get("type") != "intrusion-set":
            continue
        # Revoked / deprecated groups are superseded entries in the bundle.
        if obj.get("revoked") or obj.get("x_mitre_deprecated"):
            continue
        labels = [
            label.lower()
            for label in (obj.get("name"), *(obj.get("aliases") or []))
            if isinstance(label, str)
        ]
        if q in labels:
            group_stix_id = obj.get("id")
            break
        if group_stix_id is None and any(q in label for label in labels):
            # Remember the first partial hit but keep scanning for an exact one.
            group_stix_id = obj.get("id")

    if not group_stix_id:
        return []

    # Build attack-pattern stix_id -> T-code lookup.
    technique_map: dict[str, str] = {}
    for obj in objects:
        if obj.get("type") != "attack-pattern":
            continue
        for ref in (obj.get("external_references") or []):
            if not isinstance(ref, dict):
                continue
            if ref.get("source_name") == "mitre-attack":
                ext_id = ref.get("external_id") or ""
                if isinstance(ext_id, str) and ext_id.startswith("T"):
                    technique_map[obj.get("id", "")] = ext_id
                # Only the first mitre-attack reference is considered.
                break

    # Collect T-codes via "uses" relationships from this group.
    t_codes: list[str] = []
    seen: set[str] = set()
    for obj in objects:
        if (
            obj.get("type") == "relationship"
            and obj.get("relationship_type") == "uses"
            and obj.get("source_ref") == group_stix_id
        ):
            t_code = technique_map.get(obj.get("target_ref", ""))
            if t_code and t_code not in seen:
                seen.add(t_code)
                t_codes.append(t_code)

    return t_codes
167
+
168
+
169
async def enrich_historical(entities_by_type: dict[str, list[dict]]) -> list[dict]:
    """Historical fallback enrichment.

    *entities_by_type* is a dict mapping entity type string ->
    list of entity dicts that had no enrichment results.

    Only THREAT_ACTOR, RANSOMWARE_GROUP and MALWARE_FAMILY entities are
    processed, in that order. For each entity value this looks for an exact
    (case-insensitive) MITRE intrusion-set name/alias, matching press-release
    titles, and one matching CISA advisory.

    Returns:
        A flat list of result dicts tagged by their ``source`` key
        (``mitre_attack``, ``fbi_doj_press``, ``cisa_advisory_historical``).
    """
    # A tuple, not a set: iteration order (and thus result order) stays the
    # same from run to run.
    fallback_types = ("THREAT_ACTOR", "RANSOMWARE_GROUP", "MALWARE_FAMILY")
    relevant_entities = [
        ent
        for et in fallback_types
        for ent in (entities_by_type.get(et) or [])
    ]
    if not relevant_entities:
        return []

    results: list[dict] = []
    # None = not fetched yet. An empty dict is a valid (failed/empty) result
    # and must not trigger a re-fetch for every entity.
    mitre_index: Optional[dict] = None

    for ent in relevant_entities:
        if not isinstance(ent, dict):
            continue
        ev = ent.get("value") or ent.get("entity_value") or ""
        if not isinstance(ev, str) or not ev.strip():
            continue

        entity_type = ent.get("type") or ent.get("entity_type", "")
        q = ev.strip().lower()

        if mitre_index is None:
            mitre_index = await _fetch_mitre_index()

        mitre_match = mitre_index.get(q)
        if mitre_match:
            # external_references may be missing, null, or empty; never index
            # into it blindly.
            refs = [
                ref
                for ref in (mitre_match.get("external_references") or [])
                if isinstance(ref, dict)
            ]
            results.append({
                "source": "mitre_attack",
                "entity_type": entity_type,
                "entity_value": ev,
                "mitre_id": (refs[0].get("external_id") or "") if refs else "",
                "mitre_name": mitre_match.get("name", ""),
                "aliases": mitre_match.get("aliases") or [],
                "description": mitre_match.get("description", ""),
                # NOTE(review): these are the external IDs on the group's own
                # mitre-attack references, not the attack-patterns it uses —
                # the key name suggests T-codes were intended; confirm with
                # consumers before changing the contents.
                "techniques": [
                    ref.get("external_id", "")
                    for ref in refs
                    if ref.get("source_name") == "mitre-attack"
                ],
            })

        results.extend(await _fetch_fbi_results(ev))

        cisa_adv = await _cisa_advisory_for_entity(ev, entity_type)
        if cisa_adv:
            results.append(cisa_adv)

        # Pause between entities (kept from the original pacing).
        await asyncio.sleep(0.5)

    return results
226
+
227
+
228
async def _cisa_advisory_for_entity(entity_value: str, entity_type: str) -> Optional[dict]:
    """Return the first CISA advisory mentioning *entity_value*, if any.

    The advisory feed is taken from ``sources.cisa._adv_feed``. Matching is a
    case-insensitive substring test against each advisory's ``title`` and its
    space-joined ``tags``.

    Args:
        entity_value: Name to look for. Blank values never match.
        entity_type: Copied verbatim into the result.

    Returns:
        A dict with ``source`` set to ``cisa_advisory_historical`` plus the
        advisory title, URL and publication date, or ``None`` when nothing
        matches or the feed cannot be loaded.
    """
    q = (entity_value or "").strip().lower()
    if not q:
        # "" is a substring of every title and would match the first advisory.
        return None

    try:
        # Function-scope import: a failure to load sources.cisa degrades to
        # "no advisory" instead of breaking this module.
        from sources.cisa import _adv_feed
    except Exception:
        # Best-effort by design, but leave a trace so the gap is diagnosable.
        logger.debug("CISA advisory feed unavailable", exc_info=True)
        return None

    data = await _adv_feed.fetch()
    if isinstance(data, list):
        advisories = data
    elif isinstance(data, dict):
        advisories = data.get("items") or []
    else:
        return None

    for adv in advisories:
        if not isinstance(adv, dict):
            continue
        title = adv.get("title") or ""
        raw_tags = adv.get("tags") or []
        if isinstance(raw_tags, str):
            # Joining a bare string would space out its characters.
            raw_tags = [raw_tags]
        tags = " ".join(str(tag) for tag in raw_tags).lower()
        if q in str(title).lower() or q in tags:
            return {
                "source": "cisa_advisory_historical",
                "entity_type": entity_type,
                "entity_value": entity_value,
                "advisory_title": adv.get("title", ""),
                "advisory_url": adv.get("url", ""),
                "advisory_date": adv.get("datePublished", ""),
            }
    return None