voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
export/stix.py ADDED
@@ -0,0 +1,418 @@
1
+ """
2
+ export/stix.py — Converts VoidAccess entities and investigations into STIX 2.1 bundles.
3
+
4
+ Uses the stix2 Python library throughout; no manual JSON construction.
5
+
6
+ Public interface
7
+ ----------------
8
+ entity_to_stix_indicator(entity) → stix2.Indicator | None
9
+ entity_to_stix_malware(entity) → stix2.Malware | None
10
+ entity_to_stix_threat_actor(entity) → stix2.ThreatActor | None
11
+ investigation_to_stix_bundle(investigation_id, include_relationships, entity_ids) → stix2.Bundle | None
12
+ bundle_to_json(bundle) → str
13
+ bundle_to_dict(bundle) → dict
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ import os
21
+ from typing import Any, Optional, Union
22
+ import uuid
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Graceful import of stix2
28
+ # ---------------------------------------------------------------------------
29
+
30
+ try:
31
+ import stix2 # type: ignore
32
+ _STIX2_AVAILABLE = True
33
+ except ImportError:
34
+ stix2 = None # type: ignore
35
+ _STIX2_AVAILABLE = False
36
+ logger.warning(
37
+ "stix2 not installed — export/stix.py functions will return None / empty Bundle"
38
+ )
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # STIX pattern templates per entity type
43
+ # ---------------------------------------------------------------------------
44
+
45
# Maps a VoidAccess entity type to a STIX pattern template. Each template has a
# single ``{value}`` placeholder that entity_to_stix_indicator fills in with
# str.format. Entity types with no entry here yield no Indicator.
# NOTE(review): "cryptocurrency-wallet", "vulnerability" and "malware" do not
# look like standard STIX 2.1 cyber-observable object types — confirm that
# downstream consumers accept these patterns.
_STIX_PATTERNS: dict[str, str] = {
    # All three cryptocurrency address types share the same pattern.
    "BITCOIN_ADDRESS": "[cryptocurrency-wallet:address = '{value}']",
    "ETHEREUM_ADDRESS": "[cryptocurrency-wallet:address = '{value}']",
    "MONERO_ADDRESS": "[cryptocurrency-wallet:address = '{value}']",
    "EMAIL_ADDRESS": "[email-message:from_ref.value = '{value}']",
    "ONION_URL": "[url:value = '{value}']",
    # This template targets the ipv4-addr observable type.
    "IP_ADDRESS": "[ipv4-addr:value = '{value}']",
    "CVE_NUMBER": "[vulnerability:name = '{value}']",
    "MALWARE_FAMILY": "[malware:name = '{value}']",
    "RANSOMWARE_GROUP": "[malware:name = '{value}']",
}

# Entity types that map to STIX Malware objects
_MALWARE_TYPES = frozenset({"MALWARE_FAMILY", "RANSOMWARE_GROUP"})
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Confidence mapping: VoidAccess float → STIX integer (0-100)
62
+ # ---------------------------------------------------------------------------
63
+
64
+
65
+ def _to_stix_confidence(confidence: float) -> int:
66
+ return min(100, max(0, int(round(confidence * 100))))
67
+
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # Public conversion functions
71
+ # ---------------------------------------------------------------------------
72
+
73
+
74
+ def entity_to_stix_indicator(entity: Any) -> Optional[Any]:
75
+ """
76
+ Convert a single NormalizedEntity to a STIX 2.1 Indicator object.
77
+
78
+ Returns None for entity types without a clear STIX pattern mapping,
79
+ and returns None (with a warning) if stix2 is not installed.
80
+ """
81
+ if not _STIX2_AVAILABLE:
82
+ return None
83
+
84
+ pattern_template = _STIX_PATTERNS.get(entity.entity_type)
85
+ if pattern_template is None:
86
+ return None
87
+
88
+ safe_value = entity.value.replace("'", "\\'")
89
+ pattern = pattern_template.format(value=safe_value)
90
+
91
+ # Determine indicator_types from entity_type
92
+ indicator_types = ["unknown"]
93
+ etype = entity.entity_type
94
+ if etype in ("MALWARE_FAMILY", "RANSOMWARE_GROUP"):
95
+ indicator_types = ["malicious-activity"]
96
+ elif etype in ("BITCOIN_ADDRESS", "ETHEREUM_ADDRESS", "MONERO_ADDRESS"):
97
+ indicator_types = ["malicious-activity"]
98
+ elif etype in ("IP_ADDRESS", "ONION_URL"):
99
+ indicator_types = ["malicious-activity"]
100
+ elif etype == "CVE_NUMBER":
101
+ indicator_types = ["compromised"]
102
+
103
+ try:
104
+ indicator = stix2.Indicator(
105
+ name=f"{entity.entity_type}: {entity.value[:80]}",
106
+ pattern=pattern,
107
+ pattern_type="stix",
108
+ indicator_types=indicator_types,
109
+ confidence=_to_stix_confidence(entity.confidence),
110
+ external_references=(
111
+ [{"source_name": "voidaccess", "url": entity.source_url}]
112
+ if entity.source_url
113
+ else []
114
+ ),
115
+ )
116
+ return indicator
117
+ except Exception as exc:
118
+ logger.warning("entity_to_stix_indicator failed for %r: %s", entity.value, exc)
119
+ return None
120
+
121
+
122
def entity_to_stix_malware(entity: Any) -> Optional[Any]:
    """
    Build a STIX 2.1 Malware object from a malware-type entity.

    Only entity types listed in _MALWARE_TYPES are converted; every other
    type, a missing stix2 install, or a construction error yields None
    (construction errors are logged as warnings).
    """
    if not (_STIX2_AVAILABLE and entity.entity_type in _MALWARE_TYPES):
        return None

    try:
        malware_kwargs = {
            "name": entity.value,
            "is_family": True,
            "confidence": _to_stix_confidence(entity.confidence),
            "external_references": [],
        }
        # Reference the source page only when a URL is known.
        if entity.source_url:
            malware_kwargs["external_references"] = [
                {"source_name": "voidaccess", "url": entity.source_url}
            ]
        return stix2.Malware(**malware_kwargs)
    except Exception as exc:
        logger.warning("entity_to_stix_malware failed for %r: %s", entity.value, exc)
        return None
149
+
150
+
151
def entity_to_stix_threat_actor(entity: Any) -> Optional[Any]:
    """
    Build a STIX 2.1 ThreatActor object from a THREAT_ACTOR_HANDLE entity.

    The handle is used both as the actor's name and as its only alias.
    Every other entity type, a missing stix2 install, or a construction
    error yields None (construction errors are logged as warnings).
    """
    if not (_STIX2_AVAILABLE and entity.entity_type == "THREAT_ACTOR_HANDLE"):
        return None

    try:
        handle = entity.value
        actor_kwargs = {
            "name": handle,
            "aliases": [handle],
            "confidence": _to_stix_confidence(entity.confidence),
            "external_references": [],
        }
        # Reference the source page only when a URL is known.
        if entity.source_url:
            actor_kwargs["external_references"] = [
                {"source_name": "voidaccess", "url": entity.source_url}
            ]
        return stix2.ThreatActor(**actor_kwargs)
    except Exception as exc:
        logger.warning(
            "entity_to_stix_threat_actor failed for %r: %s", entity.value, exc
        )
        return None
180
+
181
+
182
def investigation_to_stix_bundle(
    investigation_id: Any,
    include_relationships: bool = True,
    entity_ids: Optional[list[str]] = None,
) -> Any:
    """
    Load all entities for an investigation and return a STIX 2.1 Bundle.

    If entity_ids is a non-empty list, only entities whose id is in that list
    are exported; entries that are not valid UUIDs are ignored. An empty or
    omitted list means "no filter".

    If include_relationships=True, adds STIX Relationship objects for entity
    pairs that have edges in the graph (see _build_stix_relationships).

    Returns None if stix2 is not installed. Returns an empty Bundle if:
      - entity_ids was given but contains no valid UUID
      - no entities could be loaded (DATABASE_URL not set, investigation not
        found, or a load error)
      - Bundle construction fails (a warning is logged)
    """
    if not _STIX2_AVAILABLE:
        return _empty_bundle()

    filter_uuids: Optional[list[uuid.UUID]] = None
    if entity_ids:
        # Reuse the shared coercion helper rather than a second inline parser.
        filter_uuids = [
            coerced
            for coerced in map(_coerce_uuid, entity_ids)
            if coerced is not None
        ]
        if not filter_uuids:
            return _empty_bundle()

    entities = _load_entities_for_investigation(investigation_id, entity_ids=filter_uuids)
    if not entities:
        return _empty_bundle()

    stix_objects: list[Any] = []
    stix_id_map: dict[str, str] = {}  # entity.value → stix_object.id

    for entity in entities:
        indicator = entity_to_stix_indicator(entity)
        if indicator:
            stix_objects.append(indicator)
            stix_id_map[entity.value] = indicator.id

        # Malware / ThreatActor objects take precedence over the Indicator in
        # the id map: graph edges such as "uses" or "attributed-to" describe
        # the malware or actor itself, not the pattern that detects it.
        malware = entity_to_stix_malware(entity)
        if malware:
            stix_objects.append(malware)
            stix_id_map[entity.value] = malware.id

        actor = entity_to_stix_threat_actor(entity)
        if actor:
            stix_objects.append(actor)
            stix_id_map[entity.value] = actor.id

    if include_relationships and stix_objects:
        stix_objects.extend(_build_stix_relationships(investigation_id, stix_id_map))

    try:
        return stix2.Bundle(*stix_objects, allow_custom=True)
    except Exception as exc:
        logger.warning("investigation_to_stix_bundle: Bundle construction failed: %s", exc)
        return _empty_bundle()
243
+
244
+
245
def bundle_to_json(bundle: Any) -> str:
    """
    Return JSON string of a STIX bundle (pretty-printed, 2-space indent).

    Returns "{}" if stix2 is not installed, if bundle is None, or if
    serialization fails (a warning is logged in that last case).
    """
    if not _STIX2_AVAILABLE or bundle is None:
        return "{}"
    try:
        # stix2's serialize(pretty=True) orders properties per the STIX spec
        # but forces indent=4, overriding any indent passed in. Re-dump the
        # ordered output to get the documented 2-space indent; json.loads
        # preserves key order, so the spec ordering survives.
        spec_ordered = bundle.serialize(pretty=True)
        return json.dumps(json.loads(spec_ordered), indent=2)
    except Exception as exc:
        logger.warning("bundle_to_json failed: %s", exc)
        return "{}"
254
+
255
+
256
def bundle_to_dict(bundle: Any) -> dict:
    """
    Convert a STIX bundle into a plain dict by parsing its JSON form.

    Returns {} if stix2 is not installed, if bundle is None, or if the JSON
    cannot be parsed (a warning is logged in that last case).
    """
    if bundle is None or not _STIX2_AVAILABLE:
        return {}
    try:
        return json.loads(bundle_to_json(bundle))
    except Exception as exc:
        logger.warning("bundle_to_dict failed: %s", exc)
    return {}
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Internal helpers
270
+ # ---------------------------------------------------------------------------
271
+
272
+
273
def _empty_bundle() -> Any:
    """
    Return a STIX Bundle with no objects, or None if stix2 is not installed.

    Construction is first tried with allow_custom=True and retried without
    arguments if that raises.
    """
    if _STIX2_AVAILABLE:
        try:
            empty = stix2.Bundle(allow_custom=True)
        except Exception:
            empty = stix2.Bundle()
        return empty
    return None
281
+
282
+
283
+ def _load_entities_for_investigation(
284
+ investigation_id: Any,
285
+ entity_ids: Optional[list[uuid.UUID]] = None,
286
+ ) -> list[Any]:
287
+ """
288
+ Load entities from DB for the given investigation_id.
289
+
290
+ Includes entities owned directly by the investigation AND entities linked
291
+ via InvestigationEntityLink (canonical dedup junction table).
292
+
293
+ Returns [] if DATABASE_URL is not set, investigation not found, or any error.
294
+ """
295
+ if not os.getenv("DATABASE_URL"):
296
+ return []
297
+
298
+ try:
299
+ from db.session import get_session # noqa: PLC0415
300
+ from db.queries import get_investigation_by_id_or_run # noqa: PLC0415
301
+ from db.models import Entity, InvestigationEntityLink # noqa: PLC0415
302
+ from extractor.normalizer import NormalizedEntity # noqa: PLC0415
303
+
304
+ inv_uuid = _coerce_uuid(investigation_id)
305
+ if inv_uuid is None:
306
+ return []
307
+
308
+ with get_session() as session:
309
+ inv = get_investigation_by_id_or_run(session, inv_uuid)
310
+ if inv is None:
311
+ return []
312
+
313
+ linked_ids_subq = (
314
+ session.query(InvestigationEntityLink.entity_id)
315
+ .filter(InvestigationEntityLink.investigation_id == inv.id)
316
+ .subquery()
317
+ )
318
+ q = session.query(Entity).filter(
319
+ (Entity.investigation_id == inv.id)
320
+ | Entity.id.in_(linked_ids_subq)
321
+ )
322
+ db_entities = q.all()
323
+
324
+ if entity_ids is not None:
325
+ want = frozenset(entity_ids)
326
+ db_entities = [e for e in db_entities if e.id in want]
327
+
328
+ result: list[NormalizedEntity] = []
329
+ for e in db_entities:
330
+ source_url = ""
331
+ try:
332
+ if e.page:
333
+ source_url = e.page.url or ""
334
+ except Exception:
335
+ pass
336
+ ne = NormalizedEntity(
337
+ entity_type=e.entity_type,
338
+ value=e.canonical_value or e.value,
339
+ confidence=e.confidence,
340
+ source_url=source_url,
341
+ page_id=e.page_id,
342
+ context_snippet=e.context_snippet or "",
343
+ extraction_method="db",
344
+ )
345
+ result.append(ne)
346
+ return result
347
+
348
+ except Exception as exc:
349
+ logger.warning("_load_entities_for_investigation failed: %s", exc)
350
+ return []
351
+
352
+
353
def _build_stix_relationships(
    investigation_id: Any,
    stix_id_map: dict[str, str],
) -> list[Any]:
    """
    Build STIX Relationship objects from graph edges for the investigation.

    The graph is loaded with graph.builder.build_graph_from_db. An edge is
    converted only when both of its endpoints are keys of stix_id_map; its
    "edge_type" attribute (default "related-to") is translated with
    _edge_type_to_stix.

    Returns [] if stix2 is not installed, if investigation_id cannot be
    coerced to a UUID, or on any error (logged as a warning). Edges that
    stix2 rejects are skipped and logged at debug level.
    """
    if not _STIX2_AVAILABLE:
        return []
    try:
        from graph.builder import build_graph_from_db  # noqa: PLC0415

        inv_uuid = _coerce_uuid(investigation_id)
        if inv_uuid is None:
            # Same policy as _load_entities_for_investigation: never query
            # with a missing id (an unfiltered graph could span investigations).
            return []
        graph = build_graph_from_db(investigation_id=inv_uuid)

        relationships: list[Any] = []
        for source_node, target_node, data in graph.edges(data=True):
            src_stix_id = stix_id_map.get(source_node)
            tgt_stix_id = stix_id_map.get(target_node)
            if not src_stix_id or not tgt_stix_id:
                continue
            # Map VoidAccess edge types to STIX relationship types
            rel_type = _edge_type_to_stix(data.get("edge_type", "related-to"))
            try:
                relationships.append(
                    stix2.Relationship(
                        relationship_type=rel_type,
                        source_ref=src_stix_id,
                        target_ref=tgt_stix_id,
                    )
                )
            except Exception as exc:
                # One bad edge must not abort the export, but leave a trace.
                logger.debug(
                    "_build_stix_relationships: skipped edge %r -> %r: %s",
                    source_node,
                    target_node,
                    exc,
                )
        return relationships
    except Exception as exc:
        logger.warning("_build_stix_relationships failed: %s", exc)
        return []
392
+
393
+
394
+ def _edge_type_to_stix(edge_type: str) -> str:
395
+ """Map VoidAccess graph edge types to STIX relationship type strings."""
396
+ mapping = {
397
+ "CO_APPEARED_ON": "related-to",
398
+ "POSTED_BY": "attributed-to",
399
+ "LINKED_TO": "related-to",
400
+ "PAID_TO": "related-to",
401
+ "MEMBER_OF": "member-of",
402
+ "USED": "uses",
403
+ "CLAIMED": "attributed-to",
404
+ "LIKELY_SAME_ACTOR": "related-to",
405
+ "CONFIRMED_SAME_ACTOR": "related-to",
406
+ "FUNDED_BY": "related-to",
407
+ }
408
+ return mapping.get(edge_type, "related-to")
409
+
410
+
411
+ def _coerce_uuid(value: Any) -> Optional[uuid.UUID]:
412
+ """Try to coerce an arbitrary value to uuid.UUID. Returns None on failure."""
413
+ if isinstance(value, uuid.UUID):
414
+ return value
415
+ try:
416
+ return uuid.UUID(str(value))
417
+ except (ValueError, AttributeError):
418
+ return None
extractor/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """
2
+ extractor — Phase 2 entity extraction pipeline.
3
+
4
+ Public exports
5
+ --------------
6
+ ExtractionResult — dataclass returned by extraction functions
7
+ extract_entities_from_page — extract entities from a single page (async)
8
+ extract_entities_from_pages — extract entities from multiple pages concurrently (async)
9
+ """
10
+
11
+ from extractor.pipeline import (
12
+ ExtractionResult,
13
+ extract_entities_from_page,
14
+ extract_entities_from_pages,
15
+ )
16
+
17
+ __all__ = [
18
+ "ExtractionResult",
19
+ "extract_entities_from_page",
20
+ "extract_entities_from_pages",
21
+ ]