voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
api/routes/export.py ADDED
@@ -0,0 +1,359 @@
1
+ """
2
+ api/routes/export.py — Export endpoints for STIX, MISP, and Sigma.
3
+
4
+ GET /export/{investigation_id}/stix — download STIX 2.1 bundle as JSON
5
+ GET /export/{investigation_id}/misp — download MISP event as JSON
6
+ GET /export/{investigation_id}/sigma — download Sigma rules as ZIP
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import io
12
+ import logging
13
+ import os
14
+ import uuid
15
+ import zipfile
16
+
17
+ from fastapi import APIRouter, Depends, HTTPException
18
+ from fastapi.responses import Response, StreamingResponse
19
+ from pydantic import BaseModel, Field
20
+ from api.auth import CurrentUser, get_current_user
21
+
22
+ logger = logging.getLogger(__name__)
23
+ router = APIRouter()
24
+
25
+
26
class ExportSelectedBody(BaseModel):
    """Request payload naming the entity primary keys to put in an export bundle."""

    # When the field is omitted, each instance gets its own fresh empty list.
    entity_ids: list[str] = Field(default_factory=list)
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Auth helper
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
def _check_investigation_owner(investigation_id: str, current_user: CurrentUser) -> None:
    """Ensure *investigation_id* exists and belongs to *current_user*.

    Args:
        investigation_id: Identifier taken from the URL; must parse as a UUID
            and is looked up with ``get_investigation_by_id_or_run``.
        current_user: Authenticated caller; ``current_user.user.id`` is
            compared against the investigation's ``user_id``.

    Raises:
        HTTPException: 503 if ``DATABASE_URL`` is not set, 422 if
            *investigation_id* is not a valid UUID, 404 if no investigation
            matches, 403 if the investigation has a different owner, and 500
            if the lookup itself fails for any other reason.
    """
    if not os.getenv("DATABASE_URL"):
        raise HTTPException(status_code=503, detail="Database not configured")
    try:
        uid = uuid.UUID(investigation_id)
    except ValueError:
        # The parse error adds nothing useful for the client, so drop it
        # from the exception chain.
        raise HTTPException(status_code=422, detail="Invalid investigation ID format") from None
    try:
        from db.session import get_session  # noqa: PLC0415
        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415

        with get_session() as session:
            inv = get_investigation_by_id_or_run(session, uid)
            if inv is None:
                raise HTTPException(status_code=404, detail="Investigation not found")
            # Both sides are stringified so differing ID types still compare equal.
            if str(inv.user_id) != str(current_user.user.id):
                raise HTTPException(status_code=403, detail="Forbidden")
    except HTTPException:
        # Deliberate 404/403 responses must not be converted into a 500.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("_check_investigation_owner failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="Internal error") from exc
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Routes
63
+ # ---------------------------------------------------------------------------
64
+
65
+
66
@router.get("/{investigation_id}/stix")
async def export_stix(
    investigation_id: str,
    current_user: CurrentUser = Depends(get_current_user),
) -> Response:
    """
    Return STIX 2.1 bundle as JSON download.

    Content-Type: application/json
    Content-Disposition: attachment; filename="voidaccess_{id}_stix.json"

    Responds 422 when the investigation has no exportable entities and 500
    when building the bundle fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.stix import (  # noqa: PLC0415
            _load_entities_for_investigation,
            bundle_to_json,
            investigation_to_stix_bundle,
        )

        internal_id = _resolve_internal_investigation_id(investigation_id)
        entities = _load_entities_for_investigation(str(internal_id))
        if not entities:
            raise HTTPException(
                status_code=422,
                detail=(
                    "No exportable entities found for this investigation. "
                    "Ensure the investigation has completed successfully."
                ),
            )
        bundle = investigation_to_stix_bundle(str(internal_id))
        json_str = bundle_to_json(bundle)
        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_stix.json"
        return Response(
            content=json_str,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_stix failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="STIX export failed") from exc
105
+
106
+
107
@router.get("/{investigation_id}/misp")
async def export_misp(
    investigation_id: str,
    current_user: CurrentUser = Depends(get_current_user),
) -> Response:
    """
    Return MISP event as JSON download.

    Content-Type: application/json
    Content-Disposition: attachment; filename="voidaccess_{id}_misp.json"

    Responds 422 when the investigation has no exportable entities and 500
    when building the event fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.misp import investigation_to_misp_event, misp_event_to_json  # noqa: PLC0415
        from export.stix import _load_entities_for_investigation  # noqa: PLC0415

        internal_id = _resolve_internal_investigation_id(investigation_id)
        entities = _load_entities_for_investigation(str(internal_id))
        if not entities:
            raise HTTPException(
                status_code=422,
                detail=(
                    "No exportable entities found for this investigation. "
                    "Ensure the investigation has completed successfully."
                ),
            )
        event = investigation_to_misp_event(str(internal_id))
        json_str = misp_event_to_json(event)
        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_misp.json"
        return Response(
            content=json_str,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_misp failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="MISP export failed") from exc
147
+
148
+
149
@router.get("/{investigation_id}/sigma")
async def export_sigma(
    investigation_id: str,
    current_user: CurrentUser = Depends(get_current_user),
) -> StreamingResponse:
    """
    Generate Sigma rules and return as a ZIP download.

    Content-Type: application/zip
    Content-Disposition: attachment; filename="voidaccess_{id}_sigma.zip"

    Responds 422 when the investigation has no exportable entities or none
    of them yields a Sigma rule, and 500 when rule generation fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.sigma import (  # noqa: PLC0415
            entities_to_sigma_rules,
            sigma_rule_to_yaml,
        )
        from export.stix import _load_entities_for_investigation  # noqa: PLC0415

        internal_id = _resolve_internal_investigation_id(investigation_id)
        entities = _load_entities_for_investigation(str(internal_id))
        if not entities:
            raise HTTPException(
                status_code=422,
                detail=(
                    "No exportable entities found for this investigation. "
                    "Ensure the investigation has completed successfully."
                ),
            )
        rules = entities_to_sigma_rules(entities)
        if not rules:
            raise HTTPException(
                status_code=422,
                detail=(
                    "No Sigma-compatible entities found (requires IP_ADDRESS, "
                    "ONION_URL, CVE_NUMBER, MALWARE_FAMILY, or RANSOMWARE_GROUP)."
                ),
            )

        # Build zip in memory: one "<rule id>.yml" member per rule.
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            for rule in rules:
                # Fall back to a random name when "id" is missing *or* empty,
                # so no member ends up called "None.yml".
                rule_id = rule.get("id") or str(uuid.uuid4())
                zf.writestr(f"{rule_id}.yml", sigma_rule_to_yaml(rule))
        buf.seek(0)  # rewind so the response streams from the first byte

        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_sigma.zip"
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_sigma failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="Sigma export failed") from exc
209
+
210
+
211
@router.post("/{investigation_id}/stix/selected")
async def export_stix_selected(
    investigation_id: str,
    body: ExportSelectedBody,
    current_user: CurrentUser = Depends(get_current_user),
) -> Response:
    """STIX bundle including only the given entity rows (or all if *entity_ids* is empty).

    Responds 500 when building the bundle fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.stix import investigation_to_stix_bundle, bundle_to_json  # noqa: PLC0415

        internal_id = _resolve_internal_investigation_id(investigation_id)
        bundle = investigation_to_stix_bundle(
            str(internal_id),
            # An empty selection is passed on as None rather than [].
            entity_ids=body.entity_ids or None,
        )
        json_str = bundle_to_json(bundle)
        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_stix.json"
        return Response(
            content=json_str,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_stix_selected failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="STIX export failed") from exc
240
+
241
+
242
@router.post("/{investigation_id}/misp/selected")
async def export_misp_selected(
    investigation_id: str,
    body: ExportSelectedBody,
    current_user: CurrentUser = Depends(get_current_user),
) -> Response:
    """MISP JSON including only the given entities (or all if *entity_ids* is empty).

    Responds 500 when building the event fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.misp import investigation_to_misp_event, misp_event_to_json  # noqa: PLC0415

        internal_id = _resolve_internal_investigation_id(investigation_id)
        event = investigation_to_misp_event(
            str(internal_id),
            # An empty selection is passed on as None rather than [].
            entity_ids=body.entity_ids or None,
        )
        json_str = misp_event_to_json(event)
        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_misp.json"
        return Response(
            content=json_str,
            media_type="application/json",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_misp_selected failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="MISP export failed") from exc
271
+
272
+
273
@router.post("/{investigation_id}/sigma/selected")
async def export_sigma_selected(
    investigation_id: str,
    body: ExportSelectedBody,
    current_user: CurrentUser = Depends(get_current_user),
) -> StreamingResponse:
    """Sigma ZIP built from a subset of entities (or all if *entity_ids* is empty).

    Malformed entries in *entity_ids* are skipped; if none of the supplied
    IDs is a valid UUID the request is rejected with 422. Responds 500 when
    rule generation fails.
    """
    _check_investigation_owner(investigation_id, current_user)
    _validate_uuid(investigation_id)
    try:
        from export.sigma import (  # noqa: PLC0415
            entities_to_sigma_rules,
            sigma_rule_to_yaml,
        )
        from export.stix import _load_entities_for_investigation  # noqa: PLC0415

        internal_id = _resolve_internal_investigation_id(investigation_id)
        filter_ids = None
        if body.entity_ids:
            filter_ids = []
            for raw in body.entity_ids:
                try:
                    filter_ids.append(uuid.UUID(str(raw)))
                except (ValueError, AttributeError):
                    # Best effort: a single malformed ID does not fail the request.
                    continue
            if not filter_ids:
                # The caller asked for a subset but nothing usable remains.
                # Passing [] on would be ambiguous.
                # NOTE(review): the loader may read an empty filter as
                # "no filter" and export everything; confirm.
                raise HTTPException(
                    status_code=422,
                    detail="No valid entity IDs supplied",
                )
        entities = _load_entities_for_investigation(
            str(internal_id),
            entity_ids=filter_ids,
        )
        rules = entities_to_sigma_rules(entities)

        # Build zip in memory: one "<rule id>.yml" member per rule.
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            for rule in rules:
                # Fall back to a random name when "id" is missing *or* empty,
                # so no member ends up called "None.yml".
                rule_id = rule.get("id") or str(uuid.uuid4())
                zf.writestr(f"{rule_id}.yml", sigma_rule_to_yaml(rule))
        buf.seek(0)  # rewind so the response streams from the first byte

        # uuid.UUID() also accepts braces, a "urn:uuid:" prefix and
        # unhyphenated hex, so put the canonical spelling in the header
        # rather than the raw path parameter.
        filename = f"voidaccess_{uuid.UUID(investigation_id)}_sigma.zip"
        return StreamingResponse(
            buf,
            media_type="application/zip",
            headers={"Content-Disposition": f'attachment; filename="{filename}"'},
        )
    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("export_sigma_selected failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="Sigma export failed") from exc
323
+
324
+
325
+ # ---------------------------------------------------------------------------
326
+ # Helpers
327
+ # ---------------------------------------------------------------------------
328
+
329
+
330
def _resolve_internal_investigation_id(investigation_id: str) -> uuid.UUID:
    """Map URL *investigation_id* (primary key or ``run_id``) to internal investigation PK.

    Args:
        investigation_id: Identifier taken from the URL; must parse as a UUID.

    Returns:
        The ``id`` attribute of the investigation found by
        ``get_investigation_by_id_or_run``.

    Raises:
        HTTPException: 503 if ``DATABASE_URL`` is not set, 422 if
            *investigation_id* is not a valid UUID, 404 if no investigation
            matches, and 500 if the lookup fails for any other reason.
    """
    if not os.getenv("DATABASE_URL"):
        raise HTTPException(status_code=503, detail="Database not configured")
    try:
        uid = uuid.UUID(investigation_id)
    except ValueError:
        # The parse error adds nothing useful for the client, so drop it
        # from the exception chain.
        raise HTTPException(status_code=422, detail="Invalid investigation ID format") from None
    try:
        from db.session import get_session  # noqa: PLC0415
        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415

        with get_session() as session:
            inv = get_investigation_by_id_or_run(session, uid)
            if inv is None:
                raise HTTPException(status_code=404, detail="Investigation not found")
            # Read the PK while the session is still open.
            return inv.id
    except HTTPException:
        # Deliberate 404 responses must not be converted into a 500.
        raise
    except Exception as exc:
        # exc_info keeps the traceback in the log; the client only sees a generic 500.
        logger.warning("_resolve_internal_investigation_id failed: %s", exc, exc_info=True)
        raise HTTPException(status_code=500, detail="Internal error") from exc
352
+
353
+
354
+ def _validate_uuid(value: str) -> None:
355
+ """Raise HTTPException 422 if value is not a valid UUID string."""
356
+ try:
357
+ uuid.UUID(value)
358
+ except ValueError:
359
+ raise HTTPException(status_code=422, detail="Invalid investigation ID format")