voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
export/sigma.py ADDED
@@ -0,0 +1,342 @@
1
+ """
2
+ export/sigma.py — Generates draft Sigma detection rules from investigation entities.
3
+
4
+ Sigma rules are YAML-formatted SIEM-agnostic detection rules.
5
+ LLM assistance is optional; if provided, enriches description, tags, and falsepositives.
6
+
7
+ Public interface
8
+ ----------------
9
+ entities_to_sigma_rules(entities, llm) → list[dict]
10
+ sigma_rule_to_yaml(rule) → str
11
+ export_sigma_rules(investigation_id, output_dir, llm) → list[str]
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ import uuid as _uuid_module
20
+ from pathlib import Path
21
+ from typing import Any, Optional
22
+
23
+ import yaml
24
+
25
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Entity types that produce Sigma rules
# ---------------------------------------------------------------------------

# entities_to_sigma_rules() skips any entity whose type is not listed here.
# Keep this set in sync with the dispatch in _build_base_rule(): a type listed
# here without a matching builder yields None there and is skipped as well.
_SIGMA_ENTITY_TYPES = frozenset({"IP_ADDRESS", "ONION_URL", "CVE_NUMBER", "MALWARE_FAMILY", "RANSOMWARE_GROUP"})
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Base rule builders per entity type
35
+ # ---------------------------------------------------------------------------
36
+
37
+
38
def _base_rule_for_ip(entity: Any) -> dict:
    """Build the draft Sigma rule for an IP_ADDRESS entity.

    The rule selects events whose ``DestinationIp`` equals ``entity.value``.
    ``entity.source_url`` becomes the only reference when it is truthy;
    otherwise the reference list is empty.  Every call gets a fresh random
    UUID as the rule id.
    """
    ip = entity.value

    references = []
    if entity.source_url:
        references.append(entity.source_url)

    description = (
        f"Detects outbound network connection to IP address {ip} "
        "associated with dark web activity."
    )
    detection = {
        "selection": {"DestinationIp": ip},
        "condition": "selection",
    }

    # Key order is significant: the YAML serializer is called with
    # sort_keys=False, so the dict order is the order of the emitted rule.
    rule: dict = {}
    rule["title"] = f"Network connection to suspicious IP: {ip}"
    rule["id"] = str(_uuid_module.uuid4())
    rule["status"] = "experimental"
    rule["description"] = description
    rule["references"] = references
    rule["tags"] = ["attack.initial_access"]
    rule["logsource"] = {"category": "network", "product": "any"}
    rule["detection"] = detection
    rule["falsepositives"] = ["Unknown"]
    rule["level"] = "medium"
    return rule
55
+
56
+
57
def _base_rule_for_onion(entity: Any) -> dict:
    """Build the draft Sigma rule for an ONION_URL entity.

    The title and description name one specific hidden service, so the
    detection matches that service's hostname rather than every ``.onion``
    destination.  The hostname is taken from ``entity.value``, which may be a
    full URL (``http://abc.onion/path``) or a bare host (``abc.onion:8080``).
    If no hostname can be extracted, the selector falls back to the generic
    ``".onion"`` substring.

    ``entity.source_url`` becomes the only reference when it is truthy.
    Every call gets a fresh random UUID as the rule id.
    """
    from urllib.parse import urlsplit  # local import keeps the block self-contained

    raw = str(entity.value).strip()
    host = ""
    # urlsplit() only populates the host when the input has a scheme or a
    # leading "//", so a bare "abc.onion/path" needs the second attempt.
    for candidate in (raw, f"//{raw}"):
        try:
            host = urlsplit(candidate).hostname or ""
        except ValueError:
            # Malformed netloc (e.g. unbalanced IPv6 brackets); try the next form.
            host = ""
        if host:
            break

    return {
        "title": f"DNS query or connection to .onion address: {entity.value[:60]}",
        "id": str(_uuid_module.uuid4()),
        "status": "experimental",
        "description": f"Detects connection attempt to Tor hidden service {entity.value}.",
        "references": [entity.source_url] if entity.source_url else [],
        "tags": ["attack.command_and_control"],
        "logsource": {"category": "network", "product": "any"},
        "detection": {
            # Match the specific service; ".onion" is only the last-resort fallback.
            "selection": {"DestinationHostname|contains": host or ".onion"},
            "condition": "selection",
        },
        "falsepositives": ["Legitimate Tor browser usage"],
        "level": "medium",
    }
73
+
74
+
75
def _base_rule_for_cve(entity: Any) -> dict:
    """Build the draft Sigma rule for a CVE_NUMBER entity.

    The rule selects events whose ``CommandLine`` contains the CVE identifier
    held in ``entity.value``.  ``entity.source_url`` becomes the only
    reference when it is truthy.  Every call gets a fresh random UUID as the
    rule id.
    """
    cve_id = entity.value
    source = entity.source_url

    rule: dict = {
        "title": f"Exploitation attempt for {cve_id}",
        "id": str(_uuid_module.uuid4()),
        "status": "experimental",
    }
    rule["description"] = (
        f"Detects activity patterns related to exploitation of {cve_id} "
        "observed in dark web intelligence."
    )
    rule["references"] = [source] if source else []
    # NOTE(review): "attack.exploitation" does not look like an ATT&CK tactic
    # name -- confirm against the Sigma tag specification.
    rule["tags"] = ["attack.initial_access", "attack.exploitation"]
    # NOTE(review): a "network" logsource paired with a CommandLine selector
    # looks mismatched -- confirm the intended log source.
    rule["logsource"] = {"category": "network", "product": "any"}
    rule["detection"] = {
        "selection": {"CommandLine|contains": cve_id},
        "condition": "selection",
    }
    rule["falsepositives"] = ["Security scanners", "Penetration testing tools"]
    rule["level"] = "high"
    return rule
92
+
93
+
94
def _base_rule_for_malware(entity: Any) -> dict:
    """Build the draft Sigma rule for a malware-family style entity.

    _build_base_rule() routes both MALWARE_FAMILY and RANSOMWARE_GROUP here.
    The rule targets Windows process-creation events whose ``CommandLine``
    contains the family name held in ``entity.value``.
    ``entity.source_url`` becomes the only reference when it is truthy.
    Every call gets a fresh random UUID as the rule id.
    """
    family = entity.value
    source = entity.source_url

    selection = {"CommandLine|contains": family}
    summary = (
        f"Detects activity associated with {family} malware family "
        "as observed in dark web intelligence."
    )

    rule: dict = {}
    rule["title"] = f"Malware family activity: {family}"
    rule["id"] = str(_uuid_module.uuid4())
    rule["status"] = "experimental"
    rule["description"] = summary
    rule["references"] = [source] if source else []
    rule["tags"] = ["attack.execution"]
    rule["logsource"] = {"category": "process_creation", "product": "windows"}
    rule["detection"] = {"selection": selection, "condition": "selection"}
    rule["falsepositives"] = ["Unknown"]
    rule["level"] = "high"
    return rule
112
+
113
+
114
def _build_base_rule(entity: Any) -> Optional[dict]:
    """Pick the rule builder matching ``entity.entity_type`` and run it.

    Returns the builder's rule dict, or None when the entity type has no
    Sigma rule builder.
    """
    kind = entity.entity_type

    if kind == "IP_ADDRESS":
        builder = _base_rule_for_ip
    elif kind == "ONION_URL":
        builder = _base_rule_for_onion
    elif kind == "CVE_NUMBER":
        builder = _base_rule_for_cve
    elif kind in ("MALWARE_FAMILY", "RANSOMWARE_GROUP"):
        # Ransomware groups reuse the malware-family rule shape.
        builder = _base_rule_for_malware
    else:
        return None

    return builder(entity)
126
+
127
+
128
+ # ---------------------------------------------------------------------------
129
+ # LLM enrichment
130
+ # ---------------------------------------------------------------------------
131
+
132
# Prompt sent to the LLM by _enrich_with_llm().  It is filled in with
# str.format(base_rule_json=...), so any literal "{" or "}" added to this text
# must be doubled ("{{" / "}}"), otherwise formatting raises.
_LLM_PROMPT_TEMPLATE = """You are a threat intelligence analyst writing Sigma detection rules.
Given the following base Sigma rule as JSON, enrich three fields:
1. "description" — make it more precise and actionable
2. "tags" — use MITRE ATT&CK tactic/technique tags (e.g. attack.t1071)
3. "falsepositives" — list realistic false positive scenarios

Return ONLY a JSON object with exactly these three keys: description, tags, falsepositives.
Do not include any other text.

Base rule:
{base_rule_json}
"""
144
+
145
+
146
def _enrich_with_llm(rule: dict, llm: Any) -> dict:
    """Ask the LLM to improve a rule's description, tags and falsepositives.

    Args:
        rule: Base Sigma rule dict.  It is never mutated; a shallow copy is
            returned when enrichment succeeds.
        llm: Either an object exposing ``invoke(prompt)`` (LangChain style; the
            reply's ``.content`` is used when present) or a plain callable
            taking the prompt.  Anything else leaves the rule untouched.

    Returns:
        A copy of ``rule`` with the validated fields replaced, or ``rule``
        itself when the LLM is unusable, fails, or returns no parseable JSON
        object.

    Only well-formed values are accepted: ``description`` must be a non-blank
    string, and ``tags`` / ``falsepositives`` must be non-empty lists made up
    solely of strings.  Anything else keeps the base value, so a sloppy reply
    can neither erase information nor inject non-string items into the YAML.
    """
    try:
        prompt = _LLM_PROMPT_TEMPLATE.format(base_rule_json=json.dumps(rule, indent=2))

        # Support both LangChain-style (invoke) and simple callable interfaces.
        if hasattr(llm, "invoke"):
            response = llm.invoke(prompt)
            # LangChain returns a message object; fall back to str() otherwise.
            content = getattr(response, "content", str(response))
        elif callable(llm):
            content = str(llm(prompt))
        else:
            return rule

        if not isinstance(content, str):
            raise TypeError(
                f"unexpected LLM response content type: {type(content).__name__}"
            )

        # Models often wrap the object in markdown fences or add a sentence
        # around it despite the prompt.  Parse the outermost {...} span so
        # fences and surrounding chatter are ignored.
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in LLM response")
        enriched = json.loads(content[start:end + 1])

        updated = dict(rule)

        description = enriched.get("description")
        if isinstance(description, str) and description.strip():
            updated["description"] = description

        for key in ("tags", "falsepositives"):
            values = enriched.get(key)
            if (
                isinstance(values, list)
                and values
                and all(isinstance(item, str) for item in values)
            ):
                updated[key] = values

        return updated

    except Exception as exc:
        # Enrichment is strictly best-effort: any failure keeps the base rule.
        logger.warning("LLM enrichment failed for Sigma rule %r: %s", rule.get("id"), exc)
        return rule
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # Public interface
193
+ # ---------------------------------------------------------------------------
194
+
195
+
196
def entities_to_sigma_rules(
    entities: list[Any],
    llm: Optional[Any] = None,
) -> list[dict]:
    """
    Turn investigation entities into draft Sigma rule dicts.

    Only IP_ADDRESS, ONION_URL, CVE_NUMBER, MALWARE_FAMILY and
    RANSOMWARE_GROUP entities yield a rule; everything else is skipped.
    Output order follows input order.

    When ``llm`` is given, each base rule is passed through LLM enrichment
    (description, tags, falsepositives), which hands back the base rule if
    the LLM fails.
    """
    use_llm = llm is not None
    collected: list[dict] = []

    for candidate in entities:
        if candidate.entity_type in _SIGMA_ENTITY_TYPES:
            draft = _build_base_rule(candidate)
            if draft is not None:
                collected.append(_enrich_with_llm(draft, llm) if use_llm else draft)

    return collected
220
+
221
+
222
def sigma_rule_to_yaml(rule: dict) -> str:
    """Serialize a Sigma rule dict to block-style YAML.

    Key order is preserved (``sort_keys=False``) and non-ASCII text is written
    as-is (``allow_unicode=True``).

    ``yaml.safe_dump`` is used so that only plain YAML types are emitted.  A
    value that is not a primitive, list or dict makes serialization fail
    rather than producing ``!!python/...`` tags that Sigma tooling cannot load.

    Returns:
        The YAML text, or ``""`` when serialization fails (the failure is
        logged as a warning).
    """
    try:
        return yaml.safe_dump(
            rule,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
        )
    except Exception as exc:
        # Callers rely on this never raising; "" signals failure.
        logger.warning("sigma_rule_to_yaml failed: %s", exc)
        return ""
229
+
230
+
231
def export_sigma_rules(
    investigation_id: Any,
    output_dir: str,
    llm: Optional[Any] = None,
) -> list[str]:
    """
    Load entities for an investigation, generate Sigma rules, and write each to
    {output_dir}/{uuid}.yml.

    Args:
        investigation_id: Identifier handed to the entity loader.
        output_dir: Target directory; created (with parents) if missing.
        llm: Optional LLM forwarded to entities_to_sigma_rules() for enrichment.

    Returns:
        Paths of the files actually written.  Returns [] if the loader yields
        no entities (investigation not found, DATABASE_URL not set, load
        error) or if no entity produces a rule.

    A rule that cannot be serialized, or whose file cannot be written, is
    logged and skipped; it never aborts the export and is never reported as
    written.
    """
    entities = _load_entities_for_investigation(investigation_id)
    if not entities:
        return []

    rules = entities_to_sigma_rules(entities, llm=llm)
    if not rules:
        return []

    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    written: list[str] = []
    for rule in rules:
        rule_id = rule.get("id") or str(_uuid_module.uuid4())

        # sigma_rule_to_yaml() signals failure with "".  Writing that out would
        # leave an empty .yml on disk and report it as a successful export.
        yaml_content = sigma_rule_to_yaml(rule)
        if not yaml_content:
            logger.warning(
                "Skipping Sigma rule %r: YAML serialization produced no content",
                rule_id,
            )
            continue

        filename = out_path / f"{rule_id}.yml"
        try:
            filename.write_text(yaml_content, encoding="utf-8")
        except (OSError, UnicodeError) as exc:
            logger.warning("Failed to write Sigma rule %r: %s", rule_id, exc)
        else:
            written.append(str(filename))

    return written
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Internal DB helper
270
+ # ---------------------------------------------------------------------------
271
+
272
+
273
def _load_entities_for_investigation(investigation_id: Any) -> list[Any]:
    """Load the investigation's entities from the DB as NormalizedEntity objects.

    Includes entities owned directly by the investigation AND entities linked
    to it via InvestigationEntityLink.  Each entity's value is its
    ``canonical_value`` when set, else its raw ``value``; ``source_url`` is the
    URL of the entity's page, or ``""`` when it cannot be resolved.

    Returns [] when DATABASE_URL is unset, the id cannot be coerced to a UUID,
    the investigation is not found, or any error occurs (logged as a warning).
    """
    if not os.getenv("DATABASE_URL"):
        return []

    try:
        # Imported lazily so this module can be imported without the DB stack.
        from db.session import get_session  # noqa: PLC0415
        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415
        from db.models import Entity, InvestigationEntityLink  # noqa: PLC0415
        from extractor.normalizer import NormalizedEntity  # noqa: PLC0415

        inv_uuid = _coerce_uuid(investigation_id)
        if inv_uuid is None:
            return []

        with get_session() as session:
            inv = get_investigation_by_id_or_run(session, inv_uuid)
            if inv is None:
                return []

            # Ids of entities attached to this investigation through the link table.
            linked_ids_subq = (
                session.query(InvestigationEntityLink.entity_id)
                .filter(InvestigationEntityLink.investigation_id == inv.id)
                .subquery()
            )
            db_entities = (
                session.query(Entity)
                .filter(
                    (Entity.investigation_id == inv.id)
                    | Entity.id.in_(linked_ids_subq)
                )
                .all()
            )

            result: list[NormalizedEntity] = []
            for e in db_entities:
                source_url = ""
                try:
                    if e.page:
                        source_url = e.page.url or ""
                except Exception as exc:
                    # Best-effort: a missing source URL must not drop the
                    # entity, but leave a trace instead of failing silently.
                    logger.debug(
                        "sigma: could not resolve source page URL for an entity: %s",
                        exc,
                    )
                result.append(NormalizedEntity(
                    entity_type=e.entity_type,
                    value=e.canonical_value or e.value,
                    confidence=e.confidence,
                    source_url=source_url,
                    page_id=e.page_id,
                    context_snippet=e.context_snippet or "",
                ))
            return result

    except Exception as exc:
        logger.warning("sigma _load_entities_for_investigation failed: %s", exc)
        return []
332
+
333
+
334
def _coerce_uuid(value: Any) -> Optional[_uuid_module.UUID]:
    """Coerce ``value`` to a ``uuid.UUID``; return None when that is impossible.

    A UUID instance is returned unchanged.  Any other value is converted with
    ``str()`` and parsed.  Surrounding whitespace is stripped first, so an id
    copied from a shell or read from a file with a trailing newline still
    resolves.
    """
    if isinstance(value, _uuid_module.UUID):
        return value
    try:
        return _uuid_module.UUID(str(value).strip())
    except (ValueError, AttributeError):
        return None