voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
cli/commands/enrich.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/commands/enrich.py — re-run enrichment over a stored investigation.
|
|
3
|
+
|
|
4
|
+
Useful after weeks/months: IP feeds shift, emails appear in new breaches,
|
|
5
|
+
domains move. This refreshes:
|
|
6
|
+
- IP reputation (Feodo, AbuseIPDB, GreyNoise)
|
|
7
|
+
- Domain reputation (URLScan, SecurityTrails — when keys present)
|
|
8
|
+
- Hash reputation (VirusTotal, Hybrid Analysis)
|
|
9
|
+
- Email reputation (HIBP, EmailRep)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
import uuid
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
import typer
|
|
20
|
+
from rich.console import Console
|
|
21
|
+
|
|
22
|
+
console = Console()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run(
    target: str = typer.Argument(..., help="Investigation id or .json file"),
    skip_ips: bool = typer.Option(False, "--skip-ips"),
    skip_domains: bool = typer.Option(False, "--skip-domains"),
    skip_hashes: bool = typer.Option(False, "--skip-hashes"),
    skip_emails: bool = typer.Option(False, "--skip-emails"),
) -> None:
    """Re-enrich entities for an existing investigation."""
    # CLI entry point: apply the CLI configuration to the environment, resolve
    # ``target`` to an investigation id, then drive the async enrichment
    # pipeline to completion. Exits with status 1 when the target is unknown.
    from cli import config as cli_config

    cli_config.apply_env()

    inv_id = _resolve_investigation_id(target)
    if inv_id is not None:
        # Each --skip-* flag disables the matching enrichment stage.
        pipeline = _run(inv_id, skip_ips, skip_domains, skip_hashes, skip_emails)
        asyncio.run(pipeline)
        return

    console.print(f"[red]Cannot resolve investigation:[/red] {target}")
    raise typer.Exit(code=1)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _resolve_investigation_id(target: str) -> Optional[str]:
    """Resolve *target* to an investigation id.

    *target* is either a path to an existing ``.json`` export or an
    identifier understood by the SQLite adapter.

    For a JSON file the id is read from ``investigation.id`` and, failing
    that, from the top-level ``id`` key. The file is not checked against the
    database.

    Returns:
        The investigation id, or ``None`` when the file cannot be read or
        parsed, is not a JSON object, carries no id, or when no matching
        investigation row exists in the database.
    """
    import json as _json

    p = Path(target).expanduser()
    if p.exists() and p.suffix == ".json":
        try:
            data = _json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable file, undecodable bytes or malformed JSON
            # (JSONDecodeError and UnicodeDecodeError are ValueErrors).
            return None
        if not isinstance(data, dict):
            # Valid JSON but not an object (list, string, number, null):
            # there is no key to look an id up under.
            return None
        investigation = data.get("investigation")
        # "investigation" may be missing or null; only trust it when it is
        # an object.
        nested_id = investigation.get("id") if isinstance(investigation, dict) else None
        return nested_id or data.get("id")

    from cli.adapters import sqlite as sqlite_adapter

    sqlite_adapter.init_db()
    # Fall back to the raw target when the adapter cannot resolve it.
    resolved = sqlite_adapter.resolve_investigation_id(target) or target
    row = sqlite_adapter.get_investigation(resolved)
    return row["id"] if row else None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class _FakeEntity:
    """Lightweight substitute mirroring extractor.normalizer.NormalizedEntity.

    Holds only the four attributes listed in ``__slots__``.
    """

    __slots__ = ("entity_type", "value", "confidence", "canonical_value")

    def __init__(
        self,
        entity_type: str,
        value: str,
        confidence: float,
        canonical_value: str | None = None,
    ):
        # A falsy canonical form (None or empty string) falls back to the
        # raw value.
        effective_canonical = canonical_value if canonical_value else value
        self.entity_type, self.value = entity_type, value
        self.confidence = confidence
        self.canonical_value = effective_canonical
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class _FakeResult:
    """Substitute for an ExtractionResult.

    Carries only the ``entities`` list, the one attribute the reputation
    enrichers read.
    """

    __slots__ = ("entities",)

    def __init__(self, entities: list):
        # Stored by reference; the caller's list is not copied.
        self.entities = entities
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Translates the entity_type strings found on stored rows into the upper-case
# labels placed on the reconstructed entities. Types missing from this table
# are upper-cased by the loader instead.
_TYPE_MAP = dict(
    ip_address="IP_ADDRESS",
    domain="DOMAIN",
    email="EMAIL_ADDRESS",
    file_hash_md5="FILE_HASH_MD5",
    file_hash_sha1="FILE_HASH_SHA1",
    file_hash_sha256="FILE_HASH_SHA256",
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _load_extraction_results(investigation_id: str) -> list:
    """Reconstruct ExtractionResult-shaped objects from the DB.

    Reads the stored entity rows for *investigation_id* through the SQLite
    adapter and wraps them in a single ``_FakeResult``.

    Returns:
        A one-element list holding a ``_FakeResult`` whose ``entities`` are
        ``_FakeEntity`` objects (possibly none).
    """
    from cli.adapters import sqlite as sqlite_adapter

    entities = []
    # NOTE(review): rows are assumed to be dict-like (support both [] and
    # .get), as the original code required; confirm against the adapter.
    for row in sqlite_adapter.get_entities(investigation_id):
        stored_type = row["entity_type"]
        confidence = row.get("confidence")
        entities.append(
            _FakeEntity(
                # Known types map through _TYPE_MAP; others are upper-cased.
                entity_type=_TYPE_MAP.get(stored_type, stored_type.upper()),
                value=row["value"],
                # Only a missing confidence defaults to 1.0. A stored 0.0 is
                # a real value and must not be promoted to full confidence.
                confidence=1.0 if confidence is None else confidence,
                canonical_value=row.get("canonical_value"),
            )
        )
    return [_FakeResult(entities)]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
async def _run(
    investigation_id: str,
    skip_ips: bool,
    skip_domains: bool,
    skip_hashes: bool,
    skip_emails: bool,
) -> None:
    """Run the enabled reputation enrichers over a stored investigation.

    Each stage is best-effort: an import error or a failure inside one
    enricher is printed and the remaining stages still run.

    Args:
        investigation_id: Investigation id; must be a valid UUID string.
        skip_ips: Skip the IP reputation stage.
        skip_domains: Skip the domain reputation stage.
        skip_hashes: Skip the hash reputation stage.
        skip_emails: Skip the email reputation stage.

    Raises:
        typer.Exit: With code 1 when *investigation_id* is not a valid UUID.
    """
    import importlib

    try:
        inv_uuid = uuid.UUID(investigation_id)
    except (ValueError, TypeError, AttributeError) as exc:
        # Ids read from a JSON file are not validated upstream; report a
        # clean error instead of a traceback.
        console.print(
            f"[red]Invalid investigation id:[/red] {investigation_id} ({exc})"
        )
        raise typer.Exit(code=1) from exc

    extraction_results = _load_extraction_results(investigation_id)

    # (skip flag, progress banner, module, coroutine name), in run order.
    stages = (
        (skip_ips, "• IP reputation…", "sources.ip_reputation", "enrich_ip_entities"),
        (skip_domains, "• Domain reputation…", "sources.domain_reputation", "enrich_domain_entities"),
        (skip_hashes, "• Hash reputation…", "sources.hash_reputation", "enrich_hash_entities"),
        (skip_emails, "• Email reputation…", "sources.email_reputation", "enrich_email_entities"),
    )

    for skipped, banner, module_name, func_name in stages:
        if skipped:
            continue
        try:
            # Imported lazily inside the try so a missing or broken enricher
            # module only fails its own stage.
            enricher = getattr(importlib.import_module(module_name), func_name)
            console.print(banner)
            await enricher(extraction_results, inv_uuid)
            console.print(" [green]done[/green]")
        except Exception as exc:
            # Deliberate best-effort boundary: report and move on.
            console.print(f" [red]failed:[/red] {exc}")

    console.print("\n[green]Re-enrichment complete.[/green]")
|
cli/commands/export.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/commands/export.py — convert a saved investigation to a sharable format.
|
|
3
|
+
|
|
4
|
+
voidaccess export <id_or_json_file> --format stix|misp|sigma|csv|md|json
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import csv
|
|
10
|
+
import io
|
|
11
|
+
import json
|
|
12
|
+
import uuid
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import typer
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def run(
    target: str = typer.Argument(..., help="Investigation id or .json file"),
    fmt: str = typer.Option("json", "--format", help="stix|misp|sigma|csv|md|json"),
    output: Optional[Path] = typer.Option(None, "--output", help="Output file"),
) -> None:
    """Export an investigation."""
    # CLI entry point: validate the format, load the investigation, render it
    # and write the result to --output or to a derived default path.
    # Exit codes: 2 for an unsupported format, 1 when the target cannot be
    # loaded.
    from cli import config as cli_config

    cli_config.apply_env()

    supported_formats = ("stix", "misp", "sigma", "csv", "md", "json")
    fmt = fmt.lower()
    if fmt not in supported_formats:
        console.print(f"[red]Unsupported format:[/red] {fmt}")
        raise typer.Exit(code=2)

    inv_id, data = _load_target(target)
    if not data:
        console.print(f"[red]Could not load investigation:[/red] {target}")
        raise typer.Exit(code=1)

    payload, suffix = _render(fmt, inv_id, data)

    destination = output if output else _default_out_path(target, suffix)
    destination = Path(destination).expanduser()
    # Create missing parent directories before writing.
    destination.parent.mkdir(parents=True, exist_ok=True)

    if not isinstance(payload, bytes):
        destination.write_text(payload, encoding="utf-8")
    else:
        destination.write_bytes(payload)
    console.print(f"[green]Wrote[/green] {destination}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _load_target(target: str) -> tuple[Optional[str], Optional[dict]]:
    """Load the investigation named by *target*.

    *target* is either a path to an existing ``.json`` export or an
    identifier understood by the SQLite adapter.

    Returns:
        ``(investigation_id, data)``. Both are ``None`` when the file cannot
        be read or parsed, is not a JSON object, or when the database has no
        such investigation. For a JSON file the id may be ``None`` while
        ``data`` is set, if the file carries no id.
    """
    p = Path(target).expanduser()
    if p.exists() and p.suffix == ".json":
        try:
            data = json.loads(p.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable file, undecodable bytes or malformed JSON
            # (JSONDecodeError and UnicodeDecodeError are ValueErrors).
            return None, None
        if not isinstance(data, dict):
            # Valid JSON but not an object: nothing downstream can use it.
            return None, None
        investigation = data.get("investigation")
        # "investigation" may be missing or null; only trust it when it is
        # an object.
        nested_id = investigation.get("id") if isinstance(investigation, dict) else None
        return nested_id or data.get("id"), data

    from cli.adapters import sqlite as sqlite_adapter

    sqlite_adapter.init_db()
    # Fall back to the raw target when the adapter cannot resolve it.
    resolved = sqlite_adapter.resolve_investigation_id(target) or target
    data = sqlite_adapter.investigation_to_export_dict(resolved)
    if not data or not data.get("investigation"):
        return None, None
    return resolved, data
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _render(fmt: str, inv_id: Optional[str], data: dict) -> tuple[str | bytes, str]:
    """Serialise an investigation into *fmt*.

    ``json``, ``csv`` and ``md`` are produced from *data*. ``stix``, ``misp``
    and ``sigma`` are produced by the ``export`` package from the
    investigation UUID.

    Returns:
        ``(payload, file_suffix)``.

    Raises:
        typer.BadParameter: When a UUID-based format is requested without a
            usable investigation id, or when *fmt* is not recognised.
    """
    # Formats rendered straight from the in-memory dict
    if fmt == "json":
        serialised = json.dumps(data, indent=2, default=str)
        return serialised, ".json"

    if fmt == "csv":
        table = _csv_from_data(data)
        return table, ".csv"

    if fmt == "md":
        from cli.commands.investigate import _render_markdown  # reuse renderer

        # The markdown renderer takes a flat payload, so adapt the shape.
        document = _render_markdown(_flatten_for_md(data))
        return document, ".md"

    # Everything below is keyed by the investigation UUID
    if inv_id is None:
        raise typer.BadParameter(
            "STIX, MISP, and Sigma export require an investigation id in the database "
            "(not a bare JSON file)."
        )
    try:
        inv_uuid = uuid.UUID(inv_id)
    except (ValueError, TypeError) as exc:
        raise typer.BadParameter(f"Invalid investigation id: {inv_id} ({exc})") from exc

    if fmt == "stix":
        from export import investigation_to_stix_bundle, bundle_to_json

        return bundle_to_json(investigation_to_stix_bundle(inv_uuid)), ".json"

    if fmt == "misp":
        from export import investigation_to_misp_event, misp_event_to_json

        return misp_event_to_json(investigation_to_misp_event(inv_uuid)), ".json"

    if fmt == "sigma":
        from export import export_sigma_rules

        rules = export_sigma_rules(inv_uuid)
        if isinstance(rules, str):
            sigma_text = rules
        else:
            # Anything other than a string is joined into one multi-document
            # YAML text.
            sigma_text = "\n---\n".join(rules)
        return sigma_text, ".yml"

    raise typer.BadParameter(f"Unknown format: {fmt}")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _csv_from_data(data: dict) -> str:
    """Render the investigation's entities as CSV text.

    Entities are read from the top-level ``entities`` key. When that is
    empty and ``investigation`` is an object, its own ``entities`` list is
    used instead.

    Returns:
        CSV text with a header row followed by one row per entity. Newlines
        in the context snippet become spaces and the snippet is truncated to
        500 characters.
    """
    columns = [
        "entity_type", "value", "canonical_value", "confidence",
        "extraction_method", "corroborating_sources", "context_snippet",
    ]

    # `or []` also covers an explicit "entities": null.
    entities = data.get("entities") or []
    investigation = data.get("investigation")
    if not entities and isinstance(investigation, dict):
        # The original fallback re-read data["entities"] and so did nothing;
        # look inside the nested investigation object instead.
        entities = investigation.get("entities") or []

    buf = io.StringIO()
    writer = csv.writer(buf)
    writer.writerow(columns)
    for e in entities:
        snippet = (e.get("context_snippet") or "").replace("\n", " ")[:500]
        # Every column except the snippet is copied through unchanged.
        writer.writerow([e.get(col, "") for col in columns[:-1]] + [snippet])
    return buf.getvalue()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _flatten_for_md(data: dict) -> dict:
    """Convert a nested export dict into the flat payload used for markdown.

    When *data* has an ``investigation`` object, its descriptive fields are
    lifted to the top level next to ``entities``, ``relationships`` and
    ``sources_used``. Otherwise *data* is returned unchanged.
    """
    inv = data.get("investigation")
    if not isinstance(inv, dict):
        # Missing, null or malformed "investigation": pass the payload
        # through untouched rather than failing on inv.get().
        return data
    return {
        "query": inv.get("query", ""),
        "refined_query": inv.get("refined_query"),
        "model_used": inv.get("model_used"),
        "created_at": inv.get("created_at", ""),
        "summary": inv.get("summary"),
        "entities": data.get("entities", []),
        "relationships": data.get("relationships", []),
        "sources_used": data.get("sources_used", {}),
    }
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _default_out_path(target: str, suffix: str) -> Path:
    """Choose the output path used when ``--output`` is not given.

    For a *target* that exists on disk, the output is placed beside it with
    *suffix* replacing its extension. If that would be the input path itself
    (a ``.json`` file exported to a ``.json`` format), ``.export`` is
    inserted before the suffix so the source file is not overwritten.

    For any other target, the file goes to the configured output directory
    and is named ``<target><suffix>``.
    """
    p = Path(target).expanduser()
    if p.exists():
        candidate = p.with_suffix(suffix)
        if candidate == p:
            # Same extension as the input: writing here would clobber the
            # investigation file being exported.
            candidate = p.with_name(f"{p.stem}.export{suffix}")
        return candidate

    from cli import config as cli_config

    return cli_config.get_output_dir() / f"{target}{suffix}"
|