voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
cli/config.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/config.py — Persistent config for the voidaccess CLI.
|
|
3
|
+
|
|
4
|
+
Stores LLM provider/model, API keys, Tor proxy settings, and output dir
|
|
5
|
+
in ~/.voidaccess/config.json. Exposes helpers and an apply_env() function
|
|
6
|
+
that pushes the saved config into os.environ before any voidaccess module
|
|
7
|
+
is imported (the existing modules read API keys from env at import time).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Optional
|
|
16
|
+
|
|
17
|
+
# Filesystem layout: everything the CLI persists lives under ~/.voidaccess.
CLI_HOME = Path(os.path.expanduser("~/.voidaccess"))
CONFIG_PATH = CLI_HOME.joinpath("config.json")
DB_PATH = CLI_HOME.joinpath("investigations.db")
DEFAULT_OUTPUT_DIR = CLI_HOME.joinpath("results")

# Names of the optional enrichment credentials. apply_env() exports each
# entry of the saved "enrichment_keys" mapping as an environment variable
# of the same name.
ENRICHMENT_KEYS = [
    "OTX_API_KEY",
    "VT_API_KEY",
    "ABUSEIPDB_API_KEY",
    "GREYNOISE_API_KEY",
    "URLSCAN_API_KEY",
    "SECURITYTRAILS_API_KEY",
    "GITHUB_TOKEN",
    "GITLAB_TOKEN",
    "HYBRID_ANALYSIS_API_KEY",
    "HIBP_API_KEY",
    "EMAILREP_API_KEY",
    "SHODAN_API_KEY",
    "BLOCKCYPHER_TOKEN",
    "ETHERSCAN_API_KEY",
    "DEEPL_API_KEY",
    "DARKSEARCH_API_KEY",
]

# LLM provider -> environment variable that carries its API key.
# None marks a provider that has no key variable.
PROVIDER_ENV = dict(
    openrouter="OPENROUTER_API_KEY",
    groq="GROQ_API_KEY",
    google="GOOGLE_API_KEY",
    openai="OPENAI_API_KEY",
    anthropic="ANTHROPIC_API_KEY",
    ollama=None,
)

# Baseline configuration; load_config() layers the saved file on top of it.
DEFAULT_CONFIG: dict[str, Any] = dict(
    llm=dict(
        provider="openrouter",
        model="openrouter/deepseek/deepseek-chat",
        api_key="",
    ),
    enrichment_keys=dict.fromkeys(ENRICHMENT_KEYS, ""),
    tor=dict(host="127.0.0.1", port=9050),
    output_dir=str(DEFAULT_OUTPUT_DIR),
)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _ensure_home() -> None:
    """Create the CLI home directory (``CLI_HOME``) if it is missing.

    ``config.json`` inside this directory stores API keys, so a newly
    created directory is requested with owner-only permissions (0o700,
    still subject to the process umask and to platform support). An
    already existing directory is left exactly as it is.
    """
    CLI_HOME.mkdir(mode=0o700, parents=True, exist_ok=True)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_config() -> dict[str, Any]:
    """Return the saved config merged over DEFAULT_CONFIG.

    The result is always a fresh, independent copy with every default key
    present. Falls back to the defaults when the config file is missing,
    unreadable, not valid JSON, or valid JSON that is not an object.
    Sections of the saved file that are not objects are ignored instead
    of crashing the merge.

    Returns:
        A dict with the keys ``llm``, ``enrichment_keys``, ``tor`` and
        ``output_dir``.
    """
    _ensure_home()
    # JSON round-trip == deep copy, so callers can never mutate DEFAULT_CONFIG.
    merged = json.loads(json.dumps(DEFAULT_CONFIG))
    try:
        saved = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # Missing/unreadable file (OSError), bad encoding or malformed JSON
        # (both ValueError subclasses), or absurdly nested JSON.
        return merged
    if not isinstance(saved, dict):
        # e.g. the file contains `null` or a list: nothing usable to merge.
        return merged

    # Merge with defaults so missing keys don't crash
    for section in ("llm", "tor", "enrichment_keys"):
        values = saved.get(section)
        if isinstance(values, dict):
            merged[section].update(values)

    output_dir = saved.get("output_dir")
    if isinstance(output_dir, str) and output_dir:
        merged["output_dir"] = output_dir
    return merged
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def save_config(config: dict[str, Any]) -> None:
    """Persist *config* to ``CONFIG_PATH`` as pretty-printed, key-sorted JSON.

    The file contains API keys, so it is written to a sibling temporary
    file that is created owner-readable only (0o600) and then moved over
    the real path with ``os.replace``. An interrupted write therefore
    cannot leave a truncated config behind.

    Args:
        config: JSON-serialisable configuration mapping.

    Raises:
        TypeError / ValueError: if *config* cannot be serialised (raised
            before anything on disk is touched).
        OSError: if the file cannot be written or replaced.
    """
    _ensure_home()
    payload = json.dumps(config, indent=2, sort_keys=True)

    tmp_path = CONFIG_PATH.with_name(CONFIG_PATH.name + ".tmp")
    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        try:
            # os.open only applies the mode when it creates the file; a stale
            # temp file from an earlier crash could carry wider permissions.
            os.chmod(tmp_path, 0o600)
        except OSError:
            pass  # best effort: some filesystems do not support chmod
        os.replace(tmp_path, CONFIG_PATH)
    except BaseException:
        # Do not leave a half-written temp file containing secrets behind.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def is_configured() -> bool:
    """Report whether a usable LLM configuration has been saved.

    Returns False when no config file exists. Otherwise the provider
    ``"ollama"`` counts as configured on its own; every other provider
    additionally needs a non-empty API key.
    """
    if CONFIG_PATH.exists():
        llm_section = load_config().get("llm", {})
        chosen_provider = llm_section.get("provider", "")
        stored_key = llm_section.get("api_key", "")
        if chosen_provider == "ollama":
            return True
        return bool(chosen_provider and stored_key)
    return False
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_llm_key(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM API key, or "" when none is stored.

    A falsy *config* (None or an empty dict) means "load it from disk".
    """
    settings = config or load_config()
    llm_section = settings.get("llm", {})
    stored = llm_section.get("api_key", "")
    return stored if stored else ""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_llm_model(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM model identifier, or "" when none is stored.

    A falsy *config* (None or an empty dict) means "load it from disk".
    """
    settings = config or load_config()
    llm_section = settings.get("llm", {})
    model_name = llm_section.get("model", "")
    return model_name if model_name else ""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_llm_provider(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM provider name, or "" when none is stored.

    A falsy *config* (None or an empty dict) means "load it from disk".
    """
    settings = config or load_config()
    llm_section = settings.get("llm", {})
    provider_name = llm_section.get("provider", "")
    return provider_name if provider_name else ""
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_tor_proxy(config: Optional[dict[str, Any]] = None) -> str:
    """Build the ``socks5://host:port`` URL of the configured Tor proxy.

    Missing values default to 127.0.0.1 and 9050. A falsy *config*
    (None or an empty dict) means "load it from disk".
    """
    settings = config or load_config()
    tor_section = settings.get("tor", {})
    proxy_host = tor_section.get("host", "127.0.0.1")
    proxy_port = tor_section.get("port", 9050)
    return "socks5://{}:{}".format(proxy_host, proxy_port)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_output_dir(config: Optional[dict[str, Any]] = None) -> Path:
    """Return the report output directory, creating it when necessary.

    Uses the configured ``output_dir`` (with ``~`` expanded) and falls
    back to DEFAULT_OUTPUT_DIR when the setting is empty. A falsy
    *config* (None or an empty dict) means "load it from disk".
    """
    settings = config or load_config()
    configured = settings.get("output_dir")
    raw_location = configured if configured else str(DEFAULT_OUTPUT_DIR)
    target = Path(os.path.expanduser(raw_location))
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def db_url() -> str:
    """SQLite URL used by db.session via DATABASE_URL env var."""
    # Make sure the directory that will hold the database file exists.
    _ensure_home()
    database_file = DB_PATH.as_posix()
    return "sqlite:///" + database_file
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def apply_env(config: Optional[dict[str, Any]] = None) -> None:
    """
    Export the saved configuration into os.environ.

    The other voidaccess modules (config.py, llm.py, sources/*) read their
    settings from the environment at import time, so this must be called
    BEFORE any of them is imported.

    Baseline variables (DATABASE_URL, JWT_SECRET, DISABLE_RATE_LIMIT,
    PLAYWRIGHT_ENABLED) are only filled in when not already set. The Tor
    proxy and the default model overwrite existing values when the config
    holds a non-empty value. The active provider's API key and every
    enrichment key additionally *remove* the variable when the saved value
    is empty.
    """
    settings = config or load_config()

    def export(name: str, raw: Any, *, clear_if_empty: bool = False) -> None:
        # Values are stringified and stripped; blank values never get exported.
        cleaned = "" if raw is None else str(raw).strip()
        if cleaned:
            os.environ[name] = cleaned
            return
        if clear_if_empty:
            os.environ.pop(name, None)

    # Local-CLI baseline; an explicitly set environment always wins here.
    baseline = (
        ("DATABASE_URL", db_url()),
        ("JWT_SECRET", "voidaccess-cli-local-no-auth"),
        ("DISABLE_RATE_LIMIT", "true"),
        ("PLAYWRIGHT_ENABLED", "false"),
    )
    for variable, fallback in baseline:
        os.environ.setdefault(variable, fallback)

    # Tor proxy
    tor_section = settings.get("tor", {})
    export("TOR_PROXY_HOST", tor_section.get("host", "127.0.0.1"))
    export("TOR_PROXY_PORT", tor_section.get("port", 9050))

    # LLM provider key (push under its canonical env var name)
    llm_section = settings.get("llm", {})
    key_variable = PROVIDER_ENV.get(llm_section.get("provider", ""))
    if key_variable:
        export(key_variable, llm_section.get("api_key", ""), clear_if_empty=True)

    # Default model
    export("DEFAULT_MODEL", llm_section.get("model", ""))

    # Enrichment keys
    enrichment = settings.get("enrichment_keys") or {}
    for variable, secret in enrichment.items():
        export(variable, secret, clear_if_empty=True)
|
cli/display.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/display.py — Rich live display for investigations.
|
|
3
|
+
|
|
4
|
+
Three-zone layout:
|
|
5
|
+
title bar — query + elapsed timer
|
|
6
|
+
step table — pipeline stages with status icons
|
|
7
|
+
activity line — current URL / sub-task detail
|
|
8
|
+
|
|
9
|
+
Status icons:
|
|
10
|
+
pending · gray dot
|
|
11
|
+
active ⠹ spinner (advances on every display refresh)
|
|
12
|
+
ok ✓ green
|
|
13
|
+
fail ✗ red
|
|
14
|
+
skip ↷ yellow
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import time
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
from rich.console import Console, Group
|
|
24
|
+
from rich.live import Live
|
|
25
|
+
from rich.panel import Panel
|
|
26
|
+
from rich.table import Table
|
|
27
|
+
from rich.text import Text
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Braille animation frames shown, one per refresh, next to an "active" step.
SPINNER_FRAMES = list("⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏")

# status name -> (glyph, rich colour) used when rendering steps and sub-steps.
STATUS_GLYPH = dict(
    pending=("·", "grey50"),
    active=("⠹", "cyan"),
    ok=("✓", "green"),
    fail=("✗", "red"),
    skip=("↷", "yellow"),
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class StepRow:
    """One pipeline stage as shown in the step table."""

    # Label shown in the table; also the key used to look the row up.
    name: str
    # One of the STATUS_GLYPH keys; unknown values render like "pending".
    status: str = "pending"
    # Optional short note rendered in parentheses after the name.
    detail: str = ""
    # Child entries rendered beneath the step, each a (label, status) pair.
    substeps: list[tuple[str, str]] = field(default_factory=list)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InvestigationDisplay:
    """Live terminal display driven by .update_step() and .update_current_url().

    In normal mode a ``rich.live.Live`` panel shows the query with an
    elapsed timer, one row per pipeline step (plus sub-steps) and the URL
    currently being fetched. In ``quiet`` mode no live panel is created;
    ``start`` and ``update_step`` print plain lines instead.

    Caller-supplied text (query, step names, details, error messages,
    summary values) is escaped before it is embedded in a Rich markup
    string, so square brackets in that text are shown literally instead of
    being parsed as style tags.
    """

    def __init__(self, console: Optional[Console] = None, quiet: bool = False):
        """Create an idle display.

        Args:
            console: Console to draw on; a new ``Console()`` when omitted.
            quiet: Print plain progress lines instead of the live panel.
        """
        self.console = console or Console()
        self.quiet = quiet
        self._live: Optional[Live] = None
        self._query: str = ""
        self._start_ts: float = time.monotonic()
        self._steps: list[StepRow] = []
        self._current_url: str = ""
        self._spinner_index = 0
        self._final_summary: Optional[dict] = None
        self._error: Optional[str] = None

    @staticmethod
    def _esc(value: object) -> str:
        """Return ``str(value)`` with Rich markup tags escaped."""
        # Function-scope import keeps this class independent of the
        # module's import block.
        from rich.markup import escape

        return escape(str(value))

    # -- lifecycle ----------------------------------------------------------

    def start(self, query: str, steps: Optional[list[str]] = None) -> None:
        """Reset the state for *query* and begin displaying.

        Args:
            query: Investigation query shown in the title.
            steps: Step names in display order; the built-in nine-stage
                pipeline is used when omitted or empty.
        """
        self._query = query
        self._start_ts = time.monotonic()
        names = steps or [
            "Refining query",
            "Searching dark web",
            "Filtering results",
            "Scraping pages",
            "Extracting entities",
            "Enriching intelligence",
            "Building graph",
            "Generating summary",
            "Finalizing results",
        ]
        self._steps = [StepRow(name=n) for n in names]
        if self.quiet:
            self.console.print(f"[bold]VoidAccess[/bold] — {self._esc(query)}")
            return
        self._live = Live(
            self._render(),
            console=self.console,
            refresh_per_second=8,
            transient=False,
        )
        self._live.start()

    def stop(self) -> None:
        """Draw one final frame and shut the live panel down (no-op if idle)."""
        if self._live is not None:
            self._live.update(self._render(), refresh=True)
            self._live.stop()
            self._live = None

    # -- updates ------------------------------------------------------------

    def update_step(self, step_name: str, status: str, detail: str = "") -> None:
        """Set the status (and optionally the detail) of a step.

        A step name that is not in the table yet is appended as a new row.
        An empty *detail* keeps the detail that is already stored.
        """
        row = self._find_step(step_name)
        if row is None:
            row = StepRow(name=step_name)
            self._steps.append(row)
        row.status = status
        if detail:
            row.detail = detail
        self._refresh()
        if self.quiet:
            icon = STATUS_GLYPH.get(status, ("·", "grey50"))[0]
            d = f" — {self._esc(detail)}" if detail else ""
            self.console.print(f" {icon} {self._esc(step_name)}{d}")

    def update_substep(self, step_name: str, label: str, status: str) -> None:
        """Add or update the sub-step *label* under *step_name*.

        Does nothing when the parent step does not exist.
        """
        row = self._find_step(step_name)
        if row is None:
            return
        for idx, (existing, _) in enumerate(row.substeps):
            if existing == label:
                row.substeps[idx] = (label, status)
                self._refresh()
                return
        row.substeps.append((label, status))
        self._refresh()

    def update_current_url(self, url: str) -> None:
        """Show *url* on the activity line (an empty string clears it)."""
        self._current_url = url
        self._refresh()

    def complete(self, summary: dict) -> None:
        """Stop the live panel and print the completion summary panel."""
        self._final_summary = summary
        self.stop()
        self._print_completion(summary)

    def error(self, msg: str) -> None:
        """Stop the live panel and print *msg* as a failure line."""
        self._error = msg
        self.stop()
        # Exception text routinely contains brackets (e.g. "[Errno 111]");
        # unescaped it would be dropped as a tag or raise MarkupError.
        self.console.print(
            f"[bold red]Investigation failed:[/bold red] {self._esc(msg)}"
        )

    # -- render -------------------------------------------------------------

    def _refresh(self) -> None:
        """Advance the spinner one frame and redraw the panel.

        The panel handed to ``Live`` is a snapshot built by ``_render``, so
        the spinner frame and the elapsed time shown change only when this
        method produces a new snapshot.
        """
        self._spinner_index = (self._spinner_index + 1) % len(SPINNER_FRAMES)
        if self._live is not None:
            self._live.update(self._render(), refresh=True)

    def _find_step(self, name: str) -> Optional[StepRow]:
        """Return the first step row called *name*, or None."""
        for row in self._steps:
            if row.name == name:
                return row
        return None

    def _render(self) -> Panel:
        """Build the three-zone panel (title, step table, activity line)."""
        elapsed = time.monotonic() - self._start_ts
        # Text.append() treats its argument as plain text, so no escaping
        # is needed anywhere in this method.
        title = Text()
        title.append("VoidAccess", style="bold magenta")
        title.append(f" — \"{self._query}\"", style="bold white")
        title.append(f" Elapsed: {self._fmt_elapsed(elapsed)}", style="grey50")

        table = Table.grid(padding=(0, 1))
        table.add_column(width=2)
        table.add_column(no_wrap=False)
        for row in self._steps:
            glyph, colour = STATUS_GLYPH.get(row.status, ("·", "grey50"))
            if row.status == "active":
                # Replace the static glyph with the current animation frame.
                glyph = SPINNER_FRAMES[self._spinner_index]
            line = Text()
            line.append(f"{glyph} ", style=colour)
            line.append(row.name, style="white" if row.status != "pending" else "grey50")
            if row.detail:
                line.append(f" ({row.detail})", style="grey62")
            table.add_row("", line)
            for sub_label, sub_status in row.substeps:
                sg, sc = STATUS_GLYPH.get(sub_status, ("·", "grey50"))
                sub = Text(f" {sg} {sub_label}", style=sc)
                table.add_row("", sub)

        activity = Text()
        if self._current_url:
            activity.append("Fetching: ", style="bold")
            activity.append(self._current_url, style="cyan")
        else:
            activity.append("", style="grey50")

        body = Group(title, Text(""), table, Text(""), activity)
        return Panel(body, border_style="magenta", padding=(1, 2))

    @staticmethod
    def _fmt_elapsed(secs: float) -> str:
        """Format a duration as ``"<minutes>m <seconds>s"`` (e.g. ``1m 05s``)."""
        m, s = divmod(int(secs), 60)
        return f"{m}m {s:02d}s"

    def _print_completion(self, summary: dict) -> None:
        """Print the green result panel for a finished investigation.

        Reads ``entity_count``, ``page_count`` and ``sources_used`` (shown
        as "—" when absent) plus the optional ``c2_ips``, ``report_path``
        and ``data_path`` entries of *summary*.
        """
        esc = self._esc  # string cells are parsed as markup by the table
        table = Table.grid(padding=(0, 2))
        table.add_column(style="bold")
        table.add_column()
        table.add_row("Entities", esc(summary.get("entity_count", "—")))
        table.add_row("Pages", esc(summary.get("page_count", "—")))
        if "c2_ips" in summary:
            table.add_row("C2 IPs", esc(f"{summary['c2_ips']} confirmed"))
        table.add_row("Sources", esc(summary.get("sources_used", "—")))
        if summary.get("report_path"):
            table.add_row("Report", esc(summary["report_path"]))
        if summary.get("data_path"):
            table.add_row("Data", esc(summary["data_path"]))

        panel = Panel(
            Group(Text("✓ Investigation complete", style="bold green"), Text(""), table),
            border_style="green",
            padding=(1, 2),
        )
        self.console.print(panel)
|
cli/main.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/main.py — typer entry point exposed as the `voidaccess` script.
|
|
3
|
+
|
|
4
|
+
Defined as the [project.scripts] target in pyproject.toml:
|
|
5
|
+
voidaccess = "cli.main:app"
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
# Force UTF-8 on Windows consoles so rich glyphs render reliably
|
|
15
|
+
if sys.platform == "win32":
    # Ask for UTF-8 unless the user already chose an encoding themselves.
    os.environ.setdefault("PYTHONIOENCODING", "utf-8")
    try:
        # Switch the already-open streams as well; stdout first, then stderr.
        for _stream in (sys.stdout, sys.stderr):
            _stream.reconfigure(encoding="utf-8")  # type: ignore[attr-defined]
    except Exception:
        # Best effort only: replaced or exotic streams may lack reconfigure().
        pass
|
|
22
|
+
|
|
23
|
+
import typer
|
|
24
|
+
from rich.console import Console
|
|
25
|
+
from rich.table import Table
|
|
26
|
+
|
|
27
|
+
from cli import __version__
|
|
28
|
+
from cli import config as cli_config
|
|
29
|
+
from cli.commands import configure, enrich, export, investigate, show
|
|
30
|
+
|
|
31
|
+
# Shared console used by every command defined in this module.
console = Console()

app = typer.Typer(
    add_completion=False,
    no_args_is_help=True,
    help="Dark web OSINT — query to intelligence report.",
    name="voidaccess",
)

# Sub-commands
app.add_typer(configure.app, name="configure", help="Configure the CLI (LLM, keys, Tor).")

# (command name, help text, handler) — registered in this order.
_COMMAND_TABLE = (
    ("investigate", "Run a new investigation.", investigate.run),
    ("show", "Open the entity browser TUI.", show.run),
    ("export", "Export an investigation to STIX/MISP/Sigma/CSV/MD/JSON.", export.run),
    ("enrich", "Re-enrich a stored investigation against current feeds.", enrich.run),
)
for _command_name, _command_help, _command_handler in _COMMAND_TABLE:
    app.command(_command_name, help=_command_help)(_command_handler)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _ensure_first_run() -> None:
    """Auto-launch wizard on first invocation when no config exists.

    Does nothing when the config file is already present. Launching the
    wizard stays best effort and never raises, but a failure is now
    reported with a hint instead of being swallowed silently.
    """
    if cli_config.CONFIG_PATH.exists():
        return
    console.print(
        "[bold magenta]Welcome to voidaccess.[/bold magenta] "
        "Let's get you configured first."
    )
    # Invoke wizard via Typer
    try:
        configure.configure_default(ctx=typer.Context(configure.app))
    except (typer.Exit, typer.Abort):
        # The wizard ended itself or the user cancelled it: not an error.
        return
    except Exception as exc:
        # NOTE(review): configure.configure_default is defined outside this
        # file; confirm it accepts a Context built this way.
        console.print(
            f"Could not start the setup wizard ({type(exc).__name__}: {exc}). "
            "Run `voidaccess configure` to set things up.",
            style="yellow",
            markup=False,  # exception text must not be parsed as markup
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command("status")
def status() -> None:
    """Show current config, Tor status, and detected API keys."""
    # NOTE: the docstring above is left untouched because Typer shows it
    # as the command's help text.
    from cli.tor_detect import detect_tor

    cli_config.apply_env()
    cfg = cli_config.load_config()

    table = Table(title="voidaccess status", show_lines=False)
    table.add_column("Setting", style="bold")
    table.add_column("Value")
    table.add_row("Version", __version__)
    table.add_row("Config path", str(cli_config.CONFIG_PATH))
    table.add_row("DB path", str(cli_config.DB_PATH))
    table.add_row("Output dir", str(cli_config.get_output_dir(cfg)))

    # Values come from a hand-editable JSON file, so coerce to str before
    # handing them to the table.
    llm = cfg.get("llm", {})
    provider = str(llm.get("provider") or "")
    table.add_row("LLM provider", provider or "—")
    table.add_row("LLM model", str(llm.get("model") or "—"))
    if llm.get("api_key"):
        key_state = "[green]set[/green]"
    elif provider in cli_config.PROVIDER_ENV and cli_config.PROVIDER_ENV[provider] is None:
        # Providers mapped to None (ollama) have no key variable, so an
        # empty key is not a problem for them.
        key_state = "[grey50]not required[/grey50]"
    else:
        key_state = "[red]missing[/red]"
    table.add_row("LLM key", key_state)

    tor_cfg = cfg.get("tor", {})
    table.add_row("Tor host", str(tor_cfg.get("host", "—")))
    table.add_row("Tor port", str(tor_cfg.get("port", "—")))

    tor_status = detect_tor()
    if tor_status.proxy_url:
        table.add_row("Tor reachable", f"[green]{tor_status.source}[/green] at {tor_status.proxy_url}")
    else:
        table.add_row("Tor reachable", "[red]no proxy responded[/red]")

    # Distinguish "package missing" from "model missing" so the user knows
    # which of the two to install.
    try:
        import spacy

        spacy.load("en_core_web_sm")
    except ImportError:
        spacy_status = "not installed"
    except OSError:
        # presumably how spaCy reports a missing model package — confirm
        spacy_status = "model en_core_web_sm not found"
    except Exception:
        spacy_status = "unavailable"
    else:
        spacy_status = "ready"
    table.add_row("spaCy NER", spacy_status)

    keys = cfg.get("enrichment_keys", {})
    set_count = sum(1 for v in keys.values() if v)
    table.add_row("Enrichment keys", f"{set_count}/{len(keys)} set")

    console.print(table)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@app.command("list")
def list_investigations(
    limit: int = typer.Option(20, "--limit", "-n", help="Number of rows"),
    as_json: bool = typer.Option(False, "--json", help="JSON output"),
) -> None:
    """List saved investigations."""
    # NOTE: the docstring above is left untouched because Typer shows it
    # as the command's help text.
    cli_config.apply_env()
    # Imported only after apply_env() so the adapter sees the exported env.
    from cli.adapters import sqlite as sqlite_adapter
    from rich.markup import escape

    sqlite_adapter.init_db()
    rows = sqlite_adapter.list_investigations(limit=limit)
    if as_json:
        console.print_json(json.dumps(rows, default=str))
        return
    if not rows:
        console.print("[grey50]No saved investigations.[/grey50]")
        return
    table = Table(title="Saved investigations")
    table.add_column("Id", style="cyan")
    table.add_column("Query")
    table.add_column("Status")
    table.add_column("Entities", justify="right")
    table.add_column("Created")
    for r in rows:
        # Coerce before slicing: the JSON branch needs default=str, so ids
        # and timestamps are presumably not always plain strings — confirm
        # against the adapter. The query is free text, so it is escaped to
        # keep brackets from being parsed as Rich markup.
        table.add_row(
            str(r["id"])[:8],
            escape(str(r["query"] or "")[:60]),
            str(r["status"] or ""),
            str(r["entity_count"]),
            str(r["created_at"] or "")[:19],
        )
    console.print(table)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@app.command("version")
def version() -> None:
    """Print the installed version."""
    # Docstring kept verbatim: Typer uses it as the command's help text.
    banner = "voidaccess " + f"{__version__}"
    console.print(banner)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@app.callback()
def _global(ctx: typer.Context) -> None:
    """Set env vars before sub-commands import voidaccess modules."""
    # ctx is accepted but not used; the only job of this callback is to
    # export the saved configuration into the environment.
    export_saved_config = cli_config.apply_env
    export_saved_config()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
if __name__ == "__main__":
|
|
154
|
+
app()
|
cli/tor_detect.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/tor_detect.py — Locate a running Tor SOCKS5 proxy.
|
|
3
|
+
|
|
4
|
+
Probe order:
|
|
5
|
+
1. 127.0.0.1:9050 (system tor)
|
|
6
|
+
2. 127.0.0.1:9150 (Tor Browser bundle)
|
|
7
|
+
3. config override (cli/config.py)
|
|
8
|
+
|
|
9
|
+
Tests by performing a SOCKS5 handshake — no full HTTP round-trip required,
|
|
10
|
+
so detection stays fast even when the wider Tor network is slow.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import socket
|
|
16
|
+
import struct
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from cli.config import load_config
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class TorStatus:
    """Outcome of a Tor SOCKS5 proxy probe."""

    # "socks5://host:port" of the proxy that answered, or None if none did.
    proxy_url: Optional[str]
    # Which probe succeeded: "system_tor" | "tor_browser" | "config" | "none"
    source: str
    # Host of the responding proxy (stays None when nothing answered).
    host: Optional[str] = None
    # Port of the responding proxy (stays None when nothing answered).
    port: Optional[int] = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _socks5_handshake(host: str, port: int, timeout: float = 2.0) -> bool:
|
|
32
|
+
"""Open TCP, send a SOCKS5 NO-AUTH greeting, expect 0x05 0x00 back."""
|
|
33
|
+
try:
|
|
34
|
+
with socket.create_connection((host, port), timeout=timeout) as sock:
|
|
35
|
+
sock.sendall(b"\x05\x01\x00")
|
|
36
|
+
resp = sock.recv(2)
|
|
37
|
+
return len(resp) == 2 and resp[0] == 0x05 and resp[1] == 0x00
|
|
38
|
+
except Exception:
|
|
39
|
+
return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def detect_tor() -> TorStatus:
    """Probe the known Tor SOCKS5 endpoints and report the first that answers.

    Probe order: 127.0.0.1:9050 (system tor), 127.0.0.1:9150 (Tor Browser),
    then the host/port from the saved config when that pair differs from
    the first two. A missing, empty or non-numeric configured value falls
    back to 127.0.0.1 / 9050 instead of raising.

    Returns:
        A TorStatus describing the responding proxy, or
        ``TorStatus(proxy_url=None, source="none")`` when none responded.
    """
    tor_cfg = load_config().get("tor", {})
    cfg_host = tor_cfg.get("host") or "127.0.0.1"
    try:
        cfg_port = int(tor_cfg.get("port", 9050))
    except (TypeError, ValueError):
        # The config file is hand-editable; a bad port must not break detection.
        cfg_port = 9050

    probes = [
        ("system_tor", "127.0.0.1", 9050),
        ("tor_browser", "127.0.0.1", 9150),
    ]
    # Only add the configured endpoint when it is not one of the defaults,
    # so no endpoint is probed twice.
    if (cfg_host, cfg_port) not in {(h, p) for _, h, p in probes}:
        probes.append(("config", cfg_host, cfg_port))

    for source, host, port in probes:
        if _socks5_handshake(host, port):
            return TorStatus(
                proxy_url=f"socks5://{host}:{port}",
                source=source,
                host=host,
                port=port,
            )

    return TorStatus(proxy_url=None, source="none")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def tor_unavailable_message() -> str:
    """Return the three-line hint shown when no Tor proxy could be found."""
    hint_lines = (
        "Tor not found. Install: https://torproject.org",
        "Or run Tor Browser before investigating.",
        "Use --no-tor to skip Tor entirely (clearnet sources only).",
    )
    return "\n".join(hint_lines)
|