voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
cli/config.py ADDED
@@ -0,0 +1,180 @@
1
+ """
2
+ cli/config.py — Persistent config for the voidaccess CLI.
3
+
4
+ Stores LLM provider/model, API keys, Tor proxy settings, and output dir
5
+ in ~/.voidaccess/config.json. Exposes helpers and an apply_env() function
6
+ that pushes the saved config into os.environ before any voidaccess module
7
+ is imported (the existing modules read API keys from env at import time).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import os
14
+ from pathlib import Path
15
+ from typing import Any, Optional
16
+
17
+ CLI_HOME = Path(os.path.expanduser("~/.voidaccess"))
18
+ CONFIG_PATH = CLI_HOME / "config.json"
19
+ DB_PATH = CLI_HOME / "investigations.db"
20
+ DEFAULT_OUTPUT_DIR = CLI_HOME / "results"
21
+
22
+ ENRICHMENT_KEYS = [
23
+ "OTX_API_KEY",
24
+ "VT_API_KEY",
25
+ "ABUSEIPDB_API_KEY",
26
+ "GREYNOISE_API_KEY",
27
+ "URLSCAN_API_KEY",
28
+ "SECURITYTRAILS_API_KEY",
29
+ "GITHUB_TOKEN",
30
+ "GITLAB_TOKEN",
31
+ "HYBRID_ANALYSIS_API_KEY",
32
+ "HIBP_API_KEY",
33
+ "EMAILREP_API_KEY",
34
+ "SHODAN_API_KEY",
35
+ "BLOCKCYPHER_TOKEN",
36
+ "ETHERSCAN_API_KEY",
37
+ "DEEPL_API_KEY",
38
+ "DARKSEARCH_API_KEY",
39
+ ]
40
+
41
+ PROVIDER_ENV = {
42
+ "openrouter": "OPENROUTER_API_KEY",
43
+ "groq": "GROQ_API_KEY",
44
+ "google": "GOOGLE_API_KEY",
45
+ "openai": "OPENAI_API_KEY",
46
+ "anthropic": "ANTHROPIC_API_KEY",
47
+ "ollama": None,
48
+ }
49
+
50
+ DEFAULT_CONFIG: dict[str, Any] = {
51
+ "llm": {
52
+ "provider": "openrouter",
53
+ "model": "openrouter/deepseek/deepseek-chat",
54
+ "api_key": "",
55
+ },
56
+ "enrichment_keys": {k: "" for k in ENRICHMENT_KEYS},
57
+ "tor": {
58
+ "host": "127.0.0.1",
59
+ "port": 9050,
60
+ },
61
+ "output_dir": str(DEFAULT_OUTPUT_DIR),
62
+ }
63
+
64
+
65
def _ensure_home() -> None:
    """Create the CLI home directory (and any missing parents) if absent."""
    # exist_ok keeps repeated calls harmless; every config/db helper calls this.
    CLI_HOME.mkdir(exist_ok=True, parents=True)
67
+
68
+
69
def load_config() -> dict[str, Any]:
    """Return the saved config merged over a fresh copy of DEFAULT_CONFIG.

    Falls back to the defaults when the config file is missing, unreadable,
    not valid JSON, or does not contain a JSON object at the top level.
    Sections ("llm", "tor", "enrichment_keys") are merged key-by-key; a
    section that is not itself a JSON object is ignored rather than crashing
    the merge.
    """
    _ensure_home()
    # JSON round-trip yields an independent deep copy, so callers can mutate
    # the result without touching the module-level DEFAULT_CONFIG.
    merged = json.loads(json.dumps(DEFAULT_CONFIG))
    if not CONFIG_PATH.exists():
        return merged
    try:
        cfg = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return merged
    if not isinstance(cfg, dict):
        # e.g. the file contains `[]` or `null` — treat as "no usable config".
        return merged

    # Merge with defaults so missing keys don't crash
    for section in ("llm", "tor", "enrichment_keys"):
        saved = cfg.get(section)
        if isinstance(saved, dict):
            merged[section].update(saved)
    if cfg.get("output_dir"):
        merged["output_dir"] = cfg["output_dir"]
    return merged
86
+
87
+
88
def save_config(config: dict[str, Any]) -> None:
    """Persist *config* to CONFIG_PATH as pretty-printed, key-sorted JSON.

    The file holds API keys, so it is created with owner-only permissions
    (best effort on platforms without POSIX modes). The payload is written
    to a sibling temp file and moved into place with os.replace(), so an
    interrupted write cannot leave a truncated config behind.

    Raises:
        TypeError / ValueError: if *config* is not JSON-serialisable.
        OSError: if the file cannot be written or moved into place.
    """
    _ensure_home()
    # Serialise first: a non-serialisable config must not touch the disk.
    payload = json.dumps(config, indent=2, sort_keys=True)
    tmp_path = CONFIG_PATH.with_name(CONFIG_PATH.name + ".tmp")
    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        try:
            # The mode passed to os.open only applies on creation; tighten a
            # stale temp file left over from an earlier crash as well.
            os.chmod(tmp_path, 0o600)
        except OSError:
            pass
        os.replace(tmp_path, CONFIG_PATH)
    finally:
        # After a successful replace the temp name is gone and unlink raises
        # FileNotFoundError; on failure this removes the partial file.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
94
+
95
+
96
def is_configured() -> bool:
    """Report whether a usable LLM setup has been saved.

    False when no config file exists. The "ollama" provider counts as
    configured without a key; every other provider needs a non-empty
    api_key alongside the provider name.
    """
    if not CONFIG_PATH.exists():
        return False
    llm_section = load_config().get("llm", {})
    provider = llm_section.get("provider", "")
    if provider == "ollama":
        return True
    api_key = llm_section.get("api_key", "")
    if provider and api_key:
        return True
    return False
105
+
106
+
107
def get_llm_key(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM API key, or "" when unset.

    Reads from *config* when a non-empty mapping is given, otherwise from
    the config loaded off disk.
    """
    cfg = config if config else load_config()
    stored = cfg.get("llm", {}).get("api_key", "")
    return stored if stored else ""
110
+
111
+
112
def get_llm_model(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM model identifier, or "" when unset.

    Reads from *config* when a non-empty mapping is given, otherwise from
    the config loaded off disk.
    """
    cfg = config if config else load_config()
    stored = cfg.get("llm", {}).get("model", "")
    return stored if stored else ""
115
+
116
+
117
def get_llm_provider(config: Optional[dict[str, Any]] = None) -> str:
    """Return the saved LLM provider name, or "" when unset.

    Reads from *config* when a non-empty mapping is given, otherwise from
    the config loaded off disk.
    """
    cfg = config if config else load_config()
    stored = cfg.get("llm", {}).get("provider", "")
    return stored if stored else ""
120
+
121
+
122
def get_tor_proxy(config: Optional[dict[str, Any]] = None) -> str:
    """Build the socks5:// proxy URL from the "tor" config section.

    Missing host/port fall back to 127.0.0.1 and 9050.
    """
    cfg = config if config else load_config()
    tor_section = cfg.get("tor", {})
    host = tor_section.get("host", "127.0.0.1")
    port = tor_section.get("port", 9050)
    return "socks5://{}:{}".format(host, port)
127
+
128
+
129
def get_output_dir(config: Optional[dict[str, Any]] = None) -> Path:
    """Return the results directory as a Path, creating it if needed.

    Uses the configured "output_dir" ("~" is expanded) and falls back to
    DEFAULT_OUTPUT_DIR when the setting is empty or missing.
    """
    cfg = config if config else load_config()
    configured = cfg.get("output_dir") or str(DEFAULT_OUTPUT_DIR)
    target = Path(os.path.expanduser(configured))
    target.mkdir(exist_ok=True, parents=True)
    return target
134
+
135
+
136
def db_url() -> str:
    """Return the SQLite URL for the CLI database (exported as DATABASE_URL)."""
    _ensure_home()
    # as_posix() keeps forward slashes in the URL on every platform.
    posix_path = DB_PATH.as_posix()
    return "sqlite:///" + posix_path
140
+
141
+
142
def apply_env(config: Optional[dict[str, Any]] = None) -> None:
    """
    Export the saved CLI config into os.environ.

    The existing voidaccess modules (config.py, llm.py, sources/*) read
    their settings from the environment at import time, so this must be
    called BEFORE any voidaccess module is imported.

    Local-mode defaults (database URL, JWT secret, rate limiting,
    Playwright) are only set when not already present; Tor, LLM and
    enrichment settings from the config overwrite the environment.
    """
    cfg = config if config else load_config()

    # Defaults: never override something the caller already exported.
    local_defaults = (
        ("DATABASE_URL", db_url()),
        ("JWT_SECRET", "voidaccess-cli-local-no-auth"),
        ("DISABLE_RATE_LIMIT", "true"),
        ("PLAYWRIGHT_ENABLED", "false"),
    )
    for env_key, fallback in local_defaults:
        os.environ.setdefault(env_key, fallback)

    def _export(name: str, raw: Any, *, clear_if_empty: bool = False) -> None:
        # Non-blank values are written; blank ones either leave the variable
        # untouched or, when clear_if_empty is set, remove it.
        cleaned = "" if raw is None else str(raw).strip()
        if cleaned:
            os.environ[name] = cleaned
            return
        if clear_if_empty:
            os.environ.pop(name, None)

    # Tor proxy
    tor_section = cfg.get("tor", {})
    _export("TOR_PROXY_HOST", tor_section.get("host", "127.0.0.1"))
    _export("TOR_PROXY_PORT", tor_section.get("port", 9050))

    # LLM provider key, published under the provider's own env var name.
    # Providers mapped to None (ollama) or unknown providers export nothing.
    llm_section = cfg.get("llm", {})
    provider_var = PROVIDER_ENV.get(llm_section.get("provider", ""))
    if provider_var:
        _export(provider_var, llm_section.get("api_key", ""), clear_if_empty=True)

    # Default model
    _export("DEFAULT_MODEL", llm_section.get("model", ""))

    # Enrichment keys: an empty saved value removes the variable.
    enrichment = cfg.get("enrichment_keys") or {}
    for env_key, secret in enrichment.items():
        _export(env_key, secret, clear_if_empty=True)
cli/display.py ADDED
@@ -0,0 +1,212 @@
1
+ """
2
+ cli/display.py — Rich live display for investigations.
3
+
4
+ Three-zone layout:
5
+ title bar — query + elapsed timer
6
+ step table — pipeline stages with status icons
7
+ activity line — current URL / sub-task detail
8
+
9
+ Status icons:
10
+ pending · gray dot
11
+ active ⠹ spinner (advances one frame on each display refresh)
12
+ ok ✓ green
13
+ fail ✗ red
14
+ skip ↷ yellow
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import time
20
+ from dataclasses import dataclass, field
21
+ from typing import Optional
22
+
23
+ from rich.console import Console, Group
24
+ from rich.live import Live
25
+ from rich.panel import Panel
26
+ from rich.table import Table
27
+ from rich.text import Text
28
+
29
+
30
+ SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
31
+
32
+ STATUS_GLYPH = {
33
+ "pending": ("·", "grey50"),
34
+ "active": ("⠹", "cyan"),
35
+ "ok": ("✓", "green"),
36
+ "fail": ("✗", "red"),
37
+ "skip": ("↷", "yellow"),
38
+ }
39
+
40
+
41
@dataclass
class StepRow:
    """One pipeline stage as shown in the step table."""

    # Display name; also the key used to look the row up.
    name: str
    # One of the STATUS_GLYPH keys; unknown values render as a pending dot.
    status: str = "pending"
    # Optional short note shown in parentheses after the name.
    detail: str = ""
    # Child rows as (label, status) pairs, rendered beneath the step.
    substeps: list[tuple[str, str]] = field(default_factory=list)
47
+
48
+
49
class InvestigationDisplay:
    """Live terminal display driven by .update_step() and .update_current_url().

    In normal mode a rich ``Live`` panel is redrawn on every update. In
    quiet mode no live panel is created and each step update is printed as
    a single line instead.

    Caller-supplied text (query, step names, details, error messages) is
    escaped before being embedded in Rich markup strings, so text containing
    square brackets is printed literally instead of being parsed as style
    tags (which can raise ``MarkupError``).
    """

    def __init__(self, console: Optional[Console] = None, quiet: bool = False):
        self.console = console or Console()
        self.quiet = quiet
        self._live: Optional[Live] = None
        self._query: str = ""
        self._start_ts: float = time.monotonic()
        self._steps: list[StepRow] = []
        self._current_url: str = ""
        self._spinner_index = 0
        self._final_summary: Optional[dict] = None
        self._error: Optional[str] = None

    # -- lifecycle ----------------------------------------------------------

    def start(self, query: str, steps: Optional[list[str]] = None) -> None:
        """Reset the timer and step list, then begin displaying.

        *steps* overrides the default pipeline stage names. In quiet mode
        only a one-line header is printed and no live panel is started.
        """
        self._query = query
        self._start_ts = time.monotonic()
        names = steps or [
            "Refining query",
            "Searching dark web",
            "Filtering results",
            "Scraping pages",
            "Extracting entities",
            "Enriching intelligence",
            "Building graph",
            "Generating summary",
            "Finalizing results",
        ]
        self._steps = [StepRow(name=n) for n in names]
        if self.quiet:
            from rich.markup import escape

            # The query is user input; escape it so brackets are not parsed
            # as markup tags.
            self.console.print(f"[bold]VoidAccess[/bold] — {escape(str(query))}")
            return
        self._live = Live(
            self._render(),
            console=self.console,
            refresh_per_second=8,
            transient=False,
        )
        self._live.start()

    def stop(self) -> None:
        """Draw one final frame and shut the live panel down (idempotent)."""
        if self._live is not None:
            self._live.update(self._render(), refresh=True)
            self._live.stop()
            self._live = None

    # -- updates ------------------------------------------------------------

    def update_step(self, step_name: str, status: str, detail: str = "") -> None:
        """Set a step's status (and detail, when non-empty).

        A step name that is not in the table yet is appended as a new row.
        """
        row = self._find_step(step_name)
        if row is None:
            row = StepRow(name=step_name)
            self._steps.append(row)
        row.status = status
        if detail:
            row.detail = detail
        self._refresh()
        if self.quiet:
            from rich.markup import escape

            icon = STATUS_GLYPH.get(status, ("·", "grey50"))[0]
            d = f" — {escape(str(detail))}" if detail else ""
            self.console.print(f" {icon} {escape(str(step_name))}{d}")

    def update_substep(self, step_name: str, label: str, status: str) -> None:
        """Add or update the (label, status) sub-row of an existing step.

        Silently does nothing when *step_name* is unknown.
        """
        row = self._find_step(step_name)
        if row is None:
            return
        for idx, (existing, _) in enumerate(row.substeps):
            if existing == label:
                row.substeps[idx] = (label, status)
                self._refresh()
                return
        row.substeps.append((label, status))
        self._refresh()

    def update_current_url(self, url: str) -> None:
        """Set the URL shown on the activity line ("" clears it)."""
        self._current_url = url
        self._refresh()

    def complete(self, summary: dict) -> None:
        """Stop the live panel and print the completion summary panel."""
        self._final_summary = summary
        self.stop()
        self._print_completion(summary)

    def error(self, msg: str) -> None:
        """Stop the live panel and print a failure line containing *msg*."""
        from rich.markup import escape

        self._error = msg
        self.stop()
        # Error text routinely contains brackets (paths, list reprs); escape
        # it so reporting a failure cannot itself raise a markup error.
        self.console.print(
            f"[bold red]Investigation failed:[/bold red] {escape(str(msg))}"
        )

    # -- render -------------------------------------------------------------

    def _refresh(self) -> None:
        """Advance the spinner one frame and redraw if a live panel exists."""
        self._spinner_index = (self._spinner_index + 1) % len(SPINNER_FRAMES)
        if self._live is not None:
            self._live.update(self._render(), refresh=True)

    def _find_step(self, name: str) -> Optional[StepRow]:
        """Return the first step row named *name*, or None."""
        for row in self._steps:
            if row.name == name:
                return row
        return None

    def _render(self) -> Panel:
        """Build the panel: title bar, step table, activity line."""
        elapsed = time.monotonic() - self._start_ts
        # Text.append() takes plain text, so the query needs no escaping here.
        title = Text()
        title.append("VoidAccess", style="bold magenta")
        title.append(f" — \"{self._query}\"", style="bold white")
        title.append(f" Elapsed: {self._fmt_elapsed(elapsed)}", style="grey50")

        table = Table.grid(padding=(0, 1))
        table.add_column(width=2)
        table.add_column(no_wrap=False)
        for row in self._steps:
            glyph, colour = STATUS_GLYPH.get(row.status, ("·", "grey50"))
            if row.status == "active":
                # Active rows show the current spinner frame instead of the
                # static glyph from the table.
                glyph = SPINNER_FRAMES[self._spinner_index]
            line = Text()
            line.append(f"{glyph} ", style=colour)
            line.append(row.name, style="white" if row.status != "pending" else "grey50")
            if row.detail:
                line.append(f" ({row.detail})", style="grey62")
            table.add_row("", line)
            for sub_label, sub_status in row.substeps:
                sg, sc = STATUS_GLYPH.get(sub_status, ("·", "grey50"))
                sub = Text(f" {sg} {sub_label}", style=sc)
                table.add_row("", sub)

        activity = Text()
        if self._current_url:
            activity.append("Fetching: ", style="bold")
            activity.append(self._current_url, style="cyan")
        else:
            activity.append("", style="grey50")

        body = Group(title, Text(""), table, Text(""), activity)
        return Panel(body, border_style="magenta", padding=(1, 2))

    @staticmethod
    def _fmt_elapsed(secs: float) -> str:
        """Format a duration as "<minutes>m <seconds>s" (seconds zero-padded)."""
        m, s = divmod(int(secs), 60)
        return f"{m}m {s:02d}s"

    def _print_completion(self, summary: dict) -> None:
        """Print the green summary panel; missing counts are shown as "—"."""
        table = Table.grid(padding=(0, 2))
        table.add_column(style="bold")
        table.add_column()
        table.add_row("Entities", str(summary.get("entity_count", "—")))
        table.add_row("Pages", str(summary.get("page_count", "—")))
        if "c2_ips" in summary:
            table.add_row("C2 IPs", f"{summary['c2_ips']} confirmed")
        table.add_row("Sources", str(summary.get("sources_used", "—")))
        if summary.get("report_path"):
            table.add_row("Report", str(summary["report_path"]))
        if summary.get("data_path"):
            table.add_row("Data", str(summary["data_path"]))

        panel = Panel(
            Group(Text("✓ Investigation complete", style="bold green"), Text(""), table),
            border_style="green",
            padding=(1, 2),
        )
        self.console.print(panel)
cli/main.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ cli/main.py — typer entry point exposed as the `voidaccess` script.
3
+
4
+ Defined as the [project.scripts] target in pyproject.toml:
5
+ voidaccess = "cli.main:app"
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ import sys
13
+
14
+ # Force UTF-8 on Windows consoles so rich glyphs render reliably
15
+ if sys.platform == "win32":
16
+ os.environ.setdefault("PYTHONIOENCODING", "utf-8")
17
+ try:
18
+ sys.stdout.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
19
+ sys.stderr.reconfigure(encoding="utf-8") # type: ignore[attr-defined]
20
+ except Exception:
21
+ pass
22
+
23
+ import typer
24
+ from rich.console import Console
25
+ from rich.table import Table
26
+
27
+ from cli import __version__
28
+ from cli import config as cli_config
29
+ from cli.commands import configure, enrich, export, investigate, show
30
+
31
+ console = Console()
32
+
33
+ app = typer.Typer(
34
+ name="voidaccess",
35
+ help="Dark web OSINT — query to intelligence report.",
36
+ no_args_is_help=True,
37
+ add_completion=False,
38
+ )
39
+
40
+ # Sub-commands
41
+ app.add_typer(configure.app, name="configure", help="Configure the CLI (LLM, keys, Tor).")
42
+ app.command("investigate", help="Run a new investigation.")(investigate.run)
43
+ app.command("show", help="Open the entity browser TUI.")(show.run)
44
+ app.command("export", help="Export an investigation to STIX/MISP/Sigma/CSV/MD/JSON.")(export.run)
45
+ app.command("enrich", help="Re-enrich a stored investigation against current feeds.")(enrich.run)
46
+
47
+
48
def _ensure_first_run() -> None:
    """Auto-launch wizard on first invocation when no config exists.

    Best effort: a wizard failure never propagates, but the user is told
    how to finish setup instead of the failure being swallowed silently.
    """
    if cli_config.CONFIG_PATH.exists():
        return
    console.print(
        "[bold magenta]Welcome to voidaccess.[/bold magenta] "
        "Let's get you configured first."
    )
    # Invoke wizard via Typer
    try:
        configure.configure_default(ctx=typer.Context(configure.app))
    except Exception:
        # Keep going (the original contract), but leave a visible hint so the
        # user is not left unconfigured without knowing why.
        console.print(
            "[yellow]Setup wizard did not complete.[/yellow] "
            "Run `voidaccess configure` to finish setup."
        )
61
+
62
+
63
@app.command("status")
def status() -> None:
    """Show current config, Tor status, and detected API keys."""
    # NOTE: the docstring above doubles as the command's --help text.
    from cli.tor_detect import detect_tor

    cli_config.apply_env()
    cfg = cli_config.load_config()

    # Rows are collected first and rendered in one pass at the end.
    rows = [
        ("Version", __version__),
        ("Config path", str(cli_config.CONFIG_PATH)),
        ("DB path", str(cli_config.DB_PATH)),
        ("Output dir", str(cli_config.get_output_dir(cfg))),
    ]

    llm_section = cfg.get("llm", {})
    key_cell = "[green]set[/green]" if llm_section.get("api_key") else "[red]missing[/red]"
    rows.append(("LLM provider", llm_section.get("provider") or "—"))
    rows.append(("LLM model", llm_section.get("model") or "—"))
    rows.append(("LLM key", key_cell))

    tor_section = cfg.get("tor", {})
    rows.append(("Tor host", tor_section.get("host", "—")))
    rows.append(("Tor port", str(tor_section.get("port", "—"))))

    probe = detect_tor()
    if probe.proxy_url:
        reachable = f"[green]{probe.source}[/green] at {probe.proxy_url}"
    else:
        reachable = "[red]no proxy responded[/red]"
    rows.append(("Tor reachable", reachable))

    # Any failure to import spaCy or load the model is reported the same way.
    try:
        import spacy

        spacy.load("en_core_web_sm")
    except Exception:
        ner_state = "not installed"
    else:
        ner_state = "ready"
    rows.append(("spaCy NER", ner_state))

    enrichment = cfg.get("enrichment_keys", {})
    populated = len([v for v in enrichment.values() if v])
    rows.append(("Enrichment keys", f"{populated}/{len(enrichment)} set"))

    table = Table(title="voidaccess status", show_lines=False)
    table.add_column("Setting", style="bold")
    table.add_column("Value")
    for setting, value in rows:
        table.add_row(setting, value)

    console.print(table)
106
+
107
+
108
@app.command("list")
def list_investigations(
    limit: int = typer.Option(20, "--limit", "-n", help="Number of rows"),
    as_json: bool = typer.Option(False, "--json", help="JSON output"),
) -> None:
    """List saved investigations."""
    # NOTE: the docstring above doubles as the command's --help text.
    from rich.markup import escape

    # Environment must be populated before the adapter module is imported.
    cli_config.apply_env()
    from cli.adapters import sqlite as sqlite_adapter

    sqlite_adapter.init_db()
    rows = sqlite_adapter.list_investigations(limit=limit)
    if as_json:
        # default=str stringifies values json cannot encode natively.
        console.print_json(json.dumps(rows, default=str))
        return
    if not rows:
        console.print("[grey50]No saved investigations.[/grey50]")
        return
    table = Table(title="Saved investigations")
    table.add_column("Id", style="cyan")
    table.add_column("Query")
    table.add_column("Status")
    table.add_column("Entities", justify="right")
    table.add_column("Created")
    for r in rows:
        # str() before slicing tolerates non-string ids/timestamps coming
        # back from the adapter. Stored query/status text is escaped (after
        # truncation, so an escape sequence is never cut in half) because
        # Rich would otherwise parse brackets in it as markup tags.
        table.add_row(
            str(r["id"])[:8],
            escape(str(r["query"] or "")[:60]),
            escape(str(r["status"] or "")),
            str(r["entity_count"]),
            str(r["created_at"] or "")[:19],
        )
    console.print(table)
139
+
140
+
141
@app.command("version")
def version() -> None:
    """Print the installed version."""
    # NOTE: the docstring above doubles as the command's --help text.
    banner = f"voidaccess {__version__}"
    console.print(banner)
145
+
146
+
147
@app.callback()
def _global(ctx: typer.Context) -> None:
    """Set env vars before sub-commands import voidaccess modules."""
    # ctx is accepted but not used; the only job here is exporting the saved
    # config into the environment.
    cli_config.apply_env()
151
+
152
+
153
+ if __name__ == "__main__":
154
+ app()
cli/tor_detect.py ADDED
@@ -0,0 +1,71 @@
1
+ """
2
+ cli/tor_detect.py — Locate a running Tor SOCKS5 proxy.
3
+
4
+ Probe order:
5
+ 1. 127.0.0.1:9050 (system tor)
6
+ 2. 127.0.0.1:9150 (Tor Browser bundle)
7
+ 3. config override (cli/config.py)
8
+
9
+ Tests by performing a SOCKS5 handshake — no full HTTP round-trip required,
10
+ so detection stays fast even when the wider Tor network is slow.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import socket
16
+ import struct
17
+ from dataclasses import dataclass
18
+ from typing import Optional
19
+
20
+ from cli.config import load_config
21
+
22
+
23
@dataclass
class TorStatus:
    """Outcome of a Tor proxy probe."""

    # "socks5://host:port" of the proxy that answered, or None if none did.
    proxy_url: Optional[str]
    # Which probe answered: "system_tor" | "tor_browser" | "config" | "none"
    source: str
    # Host and port of the responding proxy; left as None when nothing answered.
    host: Optional[str] = None
    port: Optional[int] = None
29
+
30
+
31
+ def _socks5_handshake(host: str, port: int, timeout: float = 2.0) -> bool:
32
+ """Open TCP, send a SOCKS5 NO-AUTH greeting, expect 0x05 0x00 back."""
33
+ try:
34
+ with socket.create_connection((host, port), timeout=timeout) as sock:
35
+ sock.sendall(b"\x05\x01\x00")
36
+ resp = sock.recv(2)
37
+ return len(resp) == 2 and resp[0] == 0x05 and resp[1] == 0x00
38
+ except Exception:
39
+ return False
40
+
41
+
42
def detect_tor() -> TorStatus:
    """Probe the known Tor SOCKS5 endpoints and return the first that answers.

    Order: system tor (127.0.0.1:9050), Tor Browser (127.0.0.1:9150), then
    the host/port from the saved config when it differs from both. A
    configured port that cannot be converted to an int is ignored (the
    config probe is skipped) instead of raising.

    Returns:
        A TorStatus describing the responding proxy, or one with
        proxy_url=None and source="none" when no probe succeeds.
    """
    tor_section = load_config().get("tor", {})
    cfg_host = tor_section.get("host", "127.0.0.1")
    try:
        cfg_port = int(tor_section.get("port", 9050))
    except (TypeError, ValueError):
        # Hand-edited config with e.g. "port": "abc" or null.
        cfg_port = None

    probes = [
        ("system_tor", "127.0.0.1", 9050),
        ("tor_browser", "127.0.0.1", 9150),
    ]
    already_probed = {(h, p) for _, h, p in probes}
    if cfg_port is not None and (cfg_host, cfg_port) not in already_probed:
        probes.append(("config", cfg_host, cfg_port))

    for source, host, port in probes:
        if _socks5_handshake(host, port):
            return TorStatus(
                proxy_url=f"socks5://{host}:{port}",
                source=source,
                host=host,
                port=port,
            )

    return TorStatus(proxy_url=None, source="none")
64
+
65
+
66
def tor_unavailable_message() -> str:
    """Return the three-line hint shown when no Tor proxy could be found."""
    hint_lines = [
        "Tor not found. Install: https://torproject.org",
        "Or run Tor Browser before investigating.",
        "Use --no-tor to skip Tor entirely (clearnet sources only).",
    ]
    return "\n".join(hint_lines)