voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
cli/browser.py ADDED
@@ -0,0 +1,376 @@
1
+ """
2
+ cli/browser.py — Textual TUI for browsing an investigation's entities.
3
+
4
+ Two-pane layout:
5
+ Left (35%) — entity list, type-filter, badges
6
+ Right (65%) — entity detail + top connections
7
+
8
+ Keys:
9
+ / search f filter by type
10
+ p shortest path c clusters view
11
+ e export selected q quit
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections import Counter, defaultdict
17
+ from typing import Any, Optional
18
+
19
+ from textual.app import App, ComposeResult
20
+ from textual.binding import Binding
21
+ from textual.containers import Horizontal, Vertical
22
+ from textual.reactive import reactive
23
+ from textual.screen import ModalScreen
24
+ from textual.widgets import (
25
+ DataTable,
26
+ Footer,
27
+ Header,
28
+ Input,
29
+ Label,
30
+ Static,
31
+ )
32
+
33
+
34
# Entity type -> (one-character glyph, colour name). The glyph is what the
# entity table shows in its "T" column; lookups elsewhere fall back to
# ("?", "white") for types that are not listed here.
TYPE_SHORT = dict(
    ip_address=("I", "cyan"),
    domain=("D", "green"),
    onion_url=("O", "magenta"),
    email=("E", "yellow"),
    # All three hash flavours share one glyph.
    file_hash_md5=("H", "blue"),
    file_hash_sha1=("H", "blue"),
    file_hash_sha256=("H", "blue"),
    crypto_wallet=("W", "yellow"),
    ransomware_group=("R", "red"),
    malware=("M", "red"),
    cve=("C", "red"),
    phone=("P", "grey50"),
    handle=("@", "yellow"),
    pgp_key=("K", "grey50"),
)
50
+
51
+
52
+ def _badges_for_entity(entity: dict) -> list[str]:
53
+ tags = (entity.get("corroborating_sources") or "").lower()
54
+ badges: list[str] = []
55
+ if "c2" in tags:
56
+ badges.append("[C2]")
57
+ if "breached" in tags or "hibp" in tags:
58
+ badges.append("[Breached]")
59
+ if "malicious" in tags or "abuseipdb" in tags:
60
+ badges.append("[Malicious]")
61
+ if "fresh" in tags:
62
+ badges.append("[Fresh]")
63
+ return badges
64
+
65
+
66
class EntityBrowserApp(App):
    """Textual app over an investigation export dict.

    The export dict is read for its ``entities`` and ``relationships`` lists
    and for a query string (``investigation.query`` or top-level ``query``)
    that is used as the title. The left pane holds a search box and a
    filterable entity table; highlighting a row renders that entity's details
    and its strongest connections in the right pane.
    """

    CSS = """
    Screen { layout: horizontal; }
    #left { width: 35%; border-right: solid $accent; }
    #right { width: 65%; padding: 1 2; }
    #detail { height: 100%; }
    DataTable { height: 1fr; }
    """

    BINDINGS = [
        Binding("q", "quit", "Quit"),
        Binding("slash", "focus_search", "Search"),
        Binding("f", "cycle_filter", "Filter"),
        Binding("c", "clusters_view", "Clusters"),
        Binding("p", "path_view", "Path"),
        Binding("e", "export_selected", "Export"),
        Binding("r", "refresh_table", "Refresh"),
    ]

    # Current free-text search (empty string = no search).
    search_query: reactive[str] = reactive("")
    # Entity type currently shown, or None for "all types".
    type_filter: reactive[Optional[str]] = reactive(None)

    def __init__(self, data: dict[str, Any]):
        """Index the export dict and sort entities by connectivity.

        Args:
            data: Investigation export. ``entities`` items must carry ``id``
                and ``entity_type``; ``relationships`` items must carry
                ``entity_a_id``, ``entity_b_id`` and ``relationship_type``.
        """
        super().__init__()
        self.data = data
        inv = data.get("investigation") or {}
        self._title_text = inv.get("query") or data.get("query") or "investigation"
        # ``or []`` (not a .get default) so an explicit null in the export is
        # treated like a missing list instead of raising in list().
        self.entities: list[dict] = list(data.get("entities") or [])
        self.relationships: list[dict] = list(data.get("relationships") or [])
        # Connection counts: every relationship adds one to both endpoints.
        counts: Counter[str] = Counter()
        for r in self.relationships:
            counts[r["entity_a_id"]] += 1
            counts[r["entity_b_id"]] += 1
        self.connection_count = counts
        # Most-connected first; ties broken by higher confidence.
        self.entities.sort(
            key=lambda e: (-counts.get(e["id"], 0), -(e.get("confidence") or 0))
        )

    @staticmethod
    def _display_value(entity: dict) -> str:
        """Return the entity's canonical value, else its raw value, else ''."""
        return entity.get("canonical_value") or entity.get("value") or ""

    def compose(self) -> ComposeResult:
        """Build the header, the two panes and the footer."""
        yield Header(show_clock=False)
        with Horizontal():
            with Vertical(id="left"):
                yield Input(placeholder="search… (press / to focus)", id="search")
                # markup=False: the title is wrapped in square brackets, which
                # the markup parser would otherwise read as a style tag and
                # swallow (e.g. "[investigation]" would render as nothing).
                yield Label(f"[{self._title_text}]", id="title", markup=False)
                yield DataTable(id="entity_table", zebra_stripes=True, cursor_type="row")
            with Vertical(id="right"):
                yield Static("Select an entity on the left.", id="detail")
        yield Footer()

    def on_mount(self) -> None:
        """Set the window title, create the table columns and fill the table."""
        self.title = f"voidaccess — {self._title_text}"
        table: DataTable = self.query_one("#entity_table", DataTable)
        table.add_columns("T", "Value", "Conn", "Badges")
        self._populate_table()

    # -- helpers -----------------------------------------------------------

    def _filtered(self) -> list[dict]:
        """Return the entities matching the active type filter and search.

        The search is a case-insensitive substring match against the value,
        the canonical value and the corroborating-sources tags.
        """
        out = self.entities
        if self.type_filter:
            out = [e for e in out if e["entity_type"] == self.type_filter]
        if self.search_query:
            q = self.search_query.lower()
            out = [
                e for e in out
                if q in (e.get("value") or "").lower()
                or q in (e.get("canonical_value") or "").lower()
                or q in (e.get("corroborating_sources") or "").lower()
            ]
        return out

    def _populate_table(self) -> None:
        """Clear the table rows and re-add one row per filtered entity."""
        table: DataTable = self.query_one("#entity_table", DataTable)
        table.clear()
        for e in self._filtered():
            glyph, _colour = TYPE_SHORT.get(e["entity_type"], ("?", "white"))
            val = self._display_value(e)[:42]
            conn = self.connection_count.get(e["id"], 0)
            badges = " ".join(_badges_for_entity(e))
            # The entity id doubles as the row key so a highlighted row can be
            # mapped back to its entity.
            table.add_row(glyph, val, str(conn), badges, key=e["id"])

    # -- input handlers ----------------------------------------------------

    def on_input_changed(self, event: Input.Changed) -> None:
        """Re-filter the table as the search box changes."""
        if event.input.id == "search":
            self.search_query = event.value
            self._populate_table()

    def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Render the detail pane for the newly highlighted row."""
        if event.row_key is None:
            return
        eid = str(event.row_key.value) if hasattr(event.row_key, "value") else str(event.row_key)
        entity = next((e for e in self.entities if e["id"] == eid), None)
        if entity:
            self._render_detail(entity)

    # -- actions -----------------------------------------------------------

    def action_focus_search(self) -> None:
        """Move keyboard focus to the search box."""
        self.query_one("#search", Input).focus()

    def action_cycle_filter(self) -> None:
        """Step the type filter: all -> each type alphabetically -> all."""
        types = sorted({e["entity_type"] for e in self.entities})
        if not types:
            return
        if self.type_filter is None:
            self.type_filter = types[0]
        else:
            try:
                idx = types.index(self.type_filter)
                self.type_filter = types[idx + 1] if idx + 1 < len(types) else None
            except ValueError:
                # Active filter is not a known type any more: reset to "all".
                self.type_filter = None
        self._populate_table()

    def action_refresh_table(self) -> None:
        """Rebuild the table from the current filter and search."""
        self._populate_table()

    def action_clusters_view(self) -> None:
        """Open the clusters overlay."""
        self.push_screen(ClustersScreen(self))

    def action_path_view(self) -> None:
        """Open the shortest-path overlay."""
        self.push_screen(PathScreen(self))

    def action_export_selected(self) -> None:
        """Write the entity under the cursor to a JSON file and notify.

        The file is ``~/.voidaccess/results/entity-<id>.json``.
        """
        table: DataTable = self.query_one("#entity_table", DataTable)
        row = table.cursor_row
        # Table rows are added in _filtered() order, so the cursor index maps
        # directly onto that list.
        rows = list(self._filtered())
        if row < 0 or row >= len(rows):
            return
        entity = rows[row]
        from pathlib import Path
        out = Path.home() / ".voidaccess" / "results" / f"entity-{entity['id']}.json"
        out.parent.mkdir(parents=True, exist_ok=True)
        import json
        out.write_text(json.dumps(entity, indent=2, default=str), encoding="utf-8")
        self.notify(f"Exported to {out}")

    # -- detail pane -------------------------------------------------------

    def _render_detail(self, entity: dict) -> None:
        """Fill the right-hand pane with one entity's details.

        Shows value, type, confidence, tags, first/last seen, extraction
        method, up to 1500 characters of context, and the ten
        highest-confidence neighbours.
        """
        # NOTE(review): entity values and context are interpolated into a
        # markup string; text that looks like a markup tag (e.g. "[/quote]")
        # may be misparsed by the renderer — consider escaping it.
        lines: list[str] = []
        val = self._display_value(entity)
        lines.append(f"[b]Entity:[/b] {val}")
        lines.append(
            f"Type: {entity['entity_type']} | Confidence: "
            f"{(entity.get('confidence') or 0):.2f}"
        )
        tags = entity.get("corroborating_sources") or ""
        if tags:
            lines.append(f"Tags: {tags}")
        lines.append("")
        if entity.get("first_seen") or entity.get("last_seen"):
            lines.append(
                f"First seen: {entity.get('first_seen') or '—'} "
                f"Last seen: {entity.get('last_seen') or '—'}"
            )
        if entity.get("extraction_method"):
            lines.append(f"Extraction: {entity['extraction_method']}")
        lines.append("")
        ctx = (entity.get("context_snippet") or "").strip()
        if ctx:
            lines.append("[b]Context:[/b]")
            lines.append(ctx[:1500])
            lines.append("")

        neighbours = self._neighbours_of(entity["id"])
        if neighbours:
            lines.append("[b]Connected to (top 10):[/b]")
            for other_id, edge_type, conf in neighbours[:10]:
                other = next((e for e in self.entities if e["id"] == other_id), None)
                # Relationships may point at ids with no entity record; skip those.
                if other:
                    other_val = self._display_value(other)[:48]
                    lines.append(f" → {other_val:50} {edge_type:18} {conf:.2f}")
        detail: Static = self.query_one("#detail", Static)
        detail.update("\n".join(lines))

    def _neighbours_of(self, entity_id: str) -> list[tuple[str, str, float]]:
        """Return ``(other_id, relationship_type, confidence)`` for every
        relationship touching ``entity_id``, highest confidence first."""
        out: list[tuple[str, str, float]] = []
        for r in self.relationships:
            if r["entity_a_id"] == entity_id:
                out.append((r["entity_b_id"], r["relationship_type"], r.get("confidence") or 0.0))
            elif r["entity_b_id"] == entity_id:
                out.append((r["entity_a_id"], r["relationship_type"], r.get("confidence") or 0.0))
        out.sort(key=lambda t: -t[2])
        return out
255
+
256
+
257
+ # ---------------------------------------------------------------------------
258
+ # Cluster overlay
259
+ # ---------------------------------------------------------------------------
260
+
261
+
262
class ClustersScreen(ModalScreen):
    """Modal overlay listing the ten largest connected groups of entities."""

    BINDINGS = [Binding("escape", "dismiss", "Close"), Binding("q", "dismiss", "Close")]

    def __init__(self, parent_app: EntityBrowserApp):
        """Keep a reference to the app whose entities and edges are shown."""
        super().__init__()
        self._parent_app = parent_app

    def compose(self) -> ComposeResult:
        """Lay out a heading above the rendered cluster summary."""
        heading = Label("[b]Infrastructure clusters[/b] (esc to close)")
        body = Static(self._render_clusters(), id="clusters_body")
        yield Vertical(heading, body)

    def _render_clusters(self) -> str:
        """Return a text summary of the connected components of the graph.

        Each component is headed by its best-connected member (the "hub") and
        followed by a per-type count of its members. Only entities that take
        part in at least one relationship can appear.
        """
        # Undirected adjacency built from the parent's relationships.
        links: dict[str, set[str]] = defaultdict(set)
        for rel in self._parent_app.relationships:
            links[rel["entity_a_id"]].add(rel["entity_b_id"])
            links[rel["entity_b_id"]].add(rel["entity_a_id"])

        # Depth-first flood fill: one pass per not-yet-visited node.
        visited: set[str] = set()
        components: list[list[str]] = []
        for start in links:
            if start in visited:
                continue
            pending = [start]
            members: list[str] = []
            while pending:
                current = pending.pop()
                if current not in visited:
                    visited.add(current)
                    members.append(current)
                    pending.extend(links.get(current, ()))
            components.append(members)

        largest_first = sorted(components, key=len, reverse=True)
        by_id = {ent["id"]: ent for ent in self._parent_app.entities}

        def degree(node_id: str) -> int:
            return len(links.get(node_id, ()))

        out = []
        for position, members in enumerate(largest_first[:10], start=1):
            hub_id = max(members, key=degree)
            hub = by_id.get(hub_id, {})
            hub_val = hub.get("canonical_value") or hub.get("value") or hub_id[:8]
            # Members without a (non-empty) entity record are left out of the tally.
            per_type: Counter[str] = Counter(
                by_id[member]["entity_type"] for member in members if by_id.get(member)
            )
            # 64 + 1 == ord("A"): clusters are lettered A, B, C, ...
            out.append(
                f"Cluster {chr(64 + position)}: {hub_val} (hub, {degree(hub_id)} conn)"
            )
            out.extend(f" └── {count} {etype}" for etype, count in per_type.most_common())
            out.append("")
        return "\n".join(out) or "No clusters detected."
318
+
319
+
320
+ # ---------------------------------------------------------------------------
321
+ # Path finder overlay
322
+ # ---------------------------------------------------------------------------
323
+
324
+
325
class PathScreen(ModalScreen):
    """Modal overlay that finds the shortest relationship path between two entities."""

    BINDINGS = [Binding("escape", "dismiss", "Close")]

    def __init__(self, parent_app: EntityBrowserApp):
        """Keep a reference to the app whose entities and edges are searched."""
        super().__init__()
        self._parent_app = parent_app

    def compose(self) -> ComposeResult:
        """Lay out a heading, the two value inputs and the result area."""
        yield Vertical(
            Label("[b]Shortest path between two entities[/b]"),
            Input(placeholder="first entity value", id="path_a"),
            Input(placeholder="second entity value", id="path_b"),
            Static("", id="path_result"),
        )

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Run the search once both inputs hold a value (Enter in either)."""
        a = self.query_one("#path_a", Input).value.strip().lower()
        b = self.query_one("#path_b", Input).value.strip().lower()
        if not a or not b:
            return
        result = self._find_path(a, b)
        self.query_one("#path_result", Static).update(result)

    @staticmethod
    def _label(entity: dict) -> str:
        """Return the entity's canonical value, else its raw value, else ''."""
        return entity.get("canonical_value") or entity.get("value") or ""

    def _find_path(self, a_val: str, b_val: str) -> str:
        """Return a printable shortest path between two entities.

        Args:
            a_val: Lower-cased display value of the start entity.
            b_val: Lower-cased display value of the target entity.

        Returns:
            ``"v1 → v2 → ...\\n(N hops)"`` when a path exists, otherwise a
            message saying an entity was not found or no path exists.
        """
        from collections import deque

        ents = self._parent_app.entities
        a_ent = next((e for e in ents if self._label(e).lower() == a_val), None)
        b_ent = next((e for e in ents if self._label(e).lower() == b_val), None)
        if a_ent is None or b_ent is None:
            return "One or both entities not found in this investigation."

        # Undirected adjacency built from the parent's relationships.
        adj: dict[str, set[str]] = defaultdict(set)
        for r in self._parent_app.relationships:
            adj[r["entity_a_id"]].add(r["entity_b_id"])
            adj[r["entity_b_id"]].add(r["entity_a_id"])

        start, goal = a_ent["id"], b_ent["id"]
        # BFS with a predecessor map: O(1) dequeue and no per-node path copies.
        came_from: dict[str, str] = {}
        visited = {start}
        queue = deque([start])
        while queue:
            node = queue.popleft()
            if node == goal:
                # Walk the predecessor chain back to the start.
                path = [node]
                while path[-1] != start:
                    path.append(came_from[path[-1]])
                path.reverse()
                ents_by_id = {e["id"]: e for e in ents}
                # A relationship may reference an id with no entity record;
                # show the raw id for such hops instead of raising KeyError.
                arrow = " → ".join(
                    self._label(ents_by_id.get(n, {})) or n
                    for n in path
                )
                return f"{arrow}\n({len(path) - 1} hops)"
            for nxt in adj.get(node, ()):
                if nxt not in visited:
                    visited.add(nxt)
                    came_from[nxt] = node
                    queue.append(nxt)
        return "No path between these entities."
@@ -0,0 +1 @@
1
+ """voidaccess CLI commands."""
@@ -0,0 +1,185 @@
1
+ """
2
+ cli/commands/configure.py — first-run wizard and config sub-commands.
3
+
4
+ voidaccess configure — full wizard
5
+ voidaccess configure llm — just the LLM provider/key
6
+ voidaccess configure keys — just enrichment API keys
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Optional
12
+
13
+ import typer
14
+ from rich.console import Console
15
+ from rich.prompt import Prompt, Confirm
16
+ from rich.table import Table
17
+
18
+ from cli import config as cli_config
19
+
20
# Typer sub-application for the configure commands. invoke_without_command
# is enabled so the callback can run the wizard when no sub-command is given.
app = typer.Typer(
    help="Configure the voidaccess CLI.",
    no_args_is_help=False,
    invoke_without_command=True,
)
# Console shared by every prompt and message in this module.
console = Console()
22
+
23
+
24
# Selectable LLM providers as (config key, description) pairs, in the order
# they are numbered in the provider table.
PROVIDERS = list(
    {
        "openrouter": "OpenRouter (free models available)",
        "groq": "Groq (completely free)",
        "google": "Google Gemini (free tier)",
        "openai": "OpenAI (paid)",
        "anthropic": "Anthropic (paid)",
        "ollama": "Ollama (local, free)",
    }.items()
)
32
+
33
# Model identifier offered as the prompt default for each provider key.
DEFAULT_MODELS = dict(
    openrouter="openrouter/deepseek/deepseek-chat",
    groq="groq/llama-3.3-70b-versatile",
    google="gemini-1.5-flash",
    openai="gpt-4o-mini",
    anthropic="claude-haiku-4-5-20251001",
    ollama="ollama/llama3.2",
)
41
+
42
+
43
def _print_provider_table() -> None:
    """Print a numbered table of the available LLM providers.

    Rows are numbered from 1 in ``PROVIDERS`` order; the ``openrouter`` row
    is marked as the default.
    """
    grid = Table(title="LLM provider")
    column_specs = (
        ("#", {"style": "cyan", "justify": "right"}),
        ("Provider", {"style": "bold"}),
        ("Notes", {}),
    )
    for heading, options in column_specs:
        grid.add_column(heading, **options)

    number = 0
    for provider_key, blurb in PROVIDERS:
        number += 1
        if provider_key == "openrouter":
            note = blurb + " ← default"
        else:
            note = blurb + ""
        grid.add_row(str(number), provider_key, note)
    console.print(grid)
52
+
53
+
54
+ def _test_llm_key(provider: str, api_key: str, model: str) -> bool:
55
+ """Light credential validation — instantiate the LangChain class only."""
56
+ if provider == "ollama":
57
+ return True
58
+ try:
59
+ from voidaccess.llm import get_llm
60
+ get_llm(model, api_keys={cli_config.PROVIDER_ENV.get(provider, ""): api_key})
61
+ return True
62
+ except Exception as exc:
63
+ console.print(f"[yellow]Could not validate key:[/yellow] {exc}")
64
+ return False
65
+
66
+
67
def _prompt_llm(cfg: dict) -> None:
    """Interactively choose the LLM provider, model and API key.

    Updates ``cfg["llm"]`` in place (``provider``, ``model``, ``api_key``);
    saving is left to the caller. A non-empty key is test-validated, but it
    is stored whether or not validation succeeds.

    Args:
        cfg: Loaded CLI config; must contain an ``"llm"`` mapping.
    """
    _print_provider_table()
    provider_count = len(PROVIDERS)
    # Passing ``choices`` makes the prompt re-ask on invalid input by itself,
    # so no surrounding retry loop is needed.
    choice = Prompt.ask(
        f"Pick provider [1-{provider_count}]",
        default="1",
        choices=[str(i) for i in range(1, provider_count + 1)],
        show_choices=False,
    )
    provider, _ = PROVIDERS[int(choice) - 1]

    model = Prompt.ask(
        "Model identifier",
        default=DEFAULT_MODELS.get(provider, ""),
    )

    api_key = ""
    if provider != "ollama":
        # Only offer the stored key when it belongs to the chosen provider.
        saved_key = cfg["llm"].get("api_key", "") if cfg["llm"].get("provider") == provider else ""
        keep_hint = " (press Enter to keep the saved key)" if saved_key else ""
        api_key = Prompt.ask(
            f"API key for {provider}{keep_hint}",
            default=saved_key,
            password=True,
            # password=True only hides typed input; the default would still be
            # printed in the prompt, exposing the stored secret on screen.
            show_default=False,
        )

    cfg["llm"]["provider"] = provider
    cfg["llm"]["model"] = model
    cfg["llm"]["api_key"] = api_key

    # api_key is always empty for ollama, so this also skips that provider.
    if api_key:
        console.print("Testing key…", style="grey50")
        if _test_llm_key(provider, api_key, model):
            console.print("[green]Key looks valid.[/green]")
        else:
            console.print("[yellow]Saved anyway — verify later with `voidaccess status`.[/yellow]")
102
+
103
+
104
def _prompt_enrichment(cfg: dict) -> None:
    """Prompt for each enrichment API key and store the answers in ``cfg``.

    Every name in ``cli_config.ENRICHMENT_KEYS`` is asked in turn. Pressing
    Enter keeps an already-saved value or leaves an unset key empty. Values
    are written, stripped, to ``cfg["enrichment_keys"]``.

    Args:
        cfg: Loaded CLI config; must contain an ``"enrichment_keys"`` mapping.
    """
    console.print("\n[bold]Enrichment API keys[/bold] (press Enter to skip any)")
    for key_name in cli_config.ENRICHMENT_KEYS:
        existing = cfg["enrichment_keys"].get(key_name, "")
        # Show whether Enter keeps a saved key or skips, without echoing the
        # key itself (show_default stays off for that reason).
        display_default = "(saved)" if existing else "(skip)"
        val = Prompt.ask(
            f" {key_name} {display_default}",
            default=existing or "",
            show_default=False,
        )
        cfg["enrichment_keys"][key_name] = val.strip()
111
+
112
+
113
def _prompt_output_dir(cfg: dict) -> None:
    """Prompt for the output directory and store the answer in ``cfg``.

    The prompt default is the configured ``output_dir`` or, when that is
    missing or empty, ``cli_config.DEFAULT_OUTPUT_DIR``.
    """
    suggested = cfg.get("output_dir")
    if not suggested:
        suggested = str(cli_config.DEFAULT_OUTPUT_DIR)
    cfg["output_dir"] = Prompt.ask("Output directory", default=suggested)
117
+
118
+
119
def _ensure_spacy_model() -> None:
    """Try to download the ``en_core_web_sm`` spaCy model and report the outcome.

    The download runs ``python -m spacy download`` with the current
    interpreter. Failures are reported as warnings and never raised.
    """
    console.print("\n → Downloading spaCy NER model...")
    try:
        import subprocess
        import sys

        command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
        completed = subprocess.run(command, capture_output=True, text=True)
        if completed.returncode != 0:
            console.print(
                " ⚠ spaCy download failed — run manually: "
                "python -m spacy download en_core_web_sm"
            )
        else:
            console.print(" ✓ spaCy model ready")
    except Exception as e:
        # Best effort: setup must not abort because the model step failed.
        console.print(f" ⚠ spaCy: {e}")
139
+
140
+
141
@app.callback()
def configure_default(ctx: typer.Context) -> None:
    """Run the full wizard when no sub-command is given."""
    # The docstring above doubles as CLI help text, so it is kept verbatim.
    if ctx.invoked_subcommand is None:
        settings = cli_config.load_config()
        console.print("[bold magenta]voidaccess — initial setup[/bold magenta]\n")
        # Wizard order: LLM, optional enrichment keys, output directory.
        _prompt_llm(settings)
        wants_keys = Confirm.ask("\nAdd enrichment API keys now?", default=False)
        if wants_keys:
            _prompt_enrichment(settings)
        _prompt_output_dir(settings)
        cli_config.save_config(settings)
        console.print(f"\n[green]Saved to[/green] {cli_config.CONFIG_PATH}")
        # Runs after saving, so a failed download cannot lose the answers.
        _ensure_spacy_model()
155
+
156
+
157
@app.command("llm")
def configure_llm() -> None:
    """Configure just the LLM provider, model, and API key."""
    # Load, edit in place through the shared prompt helper, then persist.
    settings = cli_config.load_config()
    _prompt_llm(settings)
    cli_config.save_config(settings)
    saved_to = cli_config.CONFIG_PATH
    console.print(f"[green]Saved to[/green] {saved_to}")
164
+
165
+
166
@app.command("keys")
def configure_keys() -> None:
    """Configure enrichment API keys."""
    # Load, edit in place through the shared prompt helper, then persist.
    settings = cli_config.load_config()
    _prompt_enrichment(settings)
    cli_config.save_config(settings)
    saved_to = cli_config.CONFIG_PATH
    console.print(f"[green]Saved to[/green] {saved_to}")
173
+
174
+
175
@app.command("tor")
def configure_tor(
    host: str = typer.Option("127.0.0.1", help="Tor SOCKS5 host"),
    port: int = typer.Option(9050, help="Tor SOCKS5 port"),
) -> None:
    """Override Tor proxy host/port."""
    # Only the host and port of the existing "tor" section are overwritten.
    settings = cli_config.load_config()
    tor_section = settings["tor"]
    tor_section["host"] = host
    tor_section["port"] = port
    cli_config.save_config(settings)
    console.print(f"Tor set to {host}:{port}")