voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
cli/browser.py
ADDED
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/browser.py — Textual TUI for browsing an investigation's entities.
|
|
3
|
+
|
|
4
|
+
Two-pane layout:
|
|
5
|
+
Left (35%) — entity list, type-filter, badges
|
|
6
|
+
Right (65%) — entity detail + top connections
|
|
7
|
+
|
|
8
|
+
Keys:
|
|
9
|
+
/ search f filter by type
|
|
10
|
+
p shortest path c clusters view
|
|
11
|
+
e export selected q quit
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections import Counter, defaultdict
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
from textual.app import App, ComposeResult
|
|
20
|
+
from textual.binding import Binding
|
|
21
|
+
from textual.containers import Horizontal, Vertical
|
|
22
|
+
from textual.reactive import reactive
|
|
23
|
+
from textual.screen import ModalScreen
|
|
24
|
+
from textual.widgets import (
|
|
25
|
+
DataTable,
|
|
26
|
+
Footer,
|
|
27
|
+
Header,
|
|
28
|
+
Input,
|
|
29
|
+
Label,
|
|
30
|
+
Static,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Maps an entity type to the (glyph, colour name) pair used for the "T" column.
# Types missing from this table fall back to ("?", "white") at the lookup site.
TYPE_SHORT = {
    # Network indicators
    "ip_address": ("I", "cyan"),
    "domain": ("D", "green"),
    "onion_url": ("O", "magenta"),
    "email": ("E", "yellow"),
    # File hashes: every digest flavour shares one glyph and colour
    "file_hash_md5": ("H", "blue"),
    "file_hash_sha1": ("H", "blue"),
    "file_hash_sha256": ("H", "blue"),
    # Wallets and threat naming
    "crypto_wallet": ("W", "yellow"),
    "ransomware_group": ("R", "red"),
    "malware": ("M", "red"),
    "cve": ("C", "red"),
    # Identity / contact indicators
    "phone": ("P", "grey50"),
    "handle": ("@", "yellow"),
    "pgp_key": ("K", "grey50"),
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _badges_for_entity(entity: dict) -> list[str]:
|
|
53
|
+
tags = (entity.get("corroborating_sources") or "").lower()
|
|
54
|
+
badges: list[str] = []
|
|
55
|
+
if "c2" in tags:
|
|
56
|
+
badges.append("[C2]")
|
|
57
|
+
if "breached" in tags or "hibp" in tags:
|
|
58
|
+
badges.append("[Breached]")
|
|
59
|
+
if "malicious" in tags or "abuseipdb" in tags:
|
|
60
|
+
badges.append("[Malicious]")
|
|
61
|
+
if "fresh" in tags:
|
|
62
|
+
badges.append("[Fresh]")
|
|
63
|
+
return badges
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class EntityBrowserApp(App):
    """Textual app over an investigation export dict.

    The export dict is read for ``investigation.query`` (or a top-level
    ``query``) to build the title, ``entities`` for the table rows and
    ``relationships`` for connection counts and the neighbour list.
    Entities are sorted once at construction: most connections first, then
    highest confidence.

    Entity values, tags and context snippets are arbitrary strings, so they
    are escaped before being placed in widgets that parse console markup.
    """

    CSS = """
    Screen { layout: horizontal; }
    #left { width: 35%; border-right: solid $accent; }
    #right { width: 65%; padding: 1 2; }
    #detail { height: 100%; }
    DataTable { height: 1fr; }
    """

    BINDINGS = [
        Binding("q", "quit", "Quit"),
        Binding("slash", "focus_search", "Search"),
        Binding("f", "cycle_filter", "Filter"),
        Binding("c", "clusters_view", "Clusters"),
        Binding("p", "path_view", "Path"),
        Binding("e", "export_selected", "Export"),
        Binding("r", "refresh_table", "Refresh"),
    ]

    search_query: reactive[str] = reactive("")
    type_filter: reactive[Optional[str]] = reactive(None)

    def __init__(self, data: dict[str, Any]):
        """Index the export dict and pre-sort entities for display.

        Args:
            data: investigation export with optional ``investigation``,
                ``query``, ``entities`` and ``relationships`` keys.
        """
        super().__init__()
        self.data = data
        inv = data.get("investigation") or {}
        self._title_text = inv.get("query") or data.get("query") or "investigation"
        # `or []` also covers keys that are present but set to None.
        self.entities: list[dict] = list(data.get("entities") or [])
        self.relationships: list[dict] = list(data.get("relationships") or [])
        # Connection counts: each relationship counts once for both endpoints.
        counts: Counter[str] = Counter()
        for r in self.relationships:
            counts[r["entity_a_id"]] += 1
            counts[r["entity_b_id"]] += 1
        self.connection_count = counts
        self.entities.sort(
            key=lambda e: (-counts.get(e["id"], 0), -(e.get("confidence") or 0))
        )
        # Lookup by stringified id so row keys (always compared as strings)
        # resolve in O(1); on duplicate ids the first entity in display
        # order wins, matching a front-to-back scan.
        self._entity_by_id: dict[str, dict] = {}
        for e in self.entities:
            self._entity_by_id.setdefault(str(e["id"]), e)

    def compose(self) -> ComposeResult:
        """Build the two-pane layout: search + table on the left, detail on the right."""
        yield Header(show_clock=False)
        with Horizontal():
            with Vertical(id="left"):
                yield Input(placeholder="search… (press / to focus)", id="search")
                # markup=False: the title is wrapped in literal square
                # brackets, which the markup parser would read as a tag.
                yield Label(f"[{self._title_text}]", id="title", markup=False)
                yield DataTable(id="entity_table", zebra_stripes=True, cursor_type="row")
            with Vertical(id="right"):
                yield Static("Select an entity on the left.", id="detail")
        yield Footer()

    def on_mount(self) -> None:
        """Set the window title, create the table columns and fill the table."""
        self.title = f"voidaccess — {self._title_text}"
        table: DataTable = self.query_one("#entity_table", DataTable)
        table.add_columns("T", "Value", "Conn", "Badges")
        self._populate_table()

    # -- helpers -----------------------------------------------------------

    def _filtered(self) -> list[dict]:
        """Return the entities matching the active type filter and search text.

        The search is a case-insensitive substring match against the value,
        canonical value and corroborating sources.
        """
        out = self.entities
        if self.type_filter:
            out = [e for e in out if e["entity_type"] == self.type_filter]
        if self.search_query:
            q = self.search_query.lower()
            out = [
                e for e in out
                if q in (e.get("value") or "").lower()
                or q in (e.get("canonical_value") or "").lower()
                or q in (e.get("corroborating_sources") or "").lower()
            ]
        return out

    def _populate_table(self) -> None:
        """Rebuild the table rows from the currently filtered entities."""
        from rich.markup import escape

        table: DataTable = self.query_one("#entity_table", DataTable)
        table.clear()
        for e in self._filtered():
            glyph, _colour = TYPE_SHORT.get(e["entity_type"], ("?", "white"))
            # Truncate first, then escape, so the cut never splits an escape.
            val = escape((e.get("canonical_value") or e.get("value") or "")[:42])
            conn = self.connection_count.get(e["id"], 0)
            badges = " ".join(_badges_for_entity(e))
            table.add_row(glyph, val, str(conn), badges, key=str(e["id"]))

    # -- input handlers ----------------------------------------------------

    def on_input_changed(self, event: Input.Changed) -> None:
        """Re-filter the table whenever the search box text changes."""
        if event.input.id == "search":
            self.search_query = event.value
            self._populate_table()

    def on_data_table_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Show the detail pane for the row under the cursor."""
        row_key = event.row_key
        if row_key is None:
            return
        eid = str(getattr(row_key, "value", row_key))
        entity = self._entity_by_id.get(eid)
        if entity:
            self._render_detail(entity)

    # -- actions -----------------------------------------------------------

    def action_focus_search(self) -> None:
        """Move keyboard focus to the search input."""
        self.query_one("#search", Input).focus()

    def action_cycle_filter(self) -> None:
        """Advance the type filter: no filter -> each type in sorted order -> no filter."""
        types = sorted({e["entity_type"] for e in self.entities})
        if not types:
            return
        cycle = [None, *types]
        try:
            pos = cycle.index(self.type_filter)
        except ValueError:
            # Current filter no longer matches any known type: reset it.
            self.type_filter = None
        else:
            self.type_filter = cycle[(pos + 1) % len(cycle)]
        self._populate_table()

    def action_refresh_table(self) -> None:
        """Rebuild the table from the current filter state."""
        self._populate_table()

    def action_clusters_view(self) -> None:
        """Open the clusters overlay."""
        self.push_screen(ClustersScreen(self))

    def action_path_view(self) -> None:
        """Open the shortest-path overlay."""
        self.push_screen(PathScreen(self))

    def action_export_selected(self) -> None:
        """Write the entity under the cursor to ``~/.voidaccess/results`` as JSON."""
        import json
        from pathlib import Path

        table: DataTable = self.query_one("#entity_table", DataTable)
        row = table.cursor_row
        # The table is populated from _filtered(), so cursor rows index into it.
        rows = self._filtered()
        if row < 0 or row >= len(rows):
            return
        entity = rows[row]
        out = Path.home() / ".voidaccess" / "results" / f"entity-{entity['id']}.json"
        try:
            out.parent.mkdir(parents=True, exist_ok=True)
            out.write_text(json.dumps(entity, indent=2, default=str), encoding="utf-8")
        except OSError as exc:
            # A failed write should be reported, not take down the whole UI.
            self.notify(f"Export failed: {exc}", severity="error")
            return
        self.notify(f"Exported to {out}")

    # -- detail pane -------------------------------------------------------

    def _render_detail(self, entity: dict) -> None:
        """Render the detail pane for ``entity``, including its top 10 neighbours."""
        from rich.markup import escape

        lines: list[str] = []
        val = entity.get("canonical_value") or entity.get("value") or ""
        lines.append(f"[b]Entity:[/b] {escape(val)}")
        lines.append(
            escape(
                f"Type: {entity['entity_type']} | Confidence: "
                f"{(entity.get('confidence') or 0):.2f}"
            )
        )
        tags = entity.get("corroborating_sources") or ""
        if tags:
            lines.append(escape(f"Tags: {tags}"))
        lines.append("")
        if entity.get("first_seen") or entity.get("last_seen"):
            lines.append(
                escape(
                    f"First seen: {entity.get('first_seen') or '—'} "
                    f"Last seen: {entity.get('last_seen') or '—'}"
                )
            )
        if entity.get("extraction_method"):
            lines.append(escape(f"Extraction: {entity['extraction_method']}"))
        lines.append("")
        ctx = (entity.get("context_snippet") or "").strip()
        if ctx:
            lines.append("[b]Context:[/b]")
            lines.append(escape(ctx[:1500]))
            lines.append("")

        neighbours = self._neighbours_of(entity["id"])
        if neighbours:
            lines.append("[b]Connected to (top 10):[/b]")
            for other_id, edge_type, conf in neighbours[:10]:
                other = self._entity_by_id.get(str(other_id))
                if other:
                    other_val = (other.get("canonical_value") or other.get("value") or "")[:48]
                    # Pad to the column widths first; escaping afterwards
                    # keeps the rendered alignment intact.
                    lines.append(escape(f" → {other_val:50} {edge_type:18} {conf:.2f}"))
        detail: Static = self.query_one("#detail", Static)
        detail.update("\n".join(lines))

    def _neighbours_of(self, entity_id: str) -> list[tuple[str, str, float]]:
        """Return ``(other_id, relationship_type, confidence)`` for every edge touching ``entity_id``.

        The result is sorted by descending confidence; a missing confidence
        counts as 0.0.
        """
        out: list[tuple[str, str, float]] = []
        for r in self.relationships:
            if r["entity_a_id"] == entity_id:
                out.append((r["entity_b_id"], r["relationship_type"], r.get("confidence") or 0.0))
            elif r["entity_b_id"] == entity_id:
                out.append((r["entity_a_id"], r["relationship_type"], r.get("confidence") or 0.0))
        out.sort(key=lambda t: -t[2])
        return out
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# ---------------------------------------------------------------------------
|
|
258
|
+
# Cluster overlay
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
class ClustersScreen(ModalScreen):
    """Modal overlay summarising the ten largest connected components of the entity graph."""

    BINDINGS = [Binding("escape", "dismiss", "Close"), Binding("q", "dismiss", "Close")]

    def __init__(self, parent_app: EntityBrowserApp):
        """Keep a reference to the browser app whose entities and relationships are clustered."""
        super().__init__()
        self._parent_app = parent_app

    def compose(self) -> ComposeResult:
        """Yield the heading and the pre-rendered cluster summary."""
        yield Vertical(
            Label("[b]Infrastructure clusters[/b] (esc to close)"),
            # markup=False: the summary embeds entity values verbatim and
            # contains no intentional markup, so it must not be parsed.
            Static(self._render_clusters(), id="clusters_body", markup=False),
        )

    def _render_clusters(self) -> str:
        """Return a text summary of the connected components, largest first.

        Each cluster is labelled A, B, C… and described by its hub (the
        member with the most distinct neighbours) plus a per-type count of
        its members. Entities without any relationship never appear.
        Returns ``"No clusters detected."`` when there are no relationships.
        """
        # Undirected adjacency built from the parent's edges.
        adj: dict[str, set[str]] = defaultdict(set)
        for r in self._parent_app.relationships:
            adj[r["entity_a_id"]].add(r["entity_b_id"])
            adj[r["entity_b_id"]].add(r["entity_a_id"])

        # Iterative depth-first search, one pass per unvisited node.
        seen: set[str] = set()
        clusters: list[list[str]] = []
        for eid in adj:
            if eid in seen:
                continue
            stack = [eid]
            comp: list[str] = []
            while stack:
                node = stack.pop()
                if node in seen:
                    continue
                seen.add(node)
                comp.append(node)
                stack.extend(adj.get(node, ()))
            clusters.append(comp)

        clusters.sort(key=len, reverse=True)
        entity_by_id = {e["id"]: e for e in self._parent_app.entities}

        lines = []
        for idx, comp in enumerate(clusters[:10], start=1):
            hub_id = max(comp, key=lambda x: len(adj.get(x, ())))
            # A relationship may reference an id absent from `entities`;
            # fall back to a short form of the id itself in that case.
            hub = entity_by_id.get(hub_id, {})
            hub_val = hub.get("canonical_value") or hub.get("value") or str(hub_id)[:8]
            type_counts: Counter[str] = Counter()
            for nid in comp:
                ent = entity_by_id.get(nid)
                if ent:
                    type_counts[ent["entity_type"]] += 1
            lines.append(
                f"Cluster {chr(64 + idx)}: {hub_val} (hub, {len(adj.get(hub_id, ()))} conn)"
            )
            for etype, count in type_counts.most_common():
                lines.append(f" └── {count} {etype}")
            lines.append("")
        return "\n".join(lines) or "No clusters detected."
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# ---------------------------------------------------------------------------
|
|
321
|
+
# Path finder overlay
|
|
322
|
+
# ---------------------------------------------------------------------------
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class PathScreen(ModalScreen):
    """Modal overlay that finds the shortest relationship path between two entities."""

    BINDINGS = [Binding("escape", "dismiss", "Close")]

    def __init__(self, parent_app: EntityBrowserApp):
        """Keep a reference to the browser app whose graph is searched."""
        super().__init__()
        self._parent_app = parent_app

    def compose(self) -> ComposeResult:
        """Yield the heading, the two entity inputs and the result area."""
        yield Vertical(
            Label("[b]Shortest path between two entities[/b]"),
            Input(placeholder="first entity value", id="path_a"),
            Input(placeholder="second entity value", id="path_b"),
            # markup=False: the result embeds entity values verbatim and
            # contains no intentional markup, so it must not be parsed.
            Static("", id="path_result", markup=False),
        )

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Run the search once both inputs are non-empty and show the result."""
        a = self.query_one("#path_a", Input).value.strip().lower()
        b = self.query_one("#path_b", Input).value.strip().lower()
        if not a or not b:
            return
        result = self._find_path(a, b)
        self.query_one("#path_result", Static).update(result)

    def _find_path(self, a_val: str, b_val: str) -> str:
        """Return a printable shortest path between two entities.

        Args:
            a_val: lower-cased display value of the start entity.
            b_val: lower-cased display value of the end entity.

        Returns:
            ``"v1 → v2 → …"`` followed by the hop count, or a message when
            an entity is unknown or the two are not connected.
        """
        from collections import deque

        ents = self._parent_app.entities

        def display(e: dict) -> str:
            return e.get("canonical_value") or e.get("value") or ""

        a_ent = next((e for e in ents if display(e).lower() == a_val), None)
        b_ent = next((e for e in ents if display(e).lower() == b_val), None)
        if a_ent is None or b_ent is None:
            return "One or both entities not found in this investigation."

        adj: dict[str, set[str]] = defaultdict(set)
        for r in self._parent_app.relationships:
            adj[r["entity_a_id"]].add(r["entity_b_id"])
            adj[r["entity_b_id"]].add(r["entity_a_id"])

        # Breadth-first search; a deque keeps each dequeue O(1).
        target = b_ent["id"]
        queue = deque([(a_ent["id"], [a_ent["id"]])])
        visited = {a_ent["id"]}
        while queue:
            node, path = queue.popleft()
            if node == target:
                ents_by_id = {e["id"]: e for e in ents}
                # A path may cross an id that only appears in relationships;
                # show the raw id for such nodes instead of raising KeyError.
                arrow = " → ".join(
                    display(ents_by_id.get(n, {})) or str(n) for n in path
                )
                return f"{arrow}\n({len(path) - 1} hops)"
            for nxt in adj.get(node, ()):
                if nxt not in visited:
                    visited.add(nxt)
                    queue.append((nxt, path + [nxt]))
        return "No path between these entities."
|
cli/commands/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""voidaccess CLI commands."""
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cli/commands/configure.py — first-run wizard and config sub-commands.
|
|
3
|
+
|
|
4
|
+
voidaccess configure — full wizard
|
|
5
|
+
voidaccess configure llm — just the LLM provider/key
|
|
6
|
+
voidaccess configure keys — just enrichment API keys
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
import typer
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.prompt import Prompt, Confirm
|
|
16
|
+
from rich.table import Table
|
|
17
|
+
|
|
18
|
+
from cli import config as cli_config
|
|
19
|
+
|
|
20
|
+
# Typer sub-application for `voidaccess configure`; invoke_without_command
# lets the callback run when no sub-command is given.
app = typer.Typer(
    help="Configure the voidaccess CLI.",
    no_args_is_help=False,
    invoke_without_command=True,
)
# Shared Rich console used for all output in this module.
console = Console()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Supported LLM providers as (config key, human-readable note), in the order
# they are numbered in the provider table.
PROVIDERS = [
    # Hosted providers with a free option
    ("openrouter", "OpenRouter (free models available)"),
    ("groq", "Groq (completely free)"),
    ("google", "Google Gemini (free tier)"),
    # Paid hosted providers
    ("openai", "OpenAI (paid)"),
    ("anthropic", "Anthropic (paid)"),
    # Local provider
    ("ollama", "Ollama (local, free)"),
]
|
|
32
|
+
|
|
33
|
+
# Model identifier offered as the prompt default for each provider key.
DEFAULT_MODELS = {
    "openrouter": "openrouter/deepseek/deepseek-chat",
    "groq": "groq/llama-3.3-70b-versatile",
    "google": "gemini-1.5-flash",
    "openai": "gpt-4o-mini",
    "anthropic": "claude-haiku-4-5-20251001",
    "ollama": "ollama/llama3.2",
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _print_provider_table() -> None:
    """Print the numbered table of LLM providers, marking openrouter as the default."""
    table = Table(title="LLM provider")
    column_specs = (
        ("#", {"style": "cyan", "justify": "right"}),
        ("Provider", {"style": "bold"}),
        ("Notes", {}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)
    # Rows are numbered from 1 to match the choices accepted by the prompt.
    for number, (key, desc) in enumerate(PROVIDERS, start=1):
        notes = desc + " ← default" if key == "openrouter" else desc + ""
        table.add_row(str(number), key, notes)
    console.print(table)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _test_llm_key(provider: str, api_key: str, model: str) -> bool:
|
|
55
|
+
"""Light credential validation — instantiate the LangChain class only."""
|
|
56
|
+
if provider == "ollama":
|
|
57
|
+
return True
|
|
58
|
+
try:
|
|
59
|
+
from voidaccess.llm import get_llm
|
|
60
|
+
get_llm(model, api_keys={cli_config.PROVIDER_ENV.get(provider, ""): api_key})
|
|
61
|
+
return True
|
|
62
|
+
except Exception as exc:
|
|
63
|
+
console.print(f"[yellow]Could not validate key:[/yellow] {exc}")
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _prompt_llm(cfg: dict) -> None:
    """Interactively choose the LLM provider, model and API key.

    The answers are written to ``cfg["llm"]`` (created if missing) under
    ``provider``, ``model`` and ``api_key``. Ollama is never asked for a
    key. A non-empty key is checked with ``_test_llm_key``; the values are
    stored whether or not the check succeeds.

    Args:
        cfg: configuration dict, modified in place.
    """
    llm_cfg = cfg.setdefault("llm", {})

    _print_provider_table()
    # `choices` makes the prompt re-ask on invalid input, so no retry loop
    # is needed here.
    choice = Prompt.ask(
        f"Pick provider [1-{len(PROVIDERS)}]",
        default="1",
        choices=[str(i) for i in range(1, len(PROVIDERS) + 1)],
        show_choices=False,
    )
    provider, _ = PROVIDERS[int(choice) - 1]

    model = Prompt.ask(
        "Model identifier",
        default=DEFAULT_MODELS.get(provider, ""),
    )

    api_key = ""
    if provider != "ollama":
        # Only offer the saved key when it belongs to the chosen provider.
        saved_key = ""
        if llm_cfg.get("provider") == provider:
            saved_key = llm_cfg.get("api_key", "") or ""
        prompt_text = f"API key for {provider}"
        if saved_key:
            prompt_text += " (press Enter to keep the saved key)"
        # show_default=False: password=True hides only typed input, while a
        # visible default would print the saved secret in the prompt.
        api_key = Prompt.ask(
            prompt_text,
            default=saved_key,
            password=True,
            show_default=False,
        ).strip()

    llm_cfg["provider"] = provider
    llm_cfg["model"] = model
    llm_cfg["api_key"] = api_key

    # api_key is always empty for ollama, so this also skips that provider.
    if api_key:
        console.print("Testing key…", style="grey50")
        if _test_llm_key(provider, api_key, model):
            console.print("[green]Key looks valid.[/green]")
        else:
            console.print("[yellow]Saved anyway — verify later with `voidaccess status`.[/yellow]")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _prompt_enrichment(cfg: dict) -> None:
    """Prompt for each enrichment API key named in ``cli_config.ENRICHMENT_KEYS``.

    Pressing Enter keeps the existing value (or leaves the key empty).
    Answers are stripped and written to ``cfg["enrichment_keys"]``, which
    is created if missing.

    Args:
        cfg: configuration dict, modified in place.
    """
    console.print("\n[bold]Enrichment API keys[/bold] (press Enter to skip any)")
    keys = cfg.setdefault("enrichment_keys", {})
    for key_name in cli_config.ENRICHMENT_KEYS:
        existing = keys.get(key_name, "")
        # Say whether Enter keeps a saved key or skips, without echoing the
        # saved value (hence show_default=False).
        display_default = "(saved)" if existing else "(skip)"
        val = Prompt.ask(
            f" {key_name} {display_default}",
            default=existing or "",
            show_default=False,
        )
        keys[key_name] = val.strip()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _prompt_output_dir(cfg: dict) -> None:
    """Ask for the output directory and store the answer in ``cfg["output_dir"]``.

    The prompt defaults to the configured directory, or to
    ``cli_config.DEFAULT_OUTPUT_DIR`` when none is set.
    """
    suggested = cfg.get("output_dir")
    if not suggested:
        suggested = str(cli_config.DEFAULT_OUTPUT_DIR)
    cfg["output_dir"] = Prompt.ask("Output directory", default=suggested)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _ensure_spacy_model() -> None:
    """Make sure the ``en_core_web_sm`` spaCy model is installed.

    If the model package is already importable nothing is downloaded.
    Otherwise ``python -m spacy download en_core_web_sm`` is run with the
    current interpreter. Every failure is reported on the console and
    never raised, so setup can finish without the model.
    """
    try:
        import importlib.util
        import subprocess
        import sys

        # Skip the download when the model is already importable, so
        # re-running the wizard does not fetch it again.
        if importlib.util.find_spec("en_core_web_sm") is not None:
            console.print("\n ✓ spaCy model ready")
            return

        console.print("\n → Downloading spaCy NER model...")
        result = subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            console.print(" ✓ spaCy model ready")
        else:
            console.print(
                " ⚠ spaCy download failed — run manually: "
                "python -m spacy download en_core_web_sm"
            )
    except Exception as e:
        # Deliberately best-effort: a missing model must not abort setup.
        console.print(f" ⚠ spaCy: {e}")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@app.callback()
def configure_default(ctx: typer.Context) -> None:
    """Run the full wizard when no sub-command is given."""
    # The wizard only runs when no sub-command (llm / keys / tor) was requested.
    if ctx.invoked_subcommand is None:
        settings = cli_config.load_config()
        console.print("[bold magenta]voidaccess — initial setup[/bold magenta]\n")

        # Step 1: LLM provider, model and key.
        _prompt_llm(settings)

        # Step 2: optional enrichment keys.
        wants_keys = Confirm.ask("\nAdd enrichment API keys now?", default=False)
        if wants_keys:
            _prompt_enrichment(settings)

        # Step 3: output location, then persist everything.
        _prompt_output_dir(settings)
        cli_config.save_config(settings)
        console.print(f"\n[green]Saved to[/green] {cli_config.CONFIG_PATH}")

        # Step 4: the NER model is fetched after the config is saved.
        _ensure_spacy_model()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@app.command("llm")
def configure_llm() -> None:
    """Configure just the LLM provider, model, and API key."""
    # Load, re-prompt only the LLM section, then write the whole config back.
    settings = cli_config.load_config()
    _prompt_llm(settings)
    cli_config.save_config(settings)
    saved_to = cli_config.CONFIG_PATH
    console.print(f"[green]Saved to[/green] {saved_to}")
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@app.command("keys")
def configure_keys() -> None:
    """Configure enrichment API keys."""
    # Load, re-prompt only the enrichment keys, then write the whole config back.
    settings = cli_config.load_config()
    _prompt_enrichment(settings)
    cli_config.save_config(settings)
    saved_to = cli_config.CONFIG_PATH
    console.print(f"[green]Saved to[/green] {saved_to}")
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@app.command("tor")
def configure_tor(
    host: str = typer.Option("127.0.0.1", help="Tor SOCKS5 host"),
    # min/max make the CLI reject values that are not valid TCP ports
    # instead of saving them to the config.
    port: int = typer.Option(9050, min=1, max=65535, help="Tor SOCKS5 port"),
) -> None:
    """Override Tor proxy host/port."""
    cfg = cli_config.load_config()
    # Create the section if the loaded config does not have one yet.
    tor_cfg = cfg.setdefault("tor", {})
    tor_cfg["host"] = host
    tor_cfg["port"] = port
    cli_config.save_config(cfg)
    console.print(f"Tor set to {host}:{port}")
|