voidaccess 1.4.2__tar.gz → 1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voidaccess-1.4.2/voidaccess.egg-info → voidaccess-1.4.4}/PKG-INFO +56 -3
- {voidaccess-1.4.2 → voidaccess-1.4.4}/README.md +54 -2
- {voidaccess-1.4.2 → voidaccess-1.4.4}/auth/token_blacklist.py +10 -3
- {voidaccess-1.4.2 → voidaccess-1.4.4}/pyproject.toml +2 -1
- {voidaccess-1.4.2 → voidaccess-1.4.4}/search/circuit_breaker.py +10 -3
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/enrichment.py +25 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/hash_reputation.py +18 -4
- {voidaccess-1.4.2 → voidaccess-1.4.4/voidaccess.egg-info}/PKG-INFO +56 -3
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess.egg-info/requires.txt +1 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/adapters/sqlite.py +12 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/configure.py +16 -19
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/investigate.py +71 -6
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/config.py +66 -6
- {voidaccess-1.4.2 → voidaccess-1.4.4}/LICENSE +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/analysis/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/analysis/opsec.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/analysis/patterns.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/analysis/temporal.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/auth.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/main.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/admin.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/auth.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/entities.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/export.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/investigations.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/monitors.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/search.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/api/routes/settings.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/auth/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/config.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/crawler/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/crawler/dedup.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/crawler/frontier.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/crawler/spider.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/crawler/utils.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/env.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0001_initial_schema.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0002_add_investigation_status_column.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0002_add_missing_tables.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0003_add_canonical_value_and_entity_links.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0004_add_page_posted_at.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0005_add_extraction_method.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0006_add_monitor_alerts.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0007_add_actor_style_profiles.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0008_add_users_table.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0009_add_investigation_id_to_relationships.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0010_add_composite_index_entity_relationships.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0011_add_page_extraction_cache.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0013_add_graph_status.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0015_add_progress_fields.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0016_backfill_graph_status.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0017_add_user_api_keys.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0018_add_user_id_to_investigations.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0019_add_content_safety_log.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/migrations/versions/0020_add_entity_source_tracking.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/models.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/queries.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/db/session.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/export/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/export/misp.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/export/sigma.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/export/stix.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/llm_extract.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/ner.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/normalizer.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/pipeline.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/extractor/regex_patterns.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/fingerprint/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/fingerprint/profiler.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/fingerprint/stylometry.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/builder.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/export.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/model.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/queries.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/graph/visualize.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/i18n/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/i18n/detect.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/i18n/query_expand.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/i18n/translate.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/_db.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/alerts.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/config.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/diff.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/jobs.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/monitor/scheduler.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/scraper/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/scraper/scrape.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/scraper/scrape_js.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/search/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/search/search.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/setup.cfg +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/blockchain.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/cache.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/cisa.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/dns_enrichment.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/domain_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/email_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/engines.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/github_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/gitlab_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/historical_intel.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/ip_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/paste_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/pastes.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/rss_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/seed_manager.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/seeds.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/shodan.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/telegram.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/sources/virustotal.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_analysis_opsec.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_analysis_stylometry.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_analysis_temporal.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_api.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_api_monitors.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_blockchain.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_config.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_crawler.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_db.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_dns_enrichment.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_domain_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_email_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_fingerprint.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_github_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_gitlab_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_graph.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_hash_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_i18n.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_ip_reputation.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_llm.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_llm_utils.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_model_singleton.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_monitor.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_pagination.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_paste_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_rss_scraper.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_scrape_js.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_settings.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_sources.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_sources_enrichment_new.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/tests/test_vector.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/async_utils.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/content_safety.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/defang.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/encryption.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/ioc_freshness.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/utils/user_keys.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/vector/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/vector/embedder.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/vector/model_singleton.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/vector/search.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/vector/store.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess/llm.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess/llm_utils.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess.egg-info/SOURCES.txt +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess.egg-info/dependency_links.txt +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess.egg-info/entry_points.txt +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess.egg-info/top_level.txt +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/adapters/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/browser.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/__init__.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/enrich.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/export.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/commands/show.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/display.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/main.py +0 -0
- {voidaccess-1.4.2 → voidaccess-1.4.4}/voidaccess_cli/tor_detect.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: voidaccess
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: Dark web OSINT CLI — automated threat intelligence from query to report
|
|
5
5
|
Author: VoidAccess
|
|
6
6
|
License-Expression: MIT
|
|
@@ -28,6 +28,7 @@ Requires-Dist: langchain-openai>=0.1
|
|
|
28
28
|
Requires-Dist: langchain-anthropic>=0.1
|
|
29
29
|
Requires-Dist: langchain-google-genai>=1.0
|
|
30
30
|
Requires-Dist: langchain-groq>=0.1
|
|
31
|
+
Requires-Dist: langchain-ollama>=0.1
|
|
31
32
|
Requires-Dist: python-dotenv>=1.0
|
|
32
33
|
Requires-Dist: httpx>=0.27
|
|
33
34
|
Requires-Dist: spacy>=3.7
|
|
@@ -64,6 +65,58 @@ Commercial threat intelligence platforms often charge prohibitive annual fees fo
|
|
|
64
65
|
|
|
65
66
|
---
|
|
66
67
|
|
|
68
|
+
## Quick Start
|
|
69
|
+
|
|
70
|
+
### Option A - CLI (no Docker, 30 seconds)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install voidaccess
|
|
74
|
+
voidaccess configure
|
|
75
|
+
voidaccess investigate "LockBit ransomware"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Requires local Tor for dark web sources:
|
|
79
|
+
|
|
80
|
+
- https://torproject.org
|
|
81
|
+
- Use `--no-tor` for clearnet-only investigations
|
|
82
|
+
|
|
83
|
+
The CLI stores config in `~/.voidaccess/config.json` and writes results to `~/.voidaccess/results/`.
|
|
84
|
+
|
|
85
|
+
### Option B - Docker (full stack, 5 minutes)
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
git clone https://github.com/KatrielMoses/voidaccess
|
|
89
|
+
cd voidaccess
|
|
90
|
+
bash setup.sh
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The Docker stack includes PostgreSQL, Tor, FastAPI, and Next.js.
|
|
94
|
+
|
|
95
|
+
### CLI Commands
|
|
96
|
+
|
|
97
|
+
| Command | Description |
|
|
98
|
+
|---|---|
|
|
99
|
+
| `voidaccess investigate` | Run an investigation |
|
|
100
|
+
| `voidaccess show` | Interactive entity browser |
|
|
101
|
+
| `voidaccess export` | Export STIX/MISP/Sigma/CSV/MD |
|
|
102
|
+
| `voidaccess enrich` | Re-enrich saved results |
|
|
103
|
+
| `voidaccess list` | List saved investigations |
|
|
104
|
+
| `voidaccess status` | Config and API key status |
|
|
105
|
+
| `voidaccess configure` | Setup wizard |
|
|
106
|
+
|
|
107
|
+
### CLI vs Docker
|
|
108
|
+
|
|
109
|
+
| Feature | CLI | Docker |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| Install time | 30 seconds | 5 minutes |
|
|
112
|
+
| Dark web scraping | Requires local Tor | Built-in |
|
|
113
|
+
| Graph visualization | Terminal TUI | sigma.js |
|
|
114
|
+
| Monitoring/alerts | No | Yes |
|
|
115
|
+
| Multi-user | No | Yes |
|
|
116
|
+
| Persistence | SQLite (`~/.voidaccess`) | PostgreSQL |
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
67
120
|
## Visual Walkthrough
|
|
68
121
|
|
|
69
122
|
### 1. Intuitive Dashboard
|
|
@@ -212,7 +265,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
212
265
|
|
|
213
266
|
---
|
|
214
267
|
|
|
215
|
-
##
|
|
268
|
+
## Recent Updates
|
|
216
269
|
|
|
217
270
|
- **10 new enrichment sources**: GreyNoise (scanner suppression), AbuseIPDB, Feodo Tracker, C2IntelFeeds, crt.sh, URLScan.io, Wayback Machine, Hybrid Analysis, HaveIBeenPwned, EmailRep
|
|
218
271
|
- **4 new clearnet collection sources**: paste sites, GitHub code search, GitLab code search, and 20 curated RSS security feeds
|
|
@@ -227,7 +280,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
227
280
|
|
|
228
281
|
---
|
|
229
282
|
|
|
230
|
-
##
|
|
283
|
+
## Docker Setup
|
|
231
284
|
|
|
232
285
|
### Prerequisites
|
|
233
286
|
- Docker and Docker Compose
|
|
@@ -19,6 +19,58 @@ Commercial threat intelligence platforms often charge prohibitive annual fees fo
|
|
|
19
19
|
|
|
20
20
|
---
|
|
21
21
|
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
### Option A - CLI (no Docker, 30 seconds)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install voidaccess
|
|
28
|
+
voidaccess configure
|
|
29
|
+
voidaccess investigate "LockBit ransomware"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Requires local Tor for dark web sources:
|
|
33
|
+
|
|
34
|
+
- https://torproject.org
|
|
35
|
+
- Use `--no-tor` for clearnet-only investigations
|
|
36
|
+
|
|
37
|
+
The CLI stores config in `~/.voidaccess/config.json` and writes results to `~/.voidaccess/results/`.
|
|
38
|
+
|
|
39
|
+
### Option B - Docker (full stack, 5 minutes)
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/KatrielMoses/voidaccess
|
|
43
|
+
cd voidaccess
|
|
44
|
+
bash setup.sh
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The Docker stack includes PostgreSQL, Tor, FastAPI, and Next.js.
|
|
48
|
+
|
|
49
|
+
### CLI Commands
|
|
50
|
+
|
|
51
|
+
| Command | Description |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `voidaccess investigate` | Run an investigation |
|
|
54
|
+
| `voidaccess show` | Interactive entity browser |
|
|
55
|
+
| `voidaccess export` | Export STIX/MISP/Sigma/CSV/MD |
|
|
56
|
+
| `voidaccess enrich` | Re-enrich saved results |
|
|
57
|
+
| `voidaccess list` | List saved investigations |
|
|
58
|
+
| `voidaccess status` | Config and API key status |
|
|
59
|
+
| `voidaccess configure` | Setup wizard |
|
|
60
|
+
|
|
61
|
+
### CLI vs Docker
|
|
62
|
+
|
|
63
|
+
| Feature | CLI | Docker |
|
|
64
|
+
|---|---|---|
|
|
65
|
+
| Install time | 30 seconds | 5 minutes |
|
|
66
|
+
| Dark web scraping | Requires local Tor | Built-in |
|
|
67
|
+
| Graph visualization | Terminal TUI | sigma.js |
|
|
68
|
+
| Monitoring/alerts | No | Yes |
|
|
69
|
+
| Multi-user | No | Yes |
|
|
70
|
+
| Persistence | SQLite (`~/.voidaccess`) | PostgreSQL |
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
22
74
|
## Visual Walkthrough
|
|
23
75
|
|
|
24
76
|
### 1. Intuitive Dashboard
|
|
@@ -167,7 +219,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
167
219
|
|
|
168
220
|
---
|
|
169
221
|
|
|
170
|
-
##
|
|
222
|
+
## Recent Updates
|
|
171
223
|
|
|
172
224
|
- **10 new enrichment sources**: GreyNoise (scanner suppression), AbuseIPDB, Feodo Tracker, C2IntelFeeds, crt.sh, URLScan.io, Wayback Machine, Hybrid Analysis, HaveIBeenPwned, EmailRep
|
|
173
225
|
- **4 new clearnet collection sources**: paste sites, GitHub code search, GitLab code search, and 20 curated RSS security feeds
|
|
@@ -182,7 +234,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
182
234
|
|
|
183
235
|
---
|
|
184
236
|
|
|
185
|
-
##
|
|
237
|
+
## Docker Setup
|
|
186
238
|
|
|
187
239
|
### Prerequisites
|
|
188
240
|
- Docker and Docker Compose
|
|
@@ -19,16 +19,22 @@ logger = logging.getLogger(__name__)
|
|
|
19
19
|
_pool: Optional[redis.ConnectionPool] = None
|
|
20
20
|
_redis_client: Optional[redis.Redis] = None
|
|
21
21
|
_blacklist_enabled = False
|
|
22
|
+
_redis_unavailable = False
|
|
22
23
|
|
|
23
24
|
BLACKLIST_PREFIX = "blacklist:"
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
async def _get_redis() -> Optional[redis.Redis]:
|
|
27
|
-
global _pool, _redis_client, _blacklist_enabled
|
|
28
|
+
global _pool, _redis_client, _blacklist_enabled, _redis_unavailable
|
|
29
|
+
|
|
30
|
+
if _redis_unavailable:
|
|
31
|
+
return None
|
|
28
32
|
|
|
29
33
|
if REDIS_URL is None:
|
|
30
34
|
_blacklist_enabled = False
|
|
31
|
-
|
|
35
|
+
if not _redis_unavailable:
|
|
36
|
+
logger.info("REDIS_URL not configured — token blacklist disabled")
|
|
37
|
+
_redis_unavailable = True
|
|
32
38
|
return None
|
|
33
39
|
|
|
34
40
|
if _redis_client is None:
|
|
@@ -42,9 +48,10 @@ async def _get_redis() -> Optional[redis.Redis]:
|
|
|
42
48
|
_blacklist_enabled = True
|
|
43
49
|
logger.info("Token blacklist enabled via Redis")
|
|
44
50
|
except Exception as e:
|
|
45
|
-
logger.warning(
|
|
51
|
+
logger.warning("Failed to connect to Redis: %s — token blacklist disabled", e)
|
|
46
52
|
_redis_client = None
|
|
47
53
|
_blacklist_enabled = False
|
|
54
|
+
_redis_unavailable = True
|
|
48
55
|
|
|
49
56
|
return _redis_client
|
|
50
57
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "voidaccess"
|
|
7
|
-
version = "1.4.2"
|
|
7
|
+
version = "1.4.4"
|
|
8
8
|
description = "Dark web OSINT CLI — automated threat intelligence from query to report"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -34,6 +34,7 @@ dependencies = [
|
|
|
34
34
|
"langchain-anthropic>=0.1",
|
|
35
35
|
"langchain-google-genai>=1.0",
|
|
36
36
|
"langchain-groq>=0.1",
|
|
37
|
+
"langchain-ollama>=0.1",
|
|
37
38
|
"python-dotenv>=1.0",
|
|
38
39
|
"httpx>=0.27",
|
|
39
40
|
"spacy>=3.7",
|
|
@@ -29,6 +29,7 @@ CIRCUIT_PREFIX = "circuit:"
|
|
|
29
29
|
_pool: Optional[redis.ConnectionPool] = None
|
|
30
30
|
_redis_client: Optional[redis.Redis] = None
|
|
31
31
|
_circuit_breaker_enabled = False
|
|
32
|
+
_redis_unavailable = False # latched True once we decide Redis isn't reachable
|
|
32
33
|
|
|
33
34
|
_engine_failures: dict[str, int] = {}
|
|
34
35
|
_engine_last_success: dict[str, float] = {}
|
|
@@ -37,11 +38,16 @@ _engine_open_time: dict[str, float] = {}
|
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
async def _get_redis() -> Optional[redis.Redis]:
|
|
40
|
-
global _pool, _redis_client, _circuit_breaker_enabled
|
|
41
|
+
global _pool, _redis_client, _circuit_breaker_enabled, _redis_unavailable
|
|
42
|
+
|
|
43
|
+
if _redis_unavailable:
|
|
44
|
+
return None
|
|
41
45
|
|
|
42
46
|
if REDIS_URL is None:
|
|
43
47
|
_circuit_breaker_enabled = False
|
|
44
|
-
|
|
48
|
+
if not _redis_unavailable:
|
|
49
|
+
logger.info("REDIS_URL not configured — circuit breaker using in-memory fallback")
|
|
50
|
+
_redis_unavailable = True
|
|
45
51
|
return None
|
|
46
52
|
|
|
47
53
|
if _redis_client is None:
|
|
@@ -55,9 +61,10 @@ async def _get_redis() -> Optional[redis.Redis]:
|
|
|
55
61
|
_circuit_breaker_enabled = True
|
|
56
62
|
logger.info("Circuit breaker enabled via Redis")
|
|
57
63
|
except Exception as e:
|
|
58
|
-
logger.warning(
|
|
64
|
+
logger.warning("Failed to connect to Redis: %s — circuit breaker using in-memory fallback", e)
|
|
59
65
|
_redis_client = None
|
|
60
66
|
_circuit_breaker_enabled = False
|
|
67
|
+
_redis_unavailable = True
|
|
61
68
|
|
|
62
69
|
return _redis_client
|
|
63
70
|
|
|
@@ -40,11 +40,30 @@ THREATFOX_URL = "https://threatfox-api.abuse.ch/api/v1/"
|
|
|
40
40
|
# All HTTP calls use at most 30s client timeout (enforced per request).
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
_ABUSECH_WARNED = False
|
|
44
|
+
|
|
45
|
+
|
|
43
46
|
def _abusech_headers() -> dict[str, str]:
|
|
44
47
|
key = (os.environ.get("ABUSECH_API_KEY") or "").strip()
|
|
45
48
|
return {"Auth-Key": key} if key else {}
|
|
46
49
|
|
|
47
50
|
|
|
51
|
+
def _abusech_enabled() -> bool:
|
|
52
|
+
"""abuse.ch APIs (MalwareBazaar/ThreatFox/URLhaus) require an Auth-Key
|
|
53
|
+
since 2024. Return False (and log once) when the key is missing so we
|
|
54
|
+
skip the request entirely instead of spamming HTTP 401."""
|
|
55
|
+
global _ABUSECH_WARNED
|
|
56
|
+
if (os.environ.get("ABUSECH_API_KEY") or "").strip():
|
|
57
|
+
return True
|
|
58
|
+
if not _ABUSECH_WARNED:
|
|
59
|
+
logger.info(
|
|
60
|
+
"abuse.ch enrichment skipped — set ABUSECH_API_KEY "
|
|
61
|
+
"(free at https://auth.abuse.ch) to enable MalwareBazaar/ThreatFox/URLhaus."
|
|
62
|
+
)
|
|
63
|
+
_ABUSECH_WARNED = True
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
|
|
48
67
|
def is_onion_url(url: str) -> bool:
|
|
49
68
|
"""
|
|
50
69
|
Return True if *url* looks like a Tor hidden service URL (.onion).
|
|
@@ -218,6 +237,8 @@ def otx_pulse_to_page(pulse: dict) -> dict:
|
|
|
218
237
|
|
|
219
238
|
async def fetch_malwarebazaar(query: str, limit: int = 20) -> list[dict]:
|
|
220
239
|
"""Query MalwareBazaar by tag then by signature."""
|
|
240
|
+
if not _abusech_enabled():
|
|
241
|
+
return []
|
|
221
242
|
results: list[dict] = []
|
|
222
243
|
q = (query or "").strip()
|
|
223
244
|
if not q:
|
|
@@ -312,6 +333,8 @@ async def fetch_malwarebazaar(query: str, limit: int = 20) -> list[dict]:
|
|
|
312
333
|
|
|
313
334
|
async def fetch_threatfox(query: str, limit: int = 50) -> list[dict]:
|
|
314
335
|
"""Search ThreatFox IOCs by search term."""
|
|
336
|
+
if not _abusech_enabled():
|
|
337
|
+
return []
|
|
315
338
|
results: list[dict] = []
|
|
316
339
|
q = (query or "").strip()
|
|
317
340
|
if not q:
|
|
@@ -397,6 +420,8 @@ async def fetch_threatfox(query: str, limit: int = 50) -> list[dict]:
|
|
|
397
420
|
|
|
398
421
|
async def fetch_urlhaus(query: str, limit: int = 20) -> list[dict]:
|
|
399
422
|
"""Search URLhaus by tag."""
|
|
423
|
+
if not _abusech_enabled():
|
|
424
|
+
return []
|
|
400
425
|
results: list[dict] = []
|
|
401
426
|
q = (query or "").strip()
|
|
402
427
|
if not q:
|
|
@@ -164,11 +164,18 @@ async def query_malwarebazaar(hash_value: str) -> dict[str, Any]:
|
|
|
164
164
|
"""
|
|
165
165
|
POST get_info to MalwareBazaar for a file hash.
|
|
166
166
|
|
|
167
|
-
|
|
167
|
+
Requires ABUSECH_API_KEY (abuse.ch made auth-key mandatory in 2024).
|
|
168
|
+
Returns malware family, file type, first seen date.
|
|
168
169
|
"""
|
|
170
|
+
api_key = (os.environ.get("ABUSECH_API_KEY") or "").strip()
|
|
171
|
+
if not api_key:
|
|
172
|
+
return {"found": False, "source": "malwarebazaar_no_key"}
|
|
169
173
|
try:
|
|
170
174
|
timeout = aiohttp.ClientTimeout(total=15)
|
|
171
|
-
headers = {
|
|
175
|
+
headers = {
|
|
176
|
+
"User-Agent": "VoidAccess-OSINT/1.1 (security research)",
|
|
177
|
+
"Auth-Key": api_key,
|
|
178
|
+
}
|
|
172
179
|
async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
|
|
173
180
|
async with session.post(
|
|
174
181
|
MALWAREBAZAAR_URL,
|
|
@@ -212,11 +219,18 @@ async def query_threatfox(hash_value: str) -> dict[str, Any]:
|
|
|
212
219
|
"""
|
|
213
220
|
POST search_ioc to ThreatFox for a file hash.
|
|
214
221
|
|
|
215
|
-
|
|
222
|
+
Requires ABUSECH_API_KEY (abuse.ch made auth-key mandatory in 2024).
|
|
223
|
+
Returns malware family and associated IOCs.
|
|
216
224
|
"""
|
|
225
|
+
api_key = (os.environ.get("ABUSECH_API_KEY") or "").strip()
|
|
226
|
+
if not api_key:
|
|
227
|
+
return {"found": False, "source": "threatfox_no_key"}
|
|
217
228
|
try:
|
|
218
229
|
timeout = aiohttp.ClientTimeout(total=15)
|
|
219
|
-
headers = {
|
|
230
|
+
headers = {
|
|
231
|
+
"User-Agent": "VoidAccess-OSINT/1.1 (security research)",
|
|
232
|
+
"Auth-Key": api_key,
|
|
233
|
+
}
|
|
220
234
|
async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
|
|
221
235
|
async with session.post(
|
|
222
236
|
THREATFOX_URL,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: voidaccess
|
|
3
|
-
Version: 1.4.2
|
|
3
|
+
Version: 1.4.4
|
|
4
4
|
Summary: Dark web OSINT CLI — automated threat intelligence from query to report
|
|
5
5
|
Author: VoidAccess
|
|
6
6
|
License-Expression: MIT
|
|
@@ -28,6 +28,7 @@ Requires-Dist: langchain-openai>=0.1
|
|
|
28
28
|
Requires-Dist: langchain-anthropic>=0.1
|
|
29
29
|
Requires-Dist: langchain-google-genai>=1.0
|
|
30
30
|
Requires-Dist: langchain-groq>=0.1
|
|
31
|
+
Requires-Dist: langchain-ollama>=0.1
|
|
31
32
|
Requires-Dist: python-dotenv>=1.0
|
|
32
33
|
Requires-Dist: httpx>=0.27
|
|
33
34
|
Requires-Dist: spacy>=3.7
|
|
@@ -64,6 +65,58 @@ Commercial threat intelligence platforms often charge prohibitive annual fees fo
|
|
|
64
65
|
|
|
65
66
|
---
|
|
66
67
|
|
|
68
|
+
## Quick Start
|
|
69
|
+
|
|
70
|
+
### Option A - CLI (no Docker, 30 seconds)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install voidaccess
|
|
74
|
+
voidaccess configure
|
|
75
|
+
voidaccess investigate "LockBit ransomware"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Requires local Tor for dark web sources:
|
|
79
|
+
|
|
80
|
+
- https://torproject.org
|
|
81
|
+
- Use `--no-tor` for clearnet-only investigations
|
|
82
|
+
|
|
83
|
+
The CLI stores config in `~/.voidaccess/config.json` and writes results to `~/.voidaccess/results/`.
|
|
84
|
+
|
|
85
|
+
### Option B - Docker (full stack, 5 minutes)
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
git clone https://github.com/KatrielMoses/voidaccess
|
|
89
|
+
cd voidaccess
|
|
90
|
+
bash setup.sh
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
The Docker stack includes PostgreSQL, Tor, FastAPI, and Next.js.
|
|
94
|
+
|
|
95
|
+
### CLI Commands
|
|
96
|
+
|
|
97
|
+
| Command | Description |
|
|
98
|
+
|---|---|
|
|
99
|
+
| `voidaccess investigate` | Run an investigation |
|
|
100
|
+
| `voidaccess show` | Interactive entity browser |
|
|
101
|
+
| `voidaccess export` | Export STIX/MISP/Sigma/CSV/MD |
|
|
102
|
+
| `voidaccess enrich` | Re-enrich saved results |
|
|
103
|
+
| `voidaccess list` | List saved investigations |
|
|
104
|
+
| `voidaccess status` | Config and API key status |
|
|
105
|
+
| `voidaccess configure` | Setup wizard |
|
|
106
|
+
|
|
107
|
+
### CLI vs Docker
|
|
108
|
+
|
|
109
|
+
| Feature | CLI | Docker |
|
|
110
|
+
|---|---|---|
|
|
111
|
+
| Install time | 30 seconds | 5 minutes |
|
|
112
|
+
| Dark web scraping | Requires local Tor | Built-in |
|
|
113
|
+
| Graph visualization | Terminal TUI | sigma.js |
|
|
114
|
+
| Monitoring/alerts | No | Yes |
|
|
115
|
+
| Multi-user | No | Yes |
|
|
116
|
+
| Persistence | SQLite (`~/.voidaccess`) | PostgreSQL |
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
67
120
|
## Visual Walkthrough
|
|
68
121
|
|
|
69
122
|
### 1. Intuitive Dashboard
|
|
@@ -212,7 +265,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
212
265
|
|
|
213
266
|
---
|
|
214
267
|
|
|
215
|
-
##
|
|
268
|
+
## Recent Updates
|
|
216
269
|
|
|
217
270
|
- **10 new enrichment sources**: GreyNoise (scanner suppression), AbuseIPDB, Feodo Tracker, C2IntelFeeds, crt.sh, URLScan.io, Wayback Machine, Hybrid Analysis, HaveIBeenPwned, EmailRep
|
|
218
271
|
- **4 new clearnet collection sources**: paste sites, GitHub code search, GitLab code search, and 20 curated RSS security feeds
|
|
@@ -227,7 +280,7 @@ Free with Groq, OpenRouter free models, or Ollama. Under $0.50 per investigation
|
|
|
227
280
|
|
|
228
281
|
---
|
|
229
282
|
|
|
230
|
-
##
|
|
283
|
+
## Docker Setup
|
|
231
284
|
|
|
232
285
|
### Prerequisites
|
|
233
286
|
- Docker and Docker Compose
|
|
@@ -30,6 +30,18 @@ def init_db() -> None:
|
|
|
30
30
|
engine = get_engine()
|
|
31
31
|
Base.metadata.create_all(engine)
|
|
32
32
|
|
|
33
|
+
# Create page_extraction_cache table if missing
|
|
34
|
+
with engine.connect() as conn:
|
|
35
|
+
conn.execute("""
|
|
36
|
+
CREATE TABLE IF NOT EXISTS page_extraction_cache (
|
|
37
|
+
page_hash TEXT PRIMARY KEY,
|
|
38
|
+
entities_json TEXT NOT NULL,
|
|
39
|
+
extracted_at TIMESTAMP NOT NULL,
|
|
40
|
+
expires_at TIMESTAMP NOT NULL
|
|
41
|
+
)
|
|
42
|
+
""")
|
|
43
|
+
conn.commit()
|
|
44
|
+
|
|
33
45
|
|
|
34
46
|
def _serialize_dt(dt: Optional[datetime]) -> Optional[str]:
|
|
35
47
|
if dt is None:
|
|
@@ -57,8 +57,23 @@ def _test_llm_key(provider: str, api_key: str, model: str) -> bool:
|
|
|
57
57
|
return True
|
|
58
58
|
try:
|
|
59
59
|
from voidaccess.llm import get_llm
|
|
60
|
+
except ImportError as exc:
|
|
61
|
+
missing = str(exc).split("'")[-2] if "'" in str(exc) else str(exc)
|
|
62
|
+
console.print(
|
|
63
|
+
f"[yellow]Skipped validation:[/yellow] missing dependency [bold]{missing}[/bold]. "
|
|
64
|
+
f"Install with: [bold]pip install {missing.replace('_', '-')}[/bold]"
|
|
65
|
+
)
|
|
66
|
+
return False
|
|
67
|
+
try:
|
|
60
68
|
get_llm(model, api_keys={cli_config.PROVIDER_ENV.get(provider, ""): api_key})
|
|
61
69
|
return True
|
|
70
|
+
except ImportError as exc:
|
|
71
|
+
missing = str(exc).split("'")[-2] if "'" in str(exc) else str(exc)
|
|
72
|
+
console.print(
|
|
73
|
+
f"[yellow]Skipped validation:[/yellow] missing dependency [bold]{missing}[/bold]. "
|
|
74
|
+
f"Install with: [bold]pip install {missing.replace('_', '-')}[/bold]"
|
|
75
|
+
)
|
|
76
|
+
return False
|
|
62
77
|
except Exception as exc:
|
|
63
78
|
console.print(f"[yellow]Could not validate key:[/yellow] {exc}")
|
|
64
79
|
return False
|
|
@@ -117,25 +132,7 @@ def _prompt_output_dir(cfg: dict) -> None:
|
|
|
117
132
|
|
|
118
133
|
|
|
119
134
|
def _ensure_spacy_model() -> None:
|
|
120
|
-
|
|
121
|
-
try:
|
|
122
|
-
import subprocess
|
|
123
|
-
import sys
|
|
124
|
-
|
|
125
|
-
result = subprocess.run(
|
|
126
|
-
[sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
|
|
127
|
-
capture_output=True,
|
|
128
|
-
text=True,
|
|
129
|
-
)
|
|
130
|
-
if result.returncode == 0:
|
|
131
|
-
console.print(" ✓ spaCy model ready")
|
|
132
|
-
else:
|
|
133
|
-
console.print(
|
|
134
|
-
" ⚠ spaCy download failed — run manually: "
|
|
135
|
-
"python -m spacy download en_core_web_sm"
|
|
136
|
-
)
|
|
137
|
-
except Exception as e:
|
|
138
|
-
console.print(f" ⚠ spaCy: {e}")
|
|
135
|
+
cli_config.ensure_spacy_model()
|
|
139
136
|
|
|
140
137
|
|
|
141
138
|
@app.callback()
|
|
@@ -27,7 +27,13 @@ from typing import Any, Optional
|
|
|
27
27
|
import typer
|
|
28
28
|
from rich.console import Console
|
|
29
29
|
|
|
30
|
+
# Import reputation enrichment sources (used in Step 6.2–6.4)
|
|
31
|
+
from sources.domain_reputation import enrich_domain_entities
|
|
32
|
+
from sources.email_reputation import enrich_email_entities
|
|
33
|
+
from sources.hash_reputation import enrich_hash_entities
|
|
34
|
+
|
|
30
35
|
console = Console()
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
31
37
|
|
|
32
38
|
|
|
33
39
|
# ---------------------------------------------------------------------------
|
|
@@ -56,10 +62,13 @@ def run(
|
|
|
56
62
|
except Exception:
|
|
57
63
|
import subprocess, sys
|
|
58
64
|
from rich.console import Console
|
|
59
|
-
Console().print(
|
|
65
|
+
Console().print(
|
|
66
|
+
" [dim]→[/dim] Installing spaCy NER model (one-time)..."
|
|
67
|
+
)
|
|
60
68
|
subprocess.run(
|
|
61
|
-
[sys.executable, "-m", "spacy",
|
|
62
|
-
|
|
69
|
+
[sys.executable, "-m", "spacy",
|
|
70
|
+
"download", "en_core_web_sm"],
|
|
71
|
+
capture_output=True
|
|
63
72
|
)
|
|
64
73
|
|
|
65
74
|
if quiet:
|
|
@@ -330,6 +339,45 @@ async def _run_investigation(
|
|
|
330
339
|
except Exception as ip_exc:
|
|
331
340
|
console.print(f"[grey50]ip_reputation skipped: {ip_exc}[/grey50]")
|
|
332
341
|
|
|
342
|
+
# Step 6.2 — Domain reputation
|
|
343
|
+
try:
|
|
344
|
+
extraction_results = await enrich_domain_entities(
|
|
345
|
+
extraction_results, inv_uuid
|
|
346
|
+
)
|
|
347
|
+
display.update_step(
|
|
348
|
+
"Enriching domains",
|
|
349
|
+
"done",
|
|
350
|
+
f"{sum(1 for e in extraction_results if e.get('entity_type') == 'DOMAIN')} domains enriched",
|
|
351
|
+
)
|
|
352
|
+
except Exception as e:
|
|
353
|
+
logger.debug(f"Domain enrichment: {e}")
|
|
354
|
+
|
|
355
|
+
# Step 6.3 — Hash reputation
|
|
356
|
+
try:
|
|
357
|
+
extraction_results = await enrich_hash_entities(
|
|
358
|
+
extraction_results, inv_uuid
|
|
359
|
+
)
|
|
360
|
+
display.update_step(
|
|
361
|
+
"Enriching hashes",
|
|
362
|
+
"done",
|
|
363
|
+
"",
|
|
364
|
+
)
|
|
365
|
+
except Exception as e:
|
|
366
|
+
logger.debug(f"Hash enrichment: {e}")
|
|
367
|
+
|
|
368
|
+
# Step 6.4 — Email reputation
|
|
369
|
+
try:
|
|
370
|
+
extraction_results = await enrich_email_entities(
|
|
371
|
+
extraction_results, inv_uuid
|
|
372
|
+
)
|
|
373
|
+
display.update_step(
|
|
374
|
+
"Enriching emails",
|
|
375
|
+
"done",
|
|
376
|
+
"",
|
|
377
|
+
)
|
|
378
|
+
except Exception as e:
|
|
379
|
+
logger.debug(f"Email enrichment: {e}")
|
|
380
|
+
|
|
333
381
|
sources_used["enrichment"] = {"status": "ok", "count": len(enrichment_pages)}
|
|
334
382
|
display.update_step("Enriching intelligence", "ok", f"{len(enrichment_pages)} pages added")
|
|
335
383
|
except Exception as exc:
|
|
@@ -364,10 +412,10 @@ async def _run_investigation(
|
|
|
364
412
|
if llm is not None:
|
|
365
413
|
try:
|
|
366
414
|
from voidaccess.llm import generate_summary
|
|
367
|
-
|
|
368
|
-
if
|
|
415
|
+
pages_to_summarize = scraped_pages[:10]
|
|
416
|
+
if pages_to_summarize:
|
|
369
417
|
summary_text = await asyncio.to_thread(
|
|
370
|
-
generate_summary, llm, refined,
|
|
418
|
+
generate_summary, llm, refined, pages_to_summarize, "threat_intel"
|
|
371
419
|
)
|
|
372
420
|
display.update_step("Generating summary", "ok")
|
|
373
421
|
except Exception as exc:
|
|
@@ -434,6 +482,23 @@ async def _run_investigation(
|
|
|
434
482
|
}
|
|
435
483
|
)
|
|
436
484
|
|
|
485
|
+
# Close any cached aiohttp sessions so the event loop exits cleanly
|
|
486
|
+
# (otherwise aiohttp prints "Unclosed client session" warnings).
|
|
487
|
+
await _close_cached_sessions()
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
async def _close_cached_sessions() -> None:
|
|
491
|
+
try:
|
|
492
|
+
from scraper.scrape import close_cached_sessions as _close_scrape
|
|
493
|
+
await _close_scrape()
|
|
494
|
+
except Exception:
|
|
495
|
+
pass
|
|
496
|
+
try:
|
|
497
|
+
from search import close_search_session as _close_search
|
|
498
|
+
await _close_search()
|
|
499
|
+
except Exception:
|
|
500
|
+
pass
|
|
501
|
+
|
|
437
502
|
|
|
438
503
|
# ---------------------------------------------------------------------------
|
|
439
504
|
# Side-source helpers (each gracefully degrades if module missing/disabled)
|