voidaccess 1.4.5__tar.gz → 1.4.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voidaccess-1.4.5/voidaccess.egg-info → voidaccess-1.4.7}/PKG-INFO +5 -1
- {voidaccess-1.4.5 → voidaccess-1.4.7}/README.md +4 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/pyproject.toml +1 -1
- {voidaccess-1.4.5 → voidaccess-1.4.7/voidaccess.egg-info}/PKG-INFO +5 -1
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/__init__.py +1 -1
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/adapters/sqlite.py +43 -1
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/investigate.py +73 -57
- {voidaccess-1.4.5 → voidaccess-1.4.7}/LICENSE +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/analysis/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/analysis/opsec.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/analysis/patterns.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/analysis/temporal.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/auth.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/main.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/admin.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/auth.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/entities.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/export.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/investigations.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/monitors.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/search.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/api/routes/settings.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/auth/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/auth/token_blacklist.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/config.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/crawler/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/crawler/dedup.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/crawler/frontier.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/crawler/spider.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/crawler/utils.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/env.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0001_initial_schema.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0002_add_investigation_status_column.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0002_add_missing_tables.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0003_add_canonical_value_and_entity_links.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0004_add_page_posted_at.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0005_add_extraction_method.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0006_add_monitor_alerts.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0007_add_actor_style_profiles.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0008_add_users_table.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0009_add_investigation_id_to_relationships.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0010_add_composite_index_entity_relationships.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0011_add_page_extraction_cache.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0013_add_graph_status.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0015_add_progress_fields.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0016_backfill_graph_status.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0017_add_user_api_keys.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0018_add_user_id_to_investigations.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0019_add_content_safety_log.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0020_add_entity_source_tracking.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/models.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/queries.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/db/session.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/export/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/export/misp.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/export/sigma.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/export/stix.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/llm_extract.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/ner.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/normalizer.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/pipeline.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/extractor/regex_patterns.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/fingerprint/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/fingerprint/profiler.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/fingerprint/stylometry.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/builder.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/export.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/model.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/queries.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/graph/visualize.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/i18n/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/i18n/detect.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/i18n/query_expand.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/i18n/translate.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/_db.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/alerts.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/config.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/diff.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/jobs.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/monitor/scheduler.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/scraper/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/scraper/scrape.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/scraper/scrape_js.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/search/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/search/circuit_breaker.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/search/search.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/setup.cfg +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/blockchain.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/cache.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/cisa.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/dns_enrichment.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/domain_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/email_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/engines.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/enrichment.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/github_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/gitlab_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/hash_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/historical_intel.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/ip_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/paste_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/pastes.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/rss_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/seed_manager.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/seeds.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/shodan.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/telegram.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/sources/virustotal.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_analysis_opsec.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_analysis_stylometry.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_analysis_temporal.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_api.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_api_monitors.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_blockchain.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_config.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_crawler.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_db.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_dns_enrichment.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_domain_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_email_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_fingerprint.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_github_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_gitlab_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_graph.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_hash_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_i18n.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_ip_reputation.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_llm.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_llm_utils.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_model_singleton.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_monitor.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_pagination.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_paste_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_rss_scraper.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_scrape_js.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_settings.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_sources.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_sources_enrichment_new.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/tests/test_vector.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/async_utils.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/content_safety.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/defang.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/encryption.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/ioc_freshness.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/utils/user_keys.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/vector/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/vector/embedder.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/vector/model_singleton.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/vector/search.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/vector/store.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess/llm.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess/llm_utils.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess.egg-info/SOURCES.txt +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess.egg-info/dependency_links.txt +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess.egg-info/entry_points.txt +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess.egg-info/requires.txt +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess.egg-info/top_level.txt +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/adapters/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/browser.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/__init__.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/configure.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/enrich.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/export.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/commands/show.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/config.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/display.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/main.py +0 -0
- {voidaccess-1.4.5 → voidaccess-1.4.7}/voidaccess_cli/tor_detect.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: voidaccess
|
|
3
|
-
Version: 1.4.5
|
|
3
|
+
Version: 1.4.7
|
|
4
4
|
Summary: Dark web OSINT CLI — automated threat intelligence from query to report
|
|
5
5
|
Author: VoidAccess
|
|
6
6
|
License-Expression: MIT
|
|
@@ -75,6 +75,10 @@ voidaccess configure
|
|
|
75
75
|
voidaccess investigate "LockBit ransomware"
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
+
<div align="center">
|
|
79
|
+
<img src="./public/cli_investigation_gif.gif" alt="VoidAccess CLI investigation walkthrough" width="900">
|
|
80
|
+
</div>
|
|
81
|
+
|
|
78
82
|
Requires local Tor for dark web sources:
|
|
79
83
|
|
|
80
84
|
- https://torproject.org
|
|
@@ -29,6 +29,10 @@ voidaccess configure
|
|
|
29
29
|
voidaccess investigate "LockBit ransomware"
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
+
<div align="center">
|
|
33
|
+
<img src="./public/cli_investigation_gif.gif" alt="VoidAccess CLI investigation walkthrough" width="900">
|
|
34
|
+
</div>
|
|
35
|
+
|
|
32
36
|
Requires local Tor for dark web sources:
|
|
33
37
|
|
|
34
38
|
- https://torproject.org
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: voidaccess
|
|
3
|
-
Version: 1.4.5
|
|
3
|
+
Version: 1.4.7
|
|
4
4
|
Summary: Dark web OSINT CLI — automated threat intelligence from query to report
|
|
5
5
|
Author: VoidAccess
|
|
6
6
|
License-Expression: MIT
|
|
@@ -75,6 +75,10 @@ voidaccess configure
|
|
|
75
75
|
voidaccess investigate "LockBit ransomware"
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
+
<div align="center">
|
|
79
|
+
<img src="./public/cli_investigation_gif.gif" alt="VoidAccess CLI investigation walkthrough" width="900">
|
|
80
|
+
</div>
|
|
81
|
+
|
|
78
82
|
Requires local Tor for dark web sources:
|
|
79
83
|
|
|
80
84
|
- https://torproject.org
|
|
@@ -20,7 +20,7 @@ from __future__ import annotations
|
|
|
20
20
|
import json
|
|
21
21
|
import uuid
|
|
22
22
|
from datetime import datetime, timezone
|
|
23
|
-
from typing import Any, Optional
|
|
23
|
+
from typing import Any, Optional, Union
|
|
24
24
|
|
|
25
25
|
from sqlalchemy import text
|
|
26
26
|
|
|
@@ -53,6 +53,48 @@ def _serialize_dt(dt: Optional[datetime]) -> Optional[str]:
|
|
|
53
53
|
return dt.isoformat()
|
|
54
54
|
|
|
55
55
|
|
|
56
|
+
def _coerce_expires_at(expires_at: Union[str, datetime]) -> datetime:
|
|
57
|
+
"""SQLite returns TIMESTAMP columns as strings; normalize for comparisons."""
|
|
58
|
+
if isinstance(expires_at, str):
|
|
59
|
+
expires_at = datetime.fromisoformat(expires_at)
|
|
60
|
+
if expires_at.tzinfo is None:
|
|
61
|
+
expires_at = expires_at.replace(tzinfo=timezone.utc)
|
|
62
|
+
return expires_at
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_page_extraction_cache(page_hash: str) -> Optional[dict[str, list[str]]]:
    """Return the cached extraction result for *page_hash*, or ``None``.

    Looks up the ``page_extraction_cache`` row whose ``page_hash`` matches and
    decodes its ``entities_json`` column. The lookup is best-effort: every
    failure is reported as a cache miss instead of being raised, but is logged
    at debug level so it can be diagnosed.

    Args:
        page_hash: Key of the row to look up in ``page_extraction_cache``.

    Returns:
        The decoded JSON object, or ``None`` when ``db.session`` cannot be
        imported, no row matches, the row has no expiry or has expired, the
        payload is not a JSON object, or the query/decoding fails.
    """
    import logging

    log = logging.getLogger(__name__)

    try:
        from db.session import get_session
    except Exception:
        log.debug("page extraction cache unavailable: db.session import failed", exc_info=True)
        return None

    try:
        with get_session() as session:
            row = session.execute(
                text(
                    """
                    SELECT entities_json, expires_at
                    FROM page_extraction_cache
                    WHERE page_hash = :page_hash
                    """
                ),
                {"page_hash": page_hash},
            ).fetchone()

            if row is None:
                return None

            entities_json, expires_at = row[0], row[1]
            if expires_at is None:
                # A row without an expiry cannot be checked for freshness;
                # treat it as a miss explicitly rather than via a swallowed
                # AttributeError.
                return None

            expires_at = _coerce_expires_at(expires_at)
            if expires_at < datetime.now(timezone.utc):
                return None

            cached = json.loads(entities_json)
            if not isinstance(cached, dict):
                # The declared return type is a mapping; anything else is
                # treated as a corrupt entry.
                log.debug("page extraction cache entry for %s is not a JSON object", page_hash)
                return None
            return cached
    except Exception:
        log.debug("page extraction cache read failed for %s", page_hash, exc_info=True)
        return None
|
|
96
|
+
|
|
97
|
+
|
|
56
98
|
def save_investigation(
|
|
57
99
|
query: str,
|
|
58
100
|
refined_query: Optional[str] = None,
|
|
@@ -123,6 +123,24 @@ DEPTH_PRESETS = {
|
|
|
123
123
|
"deep": {"top_n": 40, "max_workers": 8, "extract_concurrency": 6},
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
+
# Pages kept after LLM relevance filter (must match voidaccess.llm.filter_results cap).
|
|
127
|
+
LLM_FILTER_TOP_N = 15
|
|
128
|
+
|
|
129
|
+
INVESTIGATION_STEPS = [
|
|
130
|
+
"Refining query",
|
|
131
|
+
"Searching dark web",
|
|
132
|
+
"Filtering results",
|
|
133
|
+
"Scraping pages",
|
|
134
|
+
"Extracting entities",
|
|
135
|
+
"Enriching intelligence",
|
|
136
|
+
"Enriching domains",
|
|
137
|
+
"Enriching hashes",
|
|
138
|
+
"Enriching emails",
|
|
139
|
+
"Building graph",
|
|
140
|
+
"Generating summary",
|
|
141
|
+
"Finalizing results",
|
|
142
|
+
]
|
|
143
|
+
|
|
126
144
|
|
|
127
145
|
async def _run_investigation(
|
|
128
146
|
query: str,
|
|
@@ -142,10 +160,11 @@ async def _run_investigation(
|
|
|
142
160
|
cfg = cli_config.load_config()
|
|
143
161
|
preset = DEPTH_PRESETS[depth]
|
|
144
162
|
display = InvestigationDisplay(quiet=quiet)
|
|
145
|
-
display.start(query)
|
|
163
|
+
display.start(query, steps=INVESTIGATION_STEPS)
|
|
146
164
|
|
|
147
165
|
# --- DB init ----------------------------------------------------------
|
|
148
166
|
sqlite_adapter.init_db()
|
|
167
|
+
_patch_llm_extraction_cache(sqlite_adapter)
|
|
149
168
|
|
|
150
169
|
# --- Tor preflight ----------------------------------------------------
|
|
151
170
|
tor_proxy: Optional[str] = None
|
|
@@ -244,19 +263,19 @@ async def _run_investigation(
|
|
|
244
263
|
|
|
245
264
|
# --- Step 3 — filter results ------------------------------------------
|
|
246
265
|
display.update_step("Filtering results", "active")
|
|
247
|
-
|
|
248
|
-
filtered_links = search_links[:
|
|
266
|
+
filter_top_n = LLM_FILTER_TOP_N
|
|
267
|
+
filtered_links = search_links[: filter_top_n * 2] if search_links else []
|
|
249
268
|
if llm is not None and search_links:
|
|
250
269
|
try:
|
|
251
270
|
from voidaccess.llm import filter_results
|
|
252
271
|
filtered_links = await asyncio.to_thread(filter_results, llm, refined, search_links) or search_links
|
|
253
|
-
filtered_links = filtered_links[:
|
|
272
|
+
filtered_links = filtered_links[:filter_top_n]
|
|
254
273
|
display.update_step("Filtering results", "ok", f"top {len(filtered_links)}")
|
|
255
274
|
except Exception as exc:
|
|
256
275
|
display.update_step("Filtering results", "fail", str(exc))
|
|
257
|
-
filtered_links = search_links[:
|
|
276
|
+
filtered_links = search_links[:filter_top_n]
|
|
258
277
|
else:
|
|
259
|
-
filtered_links = (search_links or [])[:
|
|
278
|
+
filtered_links = (search_links or [])[:filter_top_n]
|
|
260
279
|
display.update_step("Filtering results", "skip" if no_llm else "ok", f"{len(filtered_links)} kept")
|
|
261
280
|
|
|
262
281
|
# --- Step 4 — scrape pages -------------------------------------------
|
|
@@ -291,7 +310,7 @@ async def _run_investigation(
|
|
|
291
310
|
for extra in (paste_pages, github_pages, gitlab_pages, rss_pages):
|
|
292
311
|
for page in extra:
|
|
293
312
|
url = page.get("url") or page.get("link")
|
|
294
|
-
text = page.get("text") or page.get("content") or page.get("cleaned_text") or ""
|
|
313
|
+
text = page.get("text") or page.get("content") or page.get("cleaned_text") or page.get("text_content") or ""
|
|
295
314
|
if not url or not text:
|
|
296
315
|
continue
|
|
297
316
|
scraped_pages.append({"url": url, "text": text, "source": page.get("source", "clearnet")})
|
|
@@ -322,69 +341,57 @@ async def _run_investigation(
|
|
|
322
341
|
except Exception as exc:
|
|
323
342
|
display.update_step("Extracting entities", "fail", str(exc))
|
|
324
343
|
|
|
325
|
-
# --- Step 6 — enrich intelligence
|
|
344
|
+
# --- Step 6 — enrich intelligence (OTX + IP) ---------------------------
|
|
326
345
|
display.update_step("Enriching intelligence", "active")
|
|
327
346
|
enrichment_pages: list[dict] = []
|
|
328
347
|
try:
|
|
329
348
|
from sources.enrichment import enrich_investigation as _enrich_inv
|
|
330
349
|
otx_key = os.getenv("OTX_API_KEY", "") or ""
|
|
331
|
-
# Build entity dicts for sources that take them
|
|
332
350
|
entity_dicts = sqlite_adapter.get_entities(investigation_id)
|
|
333
351
|
enrichment_pages = await _enrich_inv(refined, otx_api_key=otx_key, entities=entity_dicts)
|
|
334
|
-
|
|
335
|
-
# IP reputation pass — re-uses sources.ip_reputation
|
|
336
|
-
try:
|
|
337
|
-
from sources.ip_reputation import enrich_ip_entities
|
|
338
|
-
await enrich_ip_entities(extraction_results, investigation_id=inv_uuid)
|
|
339
|
-
except Exception as ip_exc:
|
|
340
|
-
console.print(f"[grey50]ip_reputation skipped: {ip_exc}[/grey50]")
|
|
341
|
-
|
|
342
|
-
# Step 6.2 — Domain reputation
|
|
343
|
-
try:
|
|
344
|
-
extraction_results = await enrich_domain_entities(
|
|
345
|
-
extraction_results, inv_uuid
|
|
346
|
-
)
|
|
347
|
-
display.update_step(
|
|
348
|
-
"Enriching domains",
|
|
349
|
-
"done",
|
|
350
|
-
f"{sum(1 for e in extraction_results if e.get('entity_type') == 'DOMAIN')} domains enriched",
|
|
351
|
-
)
|
|
352
|
-
except Exception as e:
|
|
353
|
-
logger.debug(f"Domain enrichment: {e}")
|
|
354
|
-
|
|
355
|
-
# Step 6.3 — Hash reputation
|
|
356
|
-
try:
|
|
357
|
-
extraction_results = await enrich_hash_entities(
|
|
358
|
-
extraction_results, inv_uuid
|
|
359
|
-
)
|
|
360
|
-
display.update_step(
|
|
361
|
-
"Enriching hashes",
|
|
362
|
-
"done",
|
|
363
|
-
"",
|
|
364
|
-
)
|
|
365
|
-
except Exception as e:
|
|
366
|
-
logger.debug(f"Hash enrichment: {e}")
|
|
367
|
-
|
|
368
|
-
# Step 6.4 — Email reputation
|
|
369
|
-
try:
|
|
370
|
-
extraction_results = await enrich_email_entities(
|
|
371
|
-
extraction_results, inv_uuid
|
|
372
|
-
)
|
|
373
|
-
display.update_step(
|
|
374
|
-
"Enriching emails",
|
|
375
|
-
"done",
|
|
376
|
-
"",
|
|
377
|
-
)
|
|
378
|
-
except Exception as e:
|
|
379
|
-
logger.debug(f"Email enrichment: {e}")
|
|
380
|
-
|
|
381
352
|
sources_used["enrichment"] = {"status": "ok", "count": len(enrichment_pages)}
|
|
382
353
|
display.update_step("Enriching intelligence", "ok", f"{len(enrichment_pages)} pages added")
|
|
383
354
|
except Exception as exc:
|
|
384
355
|
sources_used["enrichment"] = {"status": "fail", "error": str(exc)}
|
|
385
356
|
display.update_step("Enriching intelligence", "fail", str(exc))
|
|
386
357
|
|
|
387
|
-
|
|
358
|
+
try:
|
|
359
|
+
from sources.ip_reputation import enrich_ip_entities
|
|
360
|
+
await enrich_ip_entities(extraction_results, investigation_id=inv_uuid)
|
|
361
|
+
except Exception as ip_exc:
|
|
362
|
+
logger.debug("ip_reputation skipped: %s", ip_exc)
|
|
363
|
+
|
|
364
|
+
# --- Step 6.2–6.4 — domain / hash / email (before graph) -------------
|
|
365
|
+
display.update_step("Enriching domains", "active")
|
|
366
|
+
try:
|
|
367
|
+
extraction_results = await enrich_domain_entities(extraction_results, inv_uuid)
|
|
368
|
+
domain_count = sum(
|
|
369
|
+
1
|
|
370
|
+
for e in sqlite_adapter.get_entities(investigation_id)
|
|
371
|
+
if (e.get("entity_type") or "").upper() == "DOMAIN"
|
|
372
|
+
)
|
|
373
|
+
detail = f"{domain_count} domains enriched" if domain_count else ""
|
|
374
|
+
display.update_step("Enriching domains", "ok", detail)
|
|
375
|
+
except Exception as exc:
|
|
376
|
+
logger.debug("Domain enrichment: %s", exc)
|
|
377
|
+
display.update_step("Enriching domains", "fail", str(exc))
|
|
378
|
+
|
|
379
|
+
display.update_step("Enriching hashes", "active")
|
|
380
|
+
try:
|
|
381
|
+
extraction_results = await enrich_hash_entities(extraction_results, inv_uuid)
|
|
382
|
+
display.update_step("Enriching hashes", "ok")
|
|
383
|
+
except Exception as exc:
|
|
384
|
+
logger.debug("Hash enrichment: %s", exc)
|
|
385
|
+
display.update_step("Enriching hashes", "fail", str(exc))
|
|
386
|
+
|
|
387
|
+
display.update_step("Enriching emails", "active")
|
|
388
|
+
try:
|
|
389
|
+
extraction_results = await enrich_email_entities(extraction_results, inv_uuid)
|
|
390
|
+
display.update_step("Enriching emails", "ok")
|
|
391
|
+
except Exception as exc:
|
|
392
|
+
logger.debug("Email enrichment: %s", exc)
|
|
393
|
+
display.update_step("Enriching emails", "fail", str(exc))
|
|
394
|
+
|
|
388
395
|
if enrichment_pages:
|
|
389
396
|
try:
|
|
390
397
|
from extractor.pipeline import extract_entities_from_pages as _extr2
|
|
@@ -487,6 +494,15 @@ async def _run_investigation(
|
|
|
487
494
|
await _close_cached_sessions()
|
|
488
495
|
|
|
489
496
|
|
|
497
|
+
def _patch_llm_extraction_cache(sqlite_adapter: Any) -> None:
|
|
498
|
+
"""Use sqlite adapter for cache reads (naive ISO strings from SQLite)."""
|
|
499
|
+
try:
|
|
500
|
+
import extractor.llm_extract as llm_extract
|
|
501
|
+
except Exception:
|
|
502
|
+
return
|
|
503
|
+
llm_extract._load_from_cache = sqlite_adapter.get_page_extraction_cache
|
|
504
|
+
|
|
505
|
+
|
|
490
506
|
async def _close_cached_sessions() -> None:
|
|
491
507
|
try:
|
|
492
508
|
from scraper.scrape import close_cached_sessions as _close_scrape
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0002_add_investigation_status_column.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0007_add_actor_style_profiles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0011_add_page_extraction_cache.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0018_add_user_id_to_investigations.py
RENAMED
|
File without changes
|
|
File without changes
|
{voidaccess-1.4.5 → voidaccess-1.4.7}/db/migrations/versions/0020_add_entity_source_tracking.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|