voidaccess 1.4.5__tar.gz → 1.4.6__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (178)
  1. {voidaccess-1.4.5/voidaccess.egg-info → voidaccess-1.4.6}/PKG-INFO +1 -1
  2. {voidaccess-1.4.5 → voidaccess-1.4.6}/pyproject.toml +1 -1
  3. {voidaccess-1.4.5 → voidaccess-1.4.6/voidaccess.egg-info}/PKG-INFO +1 -1
  4. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/adapters/sqlite.py +43 -1
  5. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/investigate.py +72 -56
  6. {voidaccess-1.4.5 → voidaccess-1.4.6}/LICENSE +0 -0
  7. {voidaccess-1.4.5 → voidaccess-1.4.6}/README.md +0 -0
  8. {voidaccess-1.4.5 → voidaccess-1.4.6}/analysis/__init__.py +0 -0
  9. {voidaccess-1.4.5 → voidaccess-1.4.6}/analysis/opsec.py +0 -0
  10. {voidaccess-1.4.5 → voidaccess-1.4.6}/analysis/patterns.py +0 -0
  11. {voidaccess-1.4.5 → voidaccess-1.4.6}/analysis/temporal.py +0 -0
  12. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/__init__.py +0 -0
  13. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/auth.py +0 -0
  14. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/main.py +0 -0
  15. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/__init__.py +0 -0
  16. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/admin.py +0 -0
  17. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/auth.py +0 -0
  18. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/entities.py +0 -0
  19. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/export.py +0 -0
  20. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/investigations.py +0 -0
  21. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/monitors.py +0 -0
  22. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/search.py +0 -0
  23. {voidaccess-1.4.5 → voidaccess-1.4.6}/api/routes/settings.py +0 -0
  24. {voidaccess-1.4.5 → voidaccess-1.4.6}/auth/__init__.py +0 -0
  25. {voidaccess-1.4.5 → voidaccess-1.4.6}/auth/token_blacklist.py +0 -0
  26. {voidaccess-1.4.5 → voidaccess-1.4.6}/config.py +0 -0
  27. {voidaccess-1.4.5 → voidaccess-1.4.6}/crawler/__init__.py +0 -0
  28. {voidaccess-1.4.5 → voidaccess-1.4.6}/crawler/dedup.py +0 -0
  29. {voidaccess-1.4.5 → voidaccess-1.4.6}/crawler/frontier.py +0 -0
  30. {voidaccess-1.4.5 → voidaccess-1.4.6}/crawler/spider.py +0 -0
  31. {voidaccess-1.4.5 → voidaccess-1.4.6}/crawler/utils.py +0 -0
  32. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/__init__.py +0 -0
  33. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/__init__.py +0 -0
  34. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/env.py +0 -0
  35. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0001_initial_schema.py +0 -0
  36. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0002_add_investigation_status_column.py +0 -0
  37. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0002_add_missing_tables.py +0 -0
  38. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0003_add_canonical_value_and_entity_links.py +0 -0
  39. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0004_add_page_posted_at.py +0 -0
  40. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0005_add_extraction_method.py +0 -0
  41. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0006_add_monitor_alerts.py +0 -0
  42. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0007_add_actor_style_profiles.py +0 -0
  43. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0008_add_users_table.py +0 -0
  44. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0009_add_investigation_id_to_relationships.py +0 -0
  45. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0010_add_composite_index_entity_relationships.py +0 -0
  46. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0011_add_page_extraction_cache.py +0 -0
  47. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0013_add_graph_status.py +0 -0
  48. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0015_add_progress_fields.py +0 -0
  49. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0016_backfill_graph_status.py +0 -0
  50. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0017_add_user_api_keys.py +0 -0
  51. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0018_add_user_id_to_investigations.py +0 -0
  52. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0019_add_content_safety_log.py +0 -0
  53. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/migrations/versions/0020_add_entity_source_tracking.py +0 -0
  54. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/models.py +0 -0
  55. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/queries.py +0 -0
  56. {voidaccess-1.4.5 → voidaccess-1.4.6}/db/session.py +0 -0
  57. {voidaccess-1.4.5 → voidaccess-1.4.6}/export/__init__.py +0 -0
  58. {voidaccess-1.4.5 → voidaccess-1.4.6}/export/misp.py +0 -0
  59. {voidaccess-1.4.5 → voidaccess-1.4.6}/export/sigma.py +0 -0
  60. {voidaccess-1.4.5 → voidaccess-1.4.6}/export/stix.py +0 -0
  61. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/__init__.py +0 -0
  62. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/llm_extract.py +0 -0
  63. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/ner.py +0 -0
  64. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/normalizer.py +0 -0
  65. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/pipeline.py +0 -0
  66. {voidaccess-1.4.5 → voidaccess-1.4.6}/extractor/regex_patterns.py +0 -0
  67. {voidaccess-1.4.5 → voidaccess-1.4.6}/fingerprint/__init__.py +0 -0
  68. {voidaccess-1.4.5 → voidaccess-1.4.6}/fingerprint/profiler.py +0 -0
  69. {voidaccess-1.4.5 → voidaccess-1.4.6}/fingerprint/stylometry.py +0 -0
  70. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/__init__.py +0 -0
  71. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/builder.py +0 -0
  72. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/export.py +0 -0
  73. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/model.py +0 -0
  74. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/queries.py +0 -0
  75. {voidaccess-1.4.5 → voidaccess-1.4.6}/graph/visualize.py +0 -0
  76. {voidaccess-1.4.5 → voidaccess-1.4.6}/i18n/__init__.py +0 -0
  77. {voidaccess-1.4.5 → voidaccess-1.4.6}/i18n/detect.py +0 -0
  78. {voidaccess-1.4.5 → voidaccess-1.4.6}/i18n/query_expand.py +0 -0
  79. {voidaccess-1.4.5 → voidaccess-1.4.6}/i18n/translate.py +0 -0
  80. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/__init__.py +0 -0
  81. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/_db.py +0 -0
  82. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/alerts.py +0 -0
  83. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/config.py +0 -0
  84. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/diff.py +0 -0
  85. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/jobs.py +0 -0
  86. {voidaccess-1.4.5 → voidaccess-1.4.6}/monitor/scheduler.py +0 -0
  87. {voidaccess-1.4.5 → voidaccess-1.4.6}/scraper/__init__.py +0 -0
  88. {voidaccess-1.4.5 → voidaccess-1.4.6}/scraper/scrape.py +0 -0
  89. {voidaccess-1.4.5 → voidaccess-1.4.6}/scraper/scrape_js.py +0 -0
  90. {voidaccess-1.4.5 → voidaccess-1.4.6}/search/__init__.py +0 -0
  91. {voidaccess-1.4.5 → voidaccess-1.4.6}/search/circuit_breaker.py +0 -0
  92. {voidaccess-1.4.5 → voidaccess-1.4.6}/search/search.py +0 -0
  93. {voidaccess-1.4.5 → voidaccess-1.4.6}/setup.cfg +0 -0
  94. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/__init__.py +0 -0
  95. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/blockchain.py +0 -0
  96. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/cache.py +0 -0
  97. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/cisa.py +0 -0
  98. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/dns_enrichment.py +0 -0
  99. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/domain_reputation.py +0 -0
  100. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/email_reputation.py +0 -0
  101. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/engines.py +0 -0
  102. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/enrichment.py +0 -0
  103. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/github_scraper.py +0 -0
  104. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/gitlab_scraper.py +0 -0
  105. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/hash_reputation.py +0 -0
  106. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/historical_intel.py +0 -0
  107. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/ip_reputation.py +0 -0
  108. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/paste_scraper.py +0 -0
  109. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/pastes.py +0 -0
  110. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/rss_scraper.py +0 -0
  111. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/seed_manager.py +0 -0
  112. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/seeds.py +0 -0
  113. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/shodan.py +0 -0
  114. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/telegram.py +0 -0
  115. {voidaccess-1.4.5 → voidaccess-1.4.6}/sources/virustotal.py +0 -0
  116. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_analysis_opsec.py +0 -0
  117. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_analysis_stylometry.py +0 -0
  118. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_analysis_temporal.py +0 -0
  119. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_api.py +0 -0
  120. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_api_monitors.py +0 -0
  121. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_blockchain.py +0 -0
  122. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_config.py +0 -0
  123. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_crawler.py +0 -0
  124. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_db.py +0 -0
  125. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_dns_enrichment.py +0 -0
  126. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_domain_reputation.py +0 -0
  127. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_email_reputation.py +0 -0
  128. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_fingerprint.py +0 -0
  129. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_github_scraper.py +0 -0
  130. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_gitlab_scraper.py +0 -0
  131. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_graph.py +0 -0
  132. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_hash_reputation.py +0 -0
  133. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_i18n.py +0 -0
  134. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_ip_reputation.py +0 -0
  135. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_llm.py +0 -0
  136. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_llm_utils.py +0 -0
  137. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_model_singleton.py +0 -0
  138. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_monitor.py +0 -0
  139. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_pagination.py +0 -0
  140. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_paste_scraper.py +0 -0
  141. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_rss_scraper.py +0 -0
  142. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_scrape_js.py +0 -0
  143. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_settings.py +0 -0
  144. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_sources.py +0 -0
  145. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_sources_enrichment_new.py +0 -0
  146. {voidaccess-1.4.5 → voidaccess-1.4.6}/tests/test_vector.py +0 -0
  147. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/__init__.py +0 -0
  148. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/async_utils.py +0 -0
  149. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/content_safety.py +0 -0
  150. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/defang.py +0 -0
  151. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/encryption.py +0 -0
  152. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/ioc_freshness.py +0 -0
  153. {voidaccess-1.4.5 → voidaccess-1.4.6}/utils/user_keys.py +0 -0
  154. {voidaccess-1.4.5 → voidaccess-1.4.6}/vector/__init__.py +0 -0
  155. {voidaccess-1.4.5 → voidaccess-1.4.6}/vector/embedder.py +0 -0
  156. {voidaccess-1.4.5 → voidaccess-1.4.6}/vector/model_singleton.py +0 -0
  157. {voidaccess-1.4.5 → voidaccess-1.4.6}/vector/search.py +0 -0
  158. {voidaccess-1.4.5 → voidaccess-1.4.6}/vector/store.py +0 -0
  159. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess/__init__.py +0 -0
  160. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess/llm.py +0 -0
  161. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess/llm_utils.py +0 -0
  162. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess.egg-info/SOURCES.txt +0 -0
  163. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess.egg-info/dependency_links.txt +0 -0
  164. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess.egg-info/entry_points.txt +0 -0
  165. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess.egg-info/requires.txt +0 -0
  166. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess.egg-info/top_level.txt +0 -0
  167. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/__init__.py +0 -0
  168. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/adapters/__init__.py +0 -0
  169. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/browser.py +0 -0
  170. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/__init__.py +0 -0
  171. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/configure.py +0 -0
  172. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/enrich.py +0 -0
  173. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/export.py +0 -0
  174. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/commands/show.py +0 -0
  175. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/config.py +0 -0
  176. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/display.py +0 -0
  177. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/main.py +0 -0
  178. {voidaccess-1.4.5 → voidaccess-1.4.6}/voidaccess_cli/tor_detect.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voidaccess
3
- Version: 1.4.5
3
+ Version: 1.4.6
4
4
  Summary: Dark web OSINT CLI — automated threat intelligence from query to report
5
5
  Author: VoidAccess
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "voidaccess"
7
- version = "1.4.5"
7
+ version = "1.4.6"
8
8
  description = "Dark web OSINT CLI — automated threat intelligence from query to report"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voidaccess
3
- Version: 1.4.5
3
+ Version: 1.4.6
4
4
  Summary: Dark web OSINT CLI — automated threat intelligence from query to report
5
5
  Author: VoidAccess
6
6
  License-Expression: MIT
@@ -20,7 +20,7 @@ from __future__ import annotations
20
20
  import json
21
21
  import uuid
22
22
  from datetime import datetime, timezone
23
- from typing import Any, Optional
23
+ from typing import Any, Optional, Union
24
24
 
25
25
  from sqlalchemy import text
26
26
 
@@ -53,6 +53,48 @@ def _serialize_dt(dt: Optional[datetime]) -> Optional[str]:
53
53
  return dt.isoformat()
54
54
 
55
55
 
56
+ def _coerce_expires_at(expires_at: Union[str, datetime]) -> datetime:
57
+ """SQLite returns TIMESTAMP columns as strings; normalize for comparisons."""
58
+ if isinstance(expires_at, str):
59
+ expires_at = datetime.fromisoformat(expires_at)
60
+ if expires_at.tzinfo is None:
61
+ expires_at = expires_at.replace(tzinfo=timezone.utc)
62
+ return expires_at
63
+
64
+
65
+ def get_page_extraction_cache(page_hash: str) -> Optional[dict[str, list[str]]]:
66
+ """Load cached LLM extraction results when present and not expired."""
67
+ try:
68
+ from db.session import get_session
69
+ except Exception:
70
+ return None
71
+
72
+ try:
73
+ with get_session() as session:
74
+ row = session.execute(
75
+ text(
76
+ """
77
+ SELECT entities_json, expires_at
78
+ FROM page_extraction_cache
79
+ WHERE page_hash = :page_hash
80
+ """
81
+ ),
82
+ {"page_hash": page_hash},
83
+ ).fetchone()
84
+
85
+ if row is None:
86
+ return None
87
+
88
+ entities_json, expires_at = row[0], row[1]
89
+ expires_at = _coerce_expires_at(expires_at)
90
+ if expires_at < datetime.now(timezone.utc):
91
+ return None
92
+
93
+ return json.loads(entities_json)
94
+ except Exception:
95
+ return None
96
+
97
+
56
98
  def save_investigation(
57
99
  query: str,
58
100
  refined_query: Optional[str] = None,
@@ -123,6 +123,24 @@ DEPTH_PRESETS = {
123
123
  "deep": {"top_n": 40, "max_workers": 8, "extract_concurrency": 6},
124
124
  }
125
125
 
126
+ # Pages kept after LLM relevance filter (must match voidaccess.llm.filter_results cap).
127
+ LLM_FILTER_TOP_N = 15
128
+
129
+ INVESTIGATION_STEPS = [
130
+ "Refining query",
131
+ "Searching dark web",
132
+ "Filtering results",
133
+ "Scraping pages",
134
+ "Extracting entities",
135
+ "Enriching intelligence",
136
+ "Enriching domains",
137
+ "Enriching hashes",
138
+ "Enriching emails",
139
+ "Building graph",
140
+ "Generating summary",
141
+ "Finalizing results",
142
+ ]
143
+
126
144
 
127
145
  async def _run_investigation(
128
146
  query: str,
@@ -142,10 +160,11 @@ async def _run_investigation(
142
160
  cfg = cli_config.load_config()
143
161
  preset = DEPTH_PRESETS[depth]
144
162
  display = InvestigationDisplay(quiet=quiet)
145
- display.start(query)
163
+ display.start(query, steps=INVESTIGATION_STEPS)
146
164
 
147
165
  # --- DB init ----------------------------------------------------------
148
166
  sqlite_adapter.init_db()
167
+ _patch_llm_extraction_cache(sqlite_adapter)
149
168
 
150
169
  # --- Tor preflight ----------------------------------------------------
151
170
  tor_proxy: Optional[str] = None
@@ -244,19 +263,19 @@ async def _run_investigation(
244
263
 
245
264
  # --- Step 3 — filter results ------------------------------------------
246
265
  display.update_step("Filtering results", "active")
247
- top_n = preset["top_n"]
248
- filtered_links = search_links[: top_n * 2] if search_links else []
266
+ filter_top_n = LLM_FILTER_TOP_N
267
+ filtered_links = search_links[: filter_top_n * 2] if search_links else []
249
268
  if llm is not None and search_links:
250
269
  try:
251
270
  from voidaccess.llm import filter_results
252
271
  filtered_links = await asyncio.to_thread(filter_results, llm, refined, search_links) or search_links
253
- filtered_links = filtered_links[:top_n]
272
+ filtered_links = filtered_links[:filter_top_n]
254
273
  display.update_step("Filtering results", "ok", f"top {len(filtered_links)}")
255
274
  except Exception as exc:
256
275
  display.update_step("Filtering results", "fail", str(exc))
257
- filtered_links = search_links[:top_n]
276
+ filtered_links = search_links[:filter_top_n]
258
277
  else:
259
- filtered_links = (search_links or [])[:top_n]
278
+ filtered_links = (search_links or [])[:filter_top_n]
260
279
  display.update_step("Filtering results", "skip" if no_llm else "ok", f"{len(filtered_links)} kept")
261
280
 
262
281
  # --- Step 4 — scrape pages -------------------------------------------
@@ -322,69 +341,57 @@ async def _run_investigation(
322
341
  except Exception as exc:
323
342
  display.update_step("Extracting entities", "fail", str(exc))
324
343
 
325
- # --- Step 6 — enrich intelligence ------------------------------------
344
+ # --- Step 6 — enrich intelligence (OTX + IP) ---------------------------
326
345
  display.update_step("Enriching intelligence", "active")
327
346
  enrichment_pages: list[dict] = []
328
347
  try:
329
348
  from sources.enrichment import enrich_investigation as _enrich_inv
330
349
  otx_key = os.getenv("OTX_API_KEY", "") or ""
331
- # Build entity dicts for sources that take them
332
350
  entity_dicts = sqlite_adapter.get_entities(investigation_id)
333
351
  enrichment_pages = await _enrich_inv(refined, otx_api_key=otx_key, entities=entity_dicts)
334
-
335
- # IP reputation pass — re-uses sources.ip_reputation
336
- try:
337
- from sources.ip_reputation import enrich_ip_entities
338
- await enrich_ip_entities(extraction_results, investigation_id=inv_uuid)
339
- except Exception as ip_exc:
340
- console.print(f"[grey50]ip_reputation skipped: {ip_exc}[/grey50]")
341
-
342
- # Step 6.2 — Domain reputation
343
- try:
344
- extraction_results = await enrich_domain_entities(
345
- extraction_results, inv_uuid
346
- )
347
- display.update_step(
348
- "Enriching domains",
349
- "done",
350
- f"{sum(1 for e in extraction_results if e.get('entity_type') == 'DOMAIN')} domains enriched",
351
- )
352
- except Exception as e:
353
- logger.debug(f"Domain enrichment: {e}")
354
-
355
- # Step 6.3 — Hash reputation
356
- try:
357
- extraction_results = await enrich_hash_entities(
358
- extraction_results, inv_uuid
359
- )
360
- display.update_step(
361
- "Enriching hashes",
362
- "done",
363
- "",
364
- )
365
- except Exception as e:
366
- logger.debug(f"Hash enrichment: {e}")
367
-
368
- # Step 6.4 — Email reputation
369
- try:
370
- extraction_results = await enrich_email_entities(
371
- extraction_results, inv_uuid
372
- )
373
- display.update_step(
374
- "Enriching emails",
375
- "done",
376
- "",
377
- )
378
- except Exception as e:
379
- logger.debug(f"Email enrichment: {e}")
380
-
381
352
  sources_used["enrichment"] = {"status": "ok", "count": len(enrichment_pages)}
382
353
  display.update_step("Enriching intelligence", "ok", f"{len(enrichment_pages)} pages added")
383
354
  except Exception as exc:
384
355
  sources_used["enrichment"] = {"status": "fail", "error": str(exc)}
385
356
  display.update_step("Enriching intelligence", "fail", str(exc))
386
357
 
387
- # Run extraction over enrichment pages too
358
+ try:
359
+ from sources.ip_reputation import enrich_ip_entities
360
+ await enrich_ip_entities(extraction_results, investigation_id=inv_uuid)
361
+ except Exception as ip_exc:
362
+ logger.debug("ip_reputation skipped: %s", ip_exc)
363
+
364
+ # --- Step 6.2–6.4 — domain / hash / email (before graph) -------------
365
+ display.update_step("Enriching domains", "active")
366
+ try:
367
+ extraction_results = await enrich_domain_entities(extraction_results, inv_uuid)
368
+ domain_count = sum(
369
+ 1
370
+ for e in sqlite_adapter.get_entities(investigation_id)
371
+ if (e.get("entity_type") or "").upper() == "DOMAIN"
372
+ )
373
+ detail = f"{domain_count} domains enriched" if domain_count else ""
374
+ display.update_step("Enriching domains", "ok", detail)
375
+ except Exception as exc:
376
+ logger.debug("Domain enrichment: %s", exc)
377
+ display.update_step("Enriching domains", "fail", str(exc))
378
+
379
+ display.update_step("Enriching hashes", "active")
380
+ try:
381
+ extraction_results = await enrich_hash_entities(extraction_results, inv_uuid)
382
+ display.update_step("Enriching hashes", "ok")
383
+ except Exception as exc:
384
+ logger.debug("Hash enrichment: %s", exc)
385
+ display.update_step("Enriching hashes", "fail", str(exc))
386
+
387
+ display.update_step("Enriching emails", "active")
388
+ try:
389
+ extraction_results = await enrich_email_entities(extraction_results, inv_uuid)
390
+ display.update_step("Enriching emails", "ok")
391
+ except Exception as exc:
392
+ logger.debug("Email enrichment: %s", exc)
393
+ display.update_step("Enriching emails", "fail", str(exc))
394
+
388
395
  if enrichment_pages:
389
396
  try:
390
397
  from extractor.pipeline import extract_entities_from_pages as _extr2
@@ -487,6 +494,15 @@ async def _run_investigation(
487
494
  await _close_cached_sessions()
488
495
 
489
496
 
497
+ def _patch_llm_extraction_cache(sqlite_adapter: Any) -> None:
498
+ """Use sqlite adapter for cache reads (naive ISO strings from SQLite)."""
499
+ try:
500
+ import extractor.llm_extract as llm_extract
501
+ except Exception:
502
+ return
503
+ llm_extract._load_from_cache = sqlite_adapter.get_page_extraction_cache
504
+
505
+
490
506
  async def _close_cached_sessions() -> None:
491
507
  try:
492
508
  from scraper.scrape import close_cached_sessions as _close_scrape
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes