voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
auth/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from auth.token_blacklist import revoke_token, is_token_revoked, close
@@ -0,0 +1,108 @@
1
+ """
2
+ Token blacklist using Redis for JWT revocation.
3
+
4
+ Provides:
5
+ - revoke_token(jti, expires_in_seconds): Add JTI to blacklist with TTL
6
+ - is_token_revoked(jti): Check if JTI is in blacklist
7
+
8
+ Gracefully degrades if Redis is unavailable (REDIS_URL not set, or the connection attempt fails).
9
+ """
10
+
11
+ import logging
12
+ import redis.asyncio as redis
13
+ from typing import Optional
14
+
15
+ from config import REDIS_URL
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ _pool: Optional[redis.ConnectionPool] = None
20
+ _redis_client: Optional[redis.Redis] = None
21
+ _blacklist_enabled = False
22
+
23
+ BLACKLIST_PREFIX = "blacklist:"
24
+
25
+
26
async def _get_redis() -> Optional[redis.Redis]:
    """Return the shared Redis client, creating and verifying it on first use.

    Returns:
        The module-level client once a ``ping()`` has succeeded, or ``None``
        when ``REDIS_URL`` is not configured or the connection attempt fails.

    Side effects:
        Updates the module globals ``_pool``, ``_redis_client`` and
        ``_blacklist_enabled`` to reflect the outcome.
    """
    global _pool, _redis_client, _blacklist_enabled

    if REDIS_URL is None:
        _blacklist_enabled = False
        logger.warning("REDIS_URL not configured - token blacklist disabled")
        return None

    if _redis_client is None:
        # Build into locals first so a half-initialised client/pool is never
        # visible through the module globals while ping() is still pending.
        pool = None
        try:
            pool = redis.ConnectionPool.from_url(
                REDIS_URL,
                decode_responses=True,
            )
            client = redis.Redis(connection_pool=pool)
            await client.ping()
        except Exception as e:
            logger.warning("Failed to connect to Redis: %s - token blacklist disabled", e)
            if pool is not None:
                # Release the failed pool; otherwise every retry would
                # overwrite _pool and leak the previous one.
                try:
                    await pool.disconnect()
                except Exception as cleanup_error:
                    logger.debug("Error while discarding Redis pool: %s", cleanup_error)
            _pool = None
            _redis_client = None
            _blacklist_enabled = False
        else:
            _pool = pool
            _redis_client = client
            _blacklist_enabled = True
            logger.info("Token blacklist enabled via Redis")

    return _redis_client
50
+
51
+
52
async def revoke_token(jti: str, expires_in_seconds: int) -> bool:
    """Blacklist a JWT ID until the token would have expired anyway.

    Args:
        jti: The JWT ID to revoke.
        expires_in_seconds: Remaining token lifetime, used as the Redis TTL.

    Returns:
        True when the key was written; False when the blacklist is disabled
        or the Redis call failed (the failure is logged).
    """
    redis_conn = await _get_redis()
    if redis_conn is not None and _blacklist_enabled:
        try:
            await redis_conn.setex(
                f"{BLACKLIST_PREFIX}{jti}", expires_in_seconds, "revoked"
            )
        except Exception as exc:
            logger.error("Failed to revoke token %s: %s", jti, exc)
        else:
            return True
    return False
74
+
75
+
76
async def is_token_revoked(jti: str) -> bool:
    """Report whether a JWT ID is currently on the blacklist.

    Args:
        jti: The JWT ID to look up.

    Returns:
        True when a blacklist key exists for ``jti``. False when it does not,
        when the blacklist is disabled, or when the Redis lookup fails (the
        failure is logged and the token is treated as not revoked).
    """
    redis_conn = await _get_redis()
    blacklist_usable = redis_conn is not None and _blacklist_enabled
    if not blacklist_usable:
        return False

    try:
        hits = await redis_conn.exists(f"{BLACKLIST_PREFIX}{jti}")
        return hits > 0
    except Exception as exc:
        logger.error("Failed to check token revocation for %s: %s", jti, exc)
        return False
97
+
98
+
99
async def close():
    """Shut down the module-level Redis client and its connection pool.

    Each global is reset to ``None`` only after its close call has returned.
    """
    global _pool, _redis_client

    active_client = _redis_client
    if active_client is not None:
        await active_client.aclose()
        _redis_client = None

    active_pool = _pool
    if active_pool is not None:
        await active_pool.disconnect()
        _pool = None
cli/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """voidaccess CLI — dark-web OSINT command-line interface."""
2
+
3
+ __version__ = "1.3.0"
@@ -0,0 +1 @@
1
+ """CLI database adapters."""
cli/adapters/sqlite.py ADDED
@@ -0,0 +1,273 @@
1
+ """
2
+ cli/adapters/sqlite.py — SQLite persistence layer for the CLI.
3
+
4
+ Reuses the existing SQLAlchemy ORM (db.models) and engine factory
5
+ (db.session) by setting DATABASE_URL=sqlite:///~/.voidaccess/investigations.db
6
+ before any voidaccess module is imported (cli.config.apply_env).
7
+
8
+ This adapter wraps that infrastructure with CLI-friendly helpers:
9
+ init_db() — create tables on first run (no Alembic)
10
+ save_investigation() — create an Investigation row
11
+ update_investigation() — patch fields on an existing row
12
+ list_investigations() — recent runs
13
+ get_investigation() — single row by id
14
+ get_entities() — entities for an investigation, optionally filtered
15
+ get_relationships() — edges for an investigation
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import uuid
22
+ from datetime import datetime, timezone
23
+ from typing import Any, Optional
24
+
25
+
26
def init_db() -> None:
    """Create every ORM table on the engine from ``db.session.get_engine()``.

    Uses ``Base.metadata.create_all`` directly, so no Alembic migration is
    involved.
    """
    from db.models import Base
    from db.session import get_engine

    Base.metadata.create_all(get_engine())
32
+
33
+
34
+ def _serialize_dt(dt: Optional[datetime]) -> Optional[str]:
35
+ if dt is None:
36
+ return None
37
+ if dt.tzinfo is None:
38
+ dt = dt.replace(tzinfo=timezone.utc)
39
+ return dt.isoformat()
40
+
41
+
42
def save_investigation(
    query: str,
    refined_query: Optional[str] = None,
    model_used: Optional[str] = None,
    status: str = "running",
) -> str:
    """Add a new Investigation row and return its id as a UUID string.

    Fresh random UUIDs are generated for both ``id`` and ``run_id``;
    ``user_id`` is always stored as ``None``.
    """
    from db.models import Investigation
    from db.session import get_session

    new_id = uuid.uuid4()
    new_run_id = uuid.uuid4()
    with get_session() as session:
        fields = {
            "id": new_id,
            "run_id": new_run_id,
            "query": query,
            "refined_query": refined_query,
            "model_used": model_used,
            "status": status,
            "user_id": None,
        }
        session.add(Investigation(**fields))
    return str(new_id)
66
+
67
+
68
def update_investigation(investigation_id: str, updates: dict[str, Any]) -> None:
    """Apply a whitelisted subset of ``updates`` to one Investigation row.

    Keys outside the whitelist are dropped silently. If nothing remains, no
    session is opened.

    Raises:
        ValueError: if ``investigation_id`` is not a valid UUID string.
    """
    from db.models import Investigation
    from db.session import get_session

    target_id = uuid.UUID(investigation_id)
    permitted = (
        "status",
        "refined_query",
        "model_used",
        "preset",
        "summary",
        "graph_status",
        "current_step",
        "current_step_label",
        "entity_count",
        "page_count",
    )
    changes = {}
    for field, value in updates.items():
        if field in permitted:
            changes[field] = value

    if changes:
        with get_session() as session:
            session.query(Investigation).filter_by(id=target_id).update(changes)
90
+
91
+
92
def resolve_investigation_id(prefix_or_full: str) -> Optional[str]:
    """Accept a full UUID or a unique prefix; return the full UUID string.

    Args:
        prefix_or_full: Either any UUID spelling accepted by ``uuid.UUID``
            or the leading characters of a stored investigation id.

    Returns:
        The canonical (lowercase, hyphenated) UUID string, or ``None`` when
        the input is not a string, is blank, matches no investigation, or
        matches more than one.
    """
    # Non-string input can never match; bail out instead of crashing on
    # .strip() further down.
    if not isinstance(prefix_or_full, str):
        return None

    # Fast path: a complete UUID needs no database access at all.
    try:
        return str(uuid.UUID(prefix_or_full))
    except ValueError:
        pass

    needle = prefix_or_full.strip().lower()
    if not needle:
        return None

    # Imported lazily so the fast path above works without the DB layer.
    from db.models import Investigation
    from db.session import get_session

    with get_session() as session:
        # Select only the id column; the full ORM rows are not needed just
        # to compare id prefixes.
        id_rows = session.query(Investigation.id).all()
        matches = [
            candidate
            for candidate in (str(row[0]) for row in id_rows)
            if candidate.startswith(needle)
        ]

    # An ambiguous prefix is treated the same as no match.
    if len(matches) == 1:
        return matches[0]
    return None
112
+
113
+
114
def get_investigation(investigation_id: str) -> Optional[dict[str, Any]]:
    """Fetch one investigation as a plain dict, by full id or unique prefix.

    Returns:
        The dict built by ``_investigation_row``, or ``None`` when the id
        cannot be turned into a UUID or no row has that id.
    """
    from db.models import Investigation
    from db.session import get_session

    resolved = resolve_investigation_id(investigation_id)
    try:
        # Fall back to the raw input when prefix resolution found nothing;
        # an invalid value is then rejected by uuid.UUID below.
        lookup_id = uuid.UUID(resolved or investigation_id)
    except (ValueError, AttributeError):
        return None

    with get_session() as session:
        found = session.query(Investigation).filter_by(id=lookup_id).one_or_none()
        return None if found is None else _investigation_row(found)
128
+
129
+
130
def list_investigations(limit: int = 50) -> list[dict[str, Any]]:
    """Return up to ``limit`` investigations as dicts, newest ``created_at`` first."""
    from db.models import Investigation
    from db.session import get_session

    with get_session() as session:
        newest_first = session.query(Investigation).order_by(
            Investigation.created_at.desc()
        )
        return [_investigation_row(item) for item in newest_first.limit(limit).all()]
142
+
143
+
144
def _investigation_row(inv) -> dict[str, Any]:
    """Convert an Investigation object into a plain dict.

    ``id`` is stringified and ``created_at`` goes through ``_serialize_dt``;
    every other field is copied as-is.
    """
    row: dict[str, Any] = {"id": str(inv.id)}
    for attr in (
        "query",
        "refined_query",
        "status",
        "model_used",
        "summary",
        "entity_count",
        "page_count",
    ):
        row[attr] = getattr(inv, attr)
    row["created_at"] = _serialize_dt(inv.created_at)
    for attr in ("current_step", "current_step_label"):
        row[attr] = getattr(inv, attr)
    return row
158
+
159
+
160
def get_entities(
    investigation_id: str,
    entity_types: Optional[list[str]] = None,
    limit: int = 1000,
) -> list[dict[str, Any]]:
    """List entities belonging to an investigation as plain dicts.

    Args:
        investigation_id: Full UUID string or unique id prefix.
        entity_types: When non-empty, only entities of these types are kept.
        limit: Maximum number of rows to return.

    Raises:
        ValueError: if the id cannot be resolved to a valid UUID.
    """
    from db.models import Entity
    from db.session import get_session

    resolved = resolve_investigation_id(investigation_id)
    owner_id = uuid.UUID(resolved or investigation_id)
    with get_session() as session:
        selection = session.query(Entity).filter(Entity.investigation_id == owner_id)
        if entity_types:
            selection = selection.filter(Entity.entity_type.in_(entity_types))
        return [_entity_row(item) for item in selection.limit(limit).all()]
176
+
177
+
178
def _entity_row(e) -> dict[str, Any]:
    """Convert an Entity object into a plain dict.

    ``id`` is stringified, ``confidence`` is cast to float unless it is
    ``None``, and the two timestamps go through ``_serialize_dt``.
    """
    row: dict[str, Any] = {"id": str(e.id)}
    for attr in ("entity_type", "value", "canonical_value"):
        row[attr] = getattr(e, attr)

    score = e.confidence
    row["confidence"] = None if score is None else float(score)

    for attr in (
        "context_snippet",
        "extraction_method",
        "source_count",
        "corroborating_sources",
    ):
        row[attr] = getattr(e, attr)

    row["first_seen"] = _serialize_dt(e.first_seen)
    row["last_seen"] = _serialize_dt(e.last_seen)
    return row
192
+
193
+
194
def get_relationships(investigation_id: str, limit: int = 5000) -> list[dict[str, Any]]:
    """List relationship edges of an investigation as plain dicts.

    Args:
        investigation_id: Full UUID string or unique id prefix.
        limit: Maximum number of edges to return.

    Raises:
        ValueError: if the id cannot be resolved to a valid UUID.
    """
    from db.models import EntityRelationship
    from db.session import get_session

    resolved = resolve_investigation_id(investigation_id)
    owner_id = uuid.UUID(resolved or investigation_id)

    edges: list[dict[str, Any]] = []
    with get_session() as session:
        selection = session.query(EntityRelationship).filter(
            EntityRelationship.investigation_id == owner_id
        )
        for rel in selection.limit(limit).all():
            edge = {
                "id": str(rel.id),
                "entity_a_id": str(rel.entity_a_id),
                "entity_b_id": str(rel.entity_b_id),
                "relationship_type": rel.relationship_type,
            }
            if rel.confidence is not None:
                edge["confidence"] = float(rel.confidence)
            else:
                edge["confidence"] = None
            edges.append(edge)
    return edges
217
+
218
+
219
def save_relationships(investigation_id: str, edges: list[dict[str, Any]]) -> int:
    """Bulk-insert co-occurrence edges; ignores duplicate (a,b,type) triples.

    Args:
        investigation_id: Full UUID string of the owning investigation.
        edges: Dicts with ``entity_a_id``, ``entity_b_id``,
            ``relationship_type`` and optionally ``confidence`` (default 1.0).
            Entity ids may be any UUID spelling or ``uuid.UUID`` objects.

    Returns:
        The number of edges added to the session.

    Raises:
        ValueError: if ``investigation_id`` is not a valid UUID string.

    Edges with a missing field, an unparsable id or an unparsable confidence
    are skipped (best effort) rather than aborting the whole batch.
    """
    inv_uuid = uuid.UUID(investigation_id)
    if not edges:
        return 0

    # Imported lazily so the empty-batch path needs no DB layer.
    from db.models import EntityRelationship
    from db.session import get_session

    written = 0
    with get_session() as session:
        existing = {
            (str(r.entity_a_id), str(r.entity_b_id), r.relationship_type)
            for r in session.query(EntityRelationship)
            .filter(EntityRelationship.investigation_id == inv_uuid)
            .all()
        }
        for edge in edges:
            raw_a = edge.get("entity_a_id")
            raw_b = edge.get("entity_b_id")
            rel_type = edge.get("relationship_type")
            if not (raw_a and raw_b and rel_type):
                continue
            try:
                a_id = uuid.UUID(str(raw_a))
                b_id = uuid.UUID(str(raw_b))
                confidence = float(edge.get("confidence", 1.0))
            except (ValueError, TypeError):
                # Malformed edge: skip it, keep processing the rest.
                continue

            # Compare in the same canonical str(UUID) form used for the rows
            # already stored, so differently-spelled ids still deduplicate.
            key = (str(a_id), str(b_id), rel_type)
            if key in existing:
                continue

            session.add(
                EntityRelationship(
                    entity_a_id=a_id,
                    entity_b_id=b_id,
                    relationship_type=rel_type,
                    confidence=confidence,
                    investigation_id=inv_uuid,
                )
            )
            existing.add(key)
            written += 1
    return written
253
+
254
+
255
def investigation_to_export_dict(investigation_id: str) -> dict[str, Any]:
    """Bundle an investigation with its entities and relationships.

    Returns:
        A dict with keys ``investigation``, ``entities`` and
        ``relationships``, or an empty dict when the investigation is not
        found.
    """
    resolved = resolve_investigation_id(investigation_id) or investigation_id
    record = get_investigation(resolved)
    if record is None:
        return {}
    return {
        "investigation": record,
        "entities": get_entities(resolved),
        "relationships": get_relationships(resolved),
    }
268
+
269
+
270
def write_json_export(investigation_id: str, path) -> None:
    """Write the full export dict of an investigation to ``path`` as JSON.

    The output is UTF-8, indented by two spaces; values JSON cannot encode
    natively are written via ``str()``.
    """
    export = investigation_to_export_dict(investigation_id)
    from pathlib import Path

    payload = json.dumps(export, indent=2, default=str)
    Path(path).write_text(payload, encoding="utf-8")