voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
api/routes/admin.py ADDED
@@ -0,0 +1,214 @@
1
+ """
2
+ Admin routes for VoidAccess API.
3
+
4
+ Provides administrative endpoints for monitoring and managing the system.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import uuid
10
+ from datetime import datetime, timedelta, timezone
11
+ from typing import Optional
12
+
13
+ from fastapi import APIRouter, Depends, HTTPException
14
+ from pydantic import BaseModel, Field
15
+
16
+ from api.auth import get_current_user
17
+ from search.search import SEARCH_ENGINES
18
+ from search.circuit_breaker import get_all_states, record_success, is_open, _engine_failures, _engine_last_success
19
+ from sources.seed_manager import get_seed_manager
20
+
21
# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)

# Router that collects every administrative endpoint defined below.
router = APIRouter(tags=["admin"])

# Seed-validation jobs keyed by job id. Kept in process memory only:
# validation is best-effort and ephemeral, so nothing here is persisted.
_seed_validation_jobs: dict[str, dict] = {}
28
+
29
+
30
@router.get("/circuit-breakers")
async def get_circuit_breakers(current_user=Depends(get_current_user)) -> dict:
    """
    Get the current state of all search engine circuit breakers.

    Args:
        current_user: Resolved by ``get_current_user``; unused in the body,
            present so the route requires authentication.

    Returns:
        ``{"engines": {name: {"state", "failure_count", "url"}}}`` with one
        entry per item of ``SEARCH_ENGINES``. ``state`` is ``"open"`` or
        ``"closed"``, ``failure_count`` defaults to 0 for engines with no
        recorded failures, and ``url`` is the engine URL template cut at its
        first ``{`` placeholder.
    """
    engines = {}
    for engine in SEARCH_ENGINES:
        name = engine["name"]
        # Read the failure counter before asking for the breaker state, in
        # the same order as before, in case ``is_open`` updates the counters.
        failures = _engine_failures.get(name, 0)
        open_state = await is_open(name)
        engines[name] = {
            "state": "open" if open_state else "closed",
            "failure_count": failures,
            # Keep only the part of the template before the first placeholder.
            "url": engine.get("url", "").split("{")[0],
        }
    return {"engines": engines}
47
+
48
+
49
@router.post("/circuit-breakers/{engine_name}/reset", dependencies=[Depends(get_current_user)])
async def reset_circuit_breaker(engine_name: str) -> dict:
    """
    Manually reset one engine's circuit breaker.

    Records a success for ``engine_name`` and reports the breaker as closed.
    NOTE(review): ``engine_name`` is not checked against ``SEARCH_ENGINES``,
    so unknown names are passed to ``record_success`` as-is -- confirm that
    this is intended.
    """
    await record_success(engine_name)
    response = {
        "engine": engine_name,
        "state": "closed",
        "message": "Circuit breaker reset",
    }
    return response
54
+
55
+
56
@router.post("/circuit-breakers/reset-all", dependencies=[Depends(get_current_user)])
async def reset_all_circuit_breakers() -> dict:
    """
    Reset every search engine's circuit breaker.

    Records a success for each entry of ``SEARCH_ENGINES`` in turn and
    returns how many engines were processed.
    """
    for search_engine in SEARCH_ENGINES:
        engine_name = search_engine["name"]
        await record_success(engine_name)
    reset_count = len(SEARCH_ENGINES)
    return {"reset_count": reset_count, "state": "all closed"}
63
+
64
+
65
@router.get("/content-safety/events", dependencies=[Depends(get_current_user)])
async def get_content_safety_events() -> dict:
    """
    Return content safety block event counts for operator review.

    Returns counts only -- never the blocked content itself.

    Returns:
        ``{"last_24h": {...}, "total": {...}}`` where each inner dict maps
        ``query_blocked``, ``url_blocked`` and ``content_blocked`` to a
        count. All counts are zero when ``DATABASE_URL`` is not set. If
        anything fails, the failure is logged and the zeroed payload is
        returned with an extra ``error`` key holding the first 200
        characters of the exception text.
    """
    event_types = ["query_blocked", "url_blocked", "content_blocked"]

    def _zero_counts() -> dict[str, int]:
        # A fresh dict per call so the two halves of a payload never alias.
        return dict.fromkeys(event_types, 0)

    try:
        import os
        if not os.getenv("DATABASE_URL"):
            return {"last_24h": _zero_counts(), "total": _zero_counts()}

        # Imported lazily so the endpoint still answers when no database
        # is configured.
        from db.session import get_session
        from db.models import ContentSafetyEvent
        from sqlalchemy import func

        cutoff = datetime.now(timezone.utc) - timedelta(hours=24)

        with get_session() as session:

            def _count(*criteria) -> int:
                # ``scalar()`` may yield None; treat that as zero.
                return int(
                    session.query(func.count(ContentSafetyEvent.id))
                    .filter(*criteria)
                    .scalar()
                    or 0
                )

            last_24h: dict[str, int] = {}
            total: dict[str, int] = {}
            for et in event_types:
                last_24h[et] = _count(
                    ContentSafetyEvent.event_type == et,
                    ContentSafetyEvent.timestamp >= cutoff,
                )
                total[et] = _count(ContentSafetyEvent.event_type == et)

            return {"last_24h": last_24h, "total": total}

    except Exception as exc:
        # Best-effort endpoint: keep returning a well-formed payload, but
        # record the failure like the other handlers in this module do.
        logger.warning("get_content_safety_events failed: %s", exc)
        return {
            "error": str(exc)[:200],
            "last_24h": _zero_counts(),
            "total": _zero_counts(),
        }
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Seed list management
119
+ # ---------------------------------------------------------------------------
120
+
121
+
122
class AddSeedBody(BaseModel):
    """Request body for manually adding a seed URL."""

    # Required display name, 1 to 200 characters.
    name: str = Field(..., min_length=1, max_length=200)
    # Required seed URL, 8 to 500 characters.
    url: str = Field(..., min_length=8, max_length=500)
    # Optional category, at most 80 characters; defaults to "discovered".
    category: str = Field(default="discovered", max_length=80)
    # Optional tags; each request gets its own empty list by default.
    tags: list[str] = Field(default_factory=list)
127
+
128
+
129
async def _run_seed_validation_job(job_id: str) -> None:
    """
    Validate all seeds and record the outcome under ``job_id``.

    Updates the job's entry in ``_seed_validation_jobs`` (creating it if
    missing): marks it ``running``, then ``completed`` with ``results`` or
    ``failed`` with a truncated ``error``, and always stamps
    ``finished_at``. Exceptions are logged and recorded, never re-raised.
    """
    record = _seed_validation_jobs.setdefault(job_id, {})
    record.update(
        status="running",
        started_at=datetime.now(timezone.utc).isoformat(),
    )
    try:
        outcome = await get_seed_manager().validate_seeds(concurrency=3)
    except Exception as exc:
        logger.warning("Seed validation job %s failed: %s", job_id, exc)
        record["status"] = "failed"
        record["error"] = str(exc)[:300]
    else:
        record["status"] = "completed"
        record["results"] = outcome
    finally:
        record["finished_at"] = datetime.now(timezone.utc).isoformat()
145
+
146
+
147
@router.get("/seeds", dependencies=[Depends(get_current_user)])
async def get_seeds_summary() -> dict:
    """
    Summarise the seed list.

    Returns whatever the seed manager's ``summary()`` produces. If that
    fails, the failure is logged and an empty summary is returned instead.
    """
    empty_summary = {"total": 0, "by_category": {}, "by_status": {}, "last_validated": None}
    try:
        return get_seed_manager().summary()
    except Exception as exc:
        logger.warning("get_seeds_summary failed: %s", exc)
        return empty_summary
156
+
157
+
158
@router.get("/seeds/list", dependencies=[Depends(get_current_user)])
async def list_all_seeds() -> dict:
    """
    Return the full seed list (admin only).

    The result is ``{"seeds": [...]}``. If the seed manager fails, the
    failure is logged and an empty list is returned.
    """
    try:
        all_seeds = get_seed_manager().list_seeds()
    except Exception as exc:
        logger.warning("list_all_seeds failed: %s", exc)
        return {"seeds": []}
    return {"seeds": all_seeds}
167
+
168
+
169
# Strong references to in-flight validation tasks. The event loop keeps only
# weak references to tasks, so a task nobody else references can be
# garbage-collected before it finishes.
_seed_validation_tasks: set[asyncio.Task] = set()


@router.post("/seeds/validate", dependencies=[Depends(get_current_user)])
async def trigger_seed_validation() -> dict:
    """
    Trigger a background validation of every seed over Tor.

    Registers a ``queued`` job in ``_seed_validation_jobs``, schedules
    ``_run_seed_validation_job`` for it, and returns immediately.

    Returns:
        ``{"job_id", "message"}``; callers poll the job id for status.
    """
    seed_manager = get_seed_manager()
    seed_count = len(seed_manager.list_seeds())
    job_id = str(uuid.uuid4())
    _seed_validation_jobs[job_id] = {
        "status": "queued",
        "queued_at": datetime.now(timezone.utc).isoformat(),
        "seed_count": seed_count,
    }
    task = asyncio.create_task(_run_seed_validation_job(job_id))
    # Hold the task until it completes, then let it go. It is deliberately
    # not stored in the job dict, which is returned to API callers verbatim.
    _seed_validation_tasks.add(task)
    task.add_done_callback(_seed_validation_tasks.discard)
    return {
        "job_id": job_id,
        "message": f"Validation started for {seed_count} seeds",
    }
188
+
189
+
190
@router.get("/seeds/validate/{job_id}", dependencies=[Depends(get_current_user)])
async def get_seed_validation_status(job_id: str) -> dict:
    """
    Poll the status of a seed-validation job.

    Returns the stored job fields plus ``job_id``.

    Raises:
        HTTPException: 404 when no job is registered under ``job_id``.
    """
    try:
        job_record = _seed_validation_jobs[job_id]
    except KeyError:
        job_record = None
    # A stored value of None is treated as missing, matching a plain lookup.
    if job_record is None:
        raise HTTPException(status_code=404, detail="Validation job not found")
    return {"job_id": job_id, **job_record}
197
+
198
+
199
@router.post("/seeds/add", dependencies=[Depends(get_current_user)])
async def add_seed(body: AddSeedBody) -> dict:
    """
    Manually add a seed URL to the catalogue.

    Passes the request fields to the seed manager's ``add_discovered_seed``.

    Raises:
        HTTPException: 400 when the seed manager reports the seed was not
            added.
    """
    was_added = get_seed_manager().add_discovered_seed(
        url=body.url,
        name=body.name,
        tags=body.tags,
        category=body.category,
    )
    if was_added:
        return {"added": True, "url": body.url, "category": body.category}
    raise HTTPException(
        status_code=400,
        detail="Seed not added (duplicate URL or blocked by content safety)",
    )
api/routes/auth.py ADDED
@@ -0,0 +1,157 @@
1
+ """Auth endpoints: login, reset-password, me, logout."""
2
+
3
+ import os
4
+ from datetime import datetime, timezone
5
+
6
+ from fastapi import APIRouter, HTTPException, Depends, status, Request
7
+ from slowapi import Limiter
8
+ from slowapi.util import get_remote_address
9
+
10
+ from api.auth import (
11
+ LoginRequest, LoginResponse, ResetPasswordRequest,
12
+ verify_password, hash_password, create_access_token,
13
+ get_current_user, CurrentUser,
14
+ )
15
+ from auth.token_blacklist import revoke_token
16
+ from db.models import User
17
+ from db.session import get_session, get_db
18
+ from sqlalchemy.orm import Session
19
+ from config import REDIS_URL
20
+
21
# Every route in this module is served under the /auth prefix.
router = APIRouter(prefix="/auth", tags=["auth"])

# Setting DISABLE_RATE_LIMIT=true (any letter case) turns rate limiting off.
DISABLE_RATE_LIMIT = os.getenv("DISABLE_RATE_LIMIT", "false").lower() == "true"
_limiter = None if DISABLE_RATE_LIMIT else Limiter(key_func=get_remote_address)


def _no_op_decorator(func):
    """Return ``func`` unchanged; stands in for a limiter decorator."""
    return func


# Per-route decorators: real limits when a limiter exists, pass-through
# otherwise.
if _limiter:
    login_limit = _limiter.limit("5/minute")
    reset_limit = _limiter.limit("3/minute")
else:
    login_limit = _no_op_decorator
    reset_limit = _no_op_decorator
35
+
36
def validate_password_strength(password: str) -> None:
    """
    Validate that a password meets the minimum requirements.

    The password must be 8 to 128 characters long (inclusive) and contain
    at least one letter and one digit.

    Args:
        password: Candidate password in plain text.

    Raises:
        ValueError: With a user-friendly message naming the first rule the
            password breaks.
    """
    if len(password) < 8:
        raise ValueError(
            "Password must be at least 8 characters"
        )
    if len(password) > 128:
        # Exactly 128 characters is accepted, so the message says "at most"
        # rather than "under".
        raise ValueError(
            "Password must be at most 128 characters"
        )
    # Must have at least one letter and one number
    has_letter = any(c.isalpha() for c in password)
    has_digit = any(c.isdigit() for c in password)
    if not (has_letter and has_digit):
        raise ValueError(
            "Password must contain at least one "
            "letter and one number"
        )
57
+
58
+
59
@router.post("/login", response_model=LoginResponse)
@login_limit
async def login(request: Request, body: LoginRequest, db: Session = Depends(get_db)):
    """
    Authenticate with email + password and return a JWT token.

    The email is lower-cased and stripped before lookup, and only active
    users match. On success the user's ``last_login_at`` is updated and
    committed.

    ``request`` is not used in the body; presumably the rate-limit
    decorator needs it in the signature -- TODO confirm.

    Raises:
        HTTPException: 401 when no active user matches or the password
            does not verify.
    """
    normalized_email = body.email.lower().strip()
    account = (
        db.query(User)
        .filter(
            User.email == normalized_email,
            User.is_active == True,  # noqa: E712 -- SQL expression, not a Python bool test
        )
        .first()
    )

    # The same error covers an unknown account and a wrong password.
    if not (account and verify_password(body.password, account.hashed_password)):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid email or password",
        )

    account.last_login_at = datetime.now(timezone.utc)
    db.commit()

    # The token id is returned alongside the token but is not needed here.
    token, _jti = create_access_token(account.id, account.email)

    return LoginResponse(
        access_token=token,
        must_reset_password=account.must_reset_password,
    )
86
+
87
+
88
@router.post("/reset-password")
@reset_limit
async def reset_password(
    request: Request,
    body: ResetPasswordRequest,
    db: Session = Depends(get_db),
    current: CurrentUser = Depends(get_current_user),
):
    """
    Change the authenticated user's password.

    Checks run in this order: the new password matches its confirmation,
    it passes ``validate_password_strength``, and the current password
    verifies. On success the new hash is stored, ``must_reset_password``
    is cleared, and the session is committed.

    Raises:
        HTTPException: 400 for a confirmation mismatch or a weak password;
            401 when the current password is incorrect.
    """
    new_password = body.new_password

    if new_password != body.confirm_password:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="New password and confirmation do not match",
        )

    try:
        validate_password_strength(new_password)
    except ValueError as strength_error:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(strength_error),
        )

    user = current.user
    if not verify_password(body.current_password, user.hashed_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Current password is incorrect",
        )

    user.hashed_password = hash_password(new_password)
    user.must_reset_password = False
    db.commit()

    return {"message": "Password updated successfully"}
125
+
126
+
127
@router.get("/me")
async def get_me(current: CurrentUser = Depends(get_current_user)):
    """
    Return the authenticated user's id, email, reset flag and last login.

    ``last_login_at`` is an ISO-8601 string, or ``None`` when unset.
    """
    user = current.user
    last_login = user.last_login_at
    return {
        "id": user.id,
        "email": user.email,
        "must_reset_password": user.must_reset_password,
        "last_login_at": last_login.isoformat() if last_login else None,
    }
136
+
137
+
138
@router.post("/logout")
async def logout(
    current: CurrentUser = Depends(get_current_user),
):
    """
    Logout: revoke the current token for its remaining lifetime.

    Nothing is revoked when the token has no id or expiry, or has already
    expired. A naive expiry is treated as UTC.

    Raises:
        HTTPException: 503 when revocation fails while ``REDIS_URL`` is
            configured.
    """
    logged_out = {"message": "Logged out successfully"}

    if not (current.jti and current.exp):
        return logged_out

    expires_at = current.exp
    if expires_at.tzinfo is None:
        expires_at = expires_at.replace(tzinfo=timezone.utc)

    seconds_left = int((expires_at - datetime.now(timezone.utc)).total_seconds())
    if seconds_left <= 0:
        return logged_out

    revoked = await revoke_token(current.jti, seconds_left)
    # A failed revocation is an error only when a token store is configured.
    if REDIS_URL and not revoked:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Logout failed due to internal token store error",
        )

    return logged_out