voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
api/main.py
ADDED
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
"""
|
|
2
|
+
api/main.py — FastAPI application entry point for VoidAccess Intelligence API.
|
|
3
|
+
|
|
4
|
+
Exposes the VoidAccess platform programmatically.
|
|
5
|
+
Runs alongside Streamlit on a different port (8000 vs 8501).
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
uvicorn api.main:app --host 0.0.0.0 --port 8000
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from contextlib import asynccontextmanager
|
|
17
|
+
from typing import Callable
|
|
18
|
+
|
|
19
|
+
from fastapi import FastAPI, Depends, Request
|
|
20
|
+
from fastapi.exceptions import RequestValidationError
|
|
21
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
22
|
+
from fastapi.responses import JSONResponse
|
|
23
|
+
from slowapi import Limiter
|
|
24
|
+
from slowapi.errors import RateLimitExceeded
|
|
25
|
+
from slowapi.util import get_remote_address
|
|
26
|
+
|
|
27
|
+
from api.routes import entities, export, investigations, monitors, search, auth, admin, settings
|
|
28
|
+
from api.auth import get_current_user
|
|
29
|
+
from monitor.scheduler import start_scheduler
|
|
30
|
+
|
|
31
|
+
from config import TOR_PROXY_HOST, TOR_PROXY_PORT, PLAYWRIGHT_ENABLED, JWT_SECRET
|
|
32
|
+
|
|
33
|
+
# Configure the root logger first, then obtain this module's named logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Rate limiter setup
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# Rate limiting is switched off only when DISABLE_RATE_LIMIT is exactly
# "true" (case-insensitive); any other value keeps the limiter enabled.
DISABLE_RATE_LIMIT = os.getenv("DISABLE_RATE_LIMIT", "false").lower() == "true"

# ``limiter`` is None when disabled; otherwise requests are keyed by remote address.
limiter = None if DISABLE_RATE_LIMIT else Limiter(key_func=get_remote_address)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
    """Build the JSON 429 response returned when a rate limit is exceeded.

    The body and headers are fixed values; neither ``request`` nor ``exc``
    is inspected.
    """
    payload = {
        "detail": "Too many requests. Please wait 60 seconds before retrying.",
        "retry_after": 60,
    }
    response_headers = {
        "Retry-After": "60",
        "X-RateLimit-Limit": "3",
        "X-RateLimit-Window": "60s",
    }
    return JSONResponse(status_code=429, content=payload, headers=response_headers)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
# Lifespan handler (replaces deprecated on_event)
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and cleanup afterwards.

    Startup, in order: verify ``JWT_SECRET`` is configured, apply migrations,
    probe the database, pre-warm Playwright (when enabled) and the vector
    store, load the seed catalogue, mark investigations left in
    ``processing`` as failed, and start the background scheduler. Every step
    after the secret check is best-effort: failures are logged and startup
    continues.

    Shutdown: stop the scheduler, close the Playwright browser (when
    enabled), and close cached scrape sessions. Each step is isolated so a
    failure in one does not skip the others.

    Raises:
        RuntimeError: if ``JWT_SECRET`` is missing or empty.
    """
    # --- Startup ---
    logger.info("VoidAccess API started")

    # An empty secret is as unusable as a missing one, so reject both.
    if not JWT_SECRET:
        raise RuntimeError(
            "JWT_SECRET is not set. Set JWT_SECRET in your .env file. "
            "Generate a secure secret with: python -c \"import secrets; print(secrets.token_hex(32))\""
        )

    # Run Alembic migrations (idempotent — safe to call on every boot)
    _run_migrations()

    # Result is intentionally ignored; the helper logs on failure.
    _check_db_connectivity()

    # Pre-warm Playwright browser (avoids cold start on first JS page)
    if PLAYWRIGHT_ENABLED:
        try:
            from scraper.scrape_js import get_browser

            await get_browser(TOR_PROXY_HOST, TOR_PROXY_PORT)
            logger.warning("Playwright browser pre-warmed")
        except ImportError:
            logger.warning("Playwright not installed — JS rendering disabled")
        except Exception as e:
            logger.warning(f"Playwright pre-warm failed (non-fatal): {e}")

    # Pre-warm vector store embedding model (avoids 5-15s cold start on first search)
    try:
        from vector.store import get_collection
        from vector.model_singleton import get_embedding_model

        get_collection()
        get_embedding_model()
        logger.warning("Vector store and embedding model pre-warmed")
    except Exception as e:
        logger.warning(f"Vector store pre-warm failed (non-fatal): {e}")

    # Load curated .onion seed catalogue (no Tor validation on startup — too slow)
    try:
        from sources.seed_manager import get_seed_manager

        logger.info("Loading seed database...")
        seed_manager = get_seed_manager()
        logger.warning(
            "Seed database loaded: %d seeds",
            len(seed_manager.list_seeds()),
        )
    except Exception as e:
        logger.warning(f"Seed database load failed (non-fatal): {e}")

    # Recover stranded processing investigations: anything still marked
    # "processing" at boot was interrupted by the previous shutdown.
    try:
        if os.getenv("DATABASE_URL"):
            from db.session import get_session
            from db.models import Investigation

            with get_session() as session:
                stranded_count = session.query(Investigation).filter(Investigation.status == "processing").update(
                    {"status": "failed", "summary": "Investigation interrupted due to server restart."}
                )
                if stranded_count > 0:
                    session.commit()
                    logger.warning(f"Recovered {stranded_count} stranded investigations (marked as failed).")
    except Exception as e:
        logger.warning(f"Failed to recover stranded investigations: {e}")

    # Start background scheduler (monitoring watches + weekly seed refresh)
    scheduler = None
    try:
        scheduler = start_scheduler()
        if scheduler:
            monitors.set_scheduler(scheduler)
            logger.warning("APScheduler started: monitoring watches active")
        else:
            logger.warning("APScheduler background service disabled")
    except Exception as e:
        logger.error(f"APScheduler failed to start: {e}")
        scheduler = None

    yield

    # --- Shutdown ---
    # Each step is wrapped separately so one failure cannot skip the rest.
    try:
        if scheduler and scheduler.running:
            scheduler.shutdown(wait=False)
            logger.warning("APScheduler stopped")
    except Exception as e:
        logger.warning(f"APScheduler shutdown failed (non-fatal): {e}")

    # Close Playwright browser
    if PLAYWRIGHT_ENABLED:
        try:
            from scraper.scrape_js import close_browser

            await close_browser()
        except Exception as e:
            logger.warning(f"Playwright browser close failed (non-fatal): {e}")

    # Close cached scrape sessions (Tor and direct) - always, regardless of PLAYWRIGHT_ENABLED
    try:
        from scraper.scrape import close_cached_sessions

        await close_cached_sessions()
    except Exception as e:
        logger.warning(f"Closing cached scrape sessions failed (non-fatal): {e}")
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# App setup
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# App setup
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
# Application instance; startup and shutdown work is delegated to ``lifespan``.
app = FastAPI(
    lifespan=lifespan,
    title="VoidAccess Intelligence API",
    description="VoidAccess: Dark Web Intelligence Platform",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)
|
|
187
|
+
|
|
188
|
+
# Allowed origins: explicit comma-separated list from env, or localhost defaults.
# Entries are stripped and empty ones dropped, so values such as
# "https://a.example, https://b.example," still yield exact origins
# (a padded or empty entry would never match a browser's Origin header).
CORS_ORIGINS = [
    origin.strip()
    for origin in os.environ.get(
        "CORS_ORIGINS",
        "http://localhost:3000,http://127.0.0.1:3000",
    ).split(",")
    if origin.strip()
]
|
|
193
|
+
|
|
194
|
+
# CORS policy: explicit origin list (never a wildcard) with credentials allowed.
_cors_options = {
    "allow_origins": CORS_ORIGINS,
    "allow_credentials": True,
    "allow_methods": ["GET", "POST", "DELETE", "PUT", "PATCH"],
    "allow_headers": ["Content-Type", "Authorization", "X-Request-ID"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# When rate limiting is enabled, register the 429 handler and expose the
# limiter on app state.
if limiter is not None:
    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
    app.state.limiter = limiter
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse:
    """Flatten request-validation errors into a 422 JSON response.

    Each error becomes a ``"<field>: <message>"`` string, where the field is
    the dotted error location with any ``"body"`` segment removed. The
    response carries both the list (``errors``) and a ``"; "``-joined
    summary (``detail``).
    """

    def _describe(item) -> str:
        # Drop "body" segments so the field path reads like the payload itself.
        location = ".".join(str(part) for part in item.get("loc", []) if part != "body")
        message = item.get("msg", "Invalid value")
        return f"{location}: {message}"

    messages = [_describe(item) for item in exc.errors()]
    body = {"detail": "; ".join(messages), "errors": messages}
    return JSONResponse(status_code=422, content=body)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log any unhandled exception and return a generic 500 response.

    The exception text is written to the server log only. It is deliberately
    not echoed to the client, because exception messages can contain internal
    details (paths, SQL, connection strings).
    """
    # Pass the exception explicitly so the traceback is logged even if this
    # handler is not invoked from inside an ``except`` block.
    logger.error(f"Global exception caught: {exc}", exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={"detail": "Internal Server Error"},
    )
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# ---------------------------------------------------------------------------
|
|
234
|
+
# Global rate limit middleware (currently a pass-through; no global limit is enforced here)
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
@app.middleware("http")
async def global_rate_limit_middleware(request: Request, call_next: Callable):
    """Pass every request through unchanged.

    This middleware enforces no limit. An earlier ``limiter.check`` call was
    removed because it caused 500 errors; rate limiting is expected to be
    applied via decorators on specific routes. The previous path checks
    (exempt paths, ``/api/`` prefix, disabled limiter) all ended in the same
    ``call_next`` call, so they are collapsed here.
    """
    return await call_next(request)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
# Routers
|
|
261
|
+
# ---------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
# Public routes (no auth required)
app.include_router(auth.router)

# Protected routes (require valid JWT): (router, URL prefix, OpenAPI tag).
# The settings router is mounted without a prefix.
_PROTECTED_ROUTERS = (
    (investigations.router, "/investigations", "investigations"),
    (entities.router, "/entities", "entities"),
    (search.router, "/search", "search"),
    (export.router, "/export", "export"),
    (monitors.router, "/monitors", "monitors"),
    (admin.router, "/admin", "admin"),
    (settings.router, "", "settings"),
)
for _router, _prefix, _tag in _PROTECTED_ROUTERS:
    app.include_router(
        _router,
        prefix=_prefix,
        tags=[_tag],
        dependencies=[Depends(get_current_user)],
    )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
# Startup helpers and connectivity checks
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _run_migrations() -> None:
    """Apply any pending Alembic migrations at startup.

    Safe to call on every boot — Alembic is idempotent (already-applied
    migrations are skipped). Logs a warning and continues on failure so a
    migration error never hard-crashes the API process.

    Does nothing when ``DATABASE_URL`` is unset. ``alembic.ini`` and the
    ``db/migrations`` script directory are resolved relative to the parent of
    this file's directory.
    """
    database_url = os.getenv("DATABASE_URL")
    if not database_url:
        logger.info("DATABASE_URL not set — skipping migrations")
        return
    try:
        from alembic.config import Config  # noqa: PLC0415
        from alembic import command  # noqa: PLC0415
        from alembic.util import CommandError  # noqa: PLC0415
        import pathlib  # noqa: PLC0415

        project_root = pathlib.Path(__file__).resolve().parents[1]
        ini_path = project_root / "alembic.ini"
        alembic_cfg = Config(str(ini_path))
        # Alembic stores options in a configparser, where "%" starts an
        # interpolation. Escape it so URLs with percent-encoded characters
        # (e.g. in the password) are stored verbatim.
        alembic_cfg.set_main_option("sqlalchemy.url", database_url.replace("%", "%%"))
        alembic_cfg.set_main_option("script_location", str(project_root / "db" / "migrations"))

        try:
            command.upgrade(alembic_cfg, "head")
            logger.info("Alembic migrations applied")
        except CommandError as e:
            # Treat an "already up to date" report as success; re-raise anything else.
            if "already up to date" in str(e).lower():
                logger.info("Alembic migrations already at head")
            else:
                raise
    except Exception as exc:
        logger.warning("Migration failed — proceeding without applying: %s", exc)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
async def _check_db_connectivity_async() -> str:
    """Return 'ok' if the DB answers ``SELECT 1``, an ``error: ...`` string otherwise.

    The result is served by the health endpoints, so on failure only the
    exception class name is returned; the full message (which may contain
    hosts or credentials) is written to the server log instead.
    """
    if not os.getenv("DATABASE_URL"):
        return "error: DATABASE_URL not configured"
    try:
        from db.session import get_async_session  # noqa: PLC0415
        from sqlalchemy import text

        async with get_async_session() as session:
            await session.execute(text("SELECT 1"))
        return "ok"
    except Exception as exc:
        logger.warning("DB connectivity check failed: %s", exc)
        return f"error: {type(exc).__name__}"
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
async def _check_tor_connectivity_async() -> str:
    """Return 'ok' if a TCP connection to the Tor proxy succeeds, 'unreachable' otherwise.

    Host and port come from ``TOR_PROXY_HOST`` / ``TOR_PROXY_PORT`` (defaults
    ``127.0.0.1:9050``). The connection attempt times out after 3 seconds. A
    malformed port is reported as 'unreachable' rather than raised, so the
    health endpoints never fail on bad configuration.
    """
    host = os.getenv("TOR_PROXY_HOST", "127.0.0.1")
    try:
        # Parse inside the try block: a non-numeric port must not escape as ValueError.
        port = int(os.getenv("TOR_PROXY_PORT", "9050"))
        _reader, writer = await asyncio.wait_for(
            asyncio.open_connection(host, port), timeout=3.0
        )
        writer.close()
        await writer.wait_closed()
        return "ok"
    except Exception:
        return "unreachable"
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _check_db_connectivity() -> bool:
    """Synchronous DB health probe used at startup.

    Returns the result of ``db_health_check`` on a fresh session, or False
    when ``DATABASE_URL`` is unset or the check raises (the error is logged).
    """
    if os.getenv("DATABASE_URL"):
        try:
            from db.session import get_session  # noqa: PLC0415
            from db.queries import db_health_check  # noqa: PLC0415

            with get_session() as db:
                return db_health_check(db)
        except Exception as err:
            logger.warning("DB connectivity check failed: %s", err)
    return False
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _check_tor_connectivity() -> bool:
    """Return True if a TCP connection to the Tor proxy succeeds. Sync variant for startup.

    Host and port come from ``TOR_PROXY_HOST`` / ``TOR_PROXY_PORT`` (defaults
    ``127.0.0.1:9050``); the connection attempt times out after 2 seconds. A
    malformed port yields False instead of raising.
    """
    import socket  # noqa: PLC0415

    host = os.getenv("TOR_PROXY_HOST", "127.0.0.1")
    try:
        # ValueError covers a non-numeric port; OSError covers connection failures.
        port = int(os.getenv("TOR_PROXY_PORT", "9050"))
        with socket.create_connection((host, port), timeout=2):
            return True
    except (OSError, ValueError):
        return False
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ---------------------------------------------------------------------------
|
|
408
|
+
# Health endpoints
|
|
409
|
+
# ---------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
@app.get("/health", tags=["health"])
async def health() -> dict:
    """Report database and Tor proxy connectivity.

    Both checks run concurrently. ``status`` is "healthy" only when both
    report "ok"; otherwise it is "degraded".
    """
    database_state, tor_state = await asyncio.gather(
        _check_db_connectivity_async(),
        _check_tor_connectivity_async(),
    )
    report = {"database": database_state, "tor": tor_state}

    overall = "degraded"
    if database_state == "ok" and tor_state == "ok":
        overall = "healthy"
    return {"status": overall, "checks": report}
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
@app.get("/healthz/live", tags=["health"])
async def liveness() -> dict:
    """Liveness probe: returns a fixed payload and performs no dependency checks."""
    return dict(status="alive")
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@app.get("/healthz/ready", tags=["health"])
async def readiness() -> JSONResponse:
    """Readiness probe — checks DB and Tor are reachable.

    Both checks run concurrently. The JSON body is ``{"status", "checks"}``.
    The HTTP status is 200 when both checks report "ok" and 503 otherwise, so
    probes that only look at the status code can tell when the service is
    not ready.
    """
    db_result, tor_result = await asyncio.gather(
        _check_db_connectivity_async(),
        _check_tor_connectivity_async(),
    )
    checks = {"database": db_result, "tor": tor_result}

    is_ready = all(v == "ok" for v in checks.values())
    status = "ready" if is_ready else "not_ready"
    return JSONResponse(
        status_code=200 if is_ready else 503,
        content={"status": status, "checks": checks},
    )
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
@app.get("/debug/tor-test", tags=["health"])
async def tor_test(_=Depends(get_current_user)) -> dict:
    """
    Fetch https://check.torproject.org through the configured SOCKS5 proxy.

    Returns the HTTP status and the first 100 characters of the response on
    success, or ``tor_working: False`` with the error text on any failure.
    TODO: Remove or protect in production.
    """
    try:
        import aiohttp  # noqa: PLC0415
        from aiohttp_socks import ProxyConnector  # noqa: PLC0415

        proxy = ProxyConnector.from_url(f"socks5://{TOR_PROXY_HOST}:{TOR_PROXY_PORT}")
        limit = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(connector=proxy, timeout=limit) as http:
            async with http.get("https://check.torproject.org") as reply:
                page = await reply.text()
                code = reply.status
        return {
            "tor_working": True,
            "status_code": code,
            "response": page[:100],
        }
    except Exception as err:
        return {"tor_working": False, "error": str(err)}
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
@app.get("/debug/search-test", tags=["health"])
async def search_test(_=Depends(get_current_user)) -> dict:
    """
    Run a fixed query through ``get_search_results`` and summarise the outcome.

    Returns the result count and first result on success, or
    ``search_working: False`` with the error text on any failure.
    TODO: Remove or protect in production.
    """
    try:
        from search.search import get_search_results  # noqa: PLC0415

        hits = get_search_results("bitcoin+dark+web")
        hit_count = len(hits)
        top_hit = hits[0] if hits else None
        return {
            "search_working": True,
            "results_count": hit_count,
            "first_result": top_hit,
        }
    except Exception as err:
        return {"search_working": False, "error": str(err)}
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
@app.get("/debug/stack", tags=["health"])
async def debug_stack(_=Depends(get_current_user)) -> dict:
    """Returns a list of all running asyncio tasks and their stack traces.

    Requires an authenticated user, like the other ``/debug/*`` endpoints:
    the output exposes file paths, function names and coroutine reprs.
    """
    out = []
    for i, t in enumerate(asyncio.all_tasks()):
        # One "file:line in function" entry per frame on the task's stack.
        stack = [
            f"{f.f_code.co_filename}:{f.f_lineno} in {f.f_code.co_name}"
            for f in t.get_stack()
        ]
        out.append({
            "task_id": i,
            "name": t.get_name(),
            "coro": str(t.get_coro()),
            "stack": stack,
        })
    return {"tasks": out}
|
api/routes/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# api/routes package
|