voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
api/routes/settings.py ADDED
@@ -0,0 +1,851 @@
1
+ """
2
+ Settings API — per-user API key management.
3
+
4
+ GET /settings/api-keys — list key names and their configured status
5
+ POST /settings/api-keys — upsert a key
6
+ DELETE /settings/api-keys/{key_name} — remove a user's key
7
+ POST /settings/api-keys/test — test a key without saving it
8
+ GET /settings/models — list available models per configured provider
9
+ POST /settings/models/validate — test a model ID is accessible
10
+
11
+ All routes require authentication (JWT).
12
+ """
13
+
14
+ import asyncio
15
+ import time
16
+ from typing import Annotated, Optional, List
17
+
18
+ from fastapi import APIRouter, Depends, HTTPException, Response
19
+ from pydantic import BaseModel
20
+ from sqlalchemy import select as sa_select
21
+
22
+ from api.auth import get_current_user, require_password_not_reset_pending, CurrentUser
23
+ from db.models import UserApiKey
24
+ from db.session import get_async_session
25
+ from utils.encryption import encrypt_api_key
26
+
27
+
28
+ router = APIRouter(prefix="/settings", tags=["settings"])
29
+
30
+
31
# Registry of every API key name a user may store, keyed by the environment-style
# key name. Each entry carries:
#   label        - short display name returned by GET /settings/api-keys
#   description  - help text returned by GET /settings/api-keys
#   test_url     - URL requested by POST /settings/api-keys/test to probe the key
#   test_header  - request header that carries the key; None means the key is
#                  substituted into the "{key}" placeholder of test_url instead
#   test_prefix  - string prepended to the key inside the header (e.g. "Bearer ")
# Membership in this dict is also the allow-list check used by the upsert,
# delete and test routes.
ALLOWED_KEY_NAMES = {
    "OPENAI_API_KEY": {
        "label": "OpenAI",
        "description": "Enables GPT-4o and GPT-4 models",
        "test_url": "https://api.openai.com/v1/models",
        "test_header": "Authorization",
        "test_prefix": "Bearer ",
    },
    "ANTHROPIC_API_KEY": {
        "label": "Anthropic",
        "description": "Enables Claude models",
        "test_url": "https://api.anthropic.com/v1/models",
        "test_header": "x-api-key",
        "test_prefix": "",
    },
    "GOOGLE_API_KEY": {
        "label": "Google Gemini",
        "description": "Enables Gemini models (free tier available)",
        # No header: the key travels in the query string via the {key} placeholder.
        "test_url": "https://generativelanguage.googleapis.com/v1/models?key={key}",
        "test_header": None,
        "test_prefix": None,
    },
    "OPENROUTER_API_KEY": {
        "label": "OpenRouter",
        "description": "Access 100+ models including free tier options",
        "test_url": "https://openrouter.ai/api/v1/models",
        "test_header": "Authorization",
        "test_prefix": "Bearer ",
    },
    "GROQ_API_KEY": {
        "label": "Groq (Free tier)",
        "description": "Fast inference — Llama 3.3 70B free. Sign up at console.groq.com",
        "test_url": "https://api.groq.com/openai/v1/models",
        "test_header": "Authorization",
        "test_prefix": "Bearer ",
    },
    "OTX_API_KEY": {
        "label": "AlienVault OTX",
        "description": "Threat intelligence enrichment",
        "test_url": "https://otx.alienvault.com/api/v1/user/me",
        "test_header": "X-OTX-API-KEY",
        "test_prefix": "",
    },
    "VT_API_KEY": {
        "label": "VirusTotal",
        "description": "File hash enrichment (optional)",
        "test_url": "https://www.virustotal.com/api/v3/users/current",
        "test_header": "x-apikey",
        "test_prefix": "",
    },
    # GITHUB_TOKEN and GITLAB_TOKEN are probed by dedicated helpers in
    # test_api_key; their test_* fields here are not used by that route.
    "GITHUB_TOKEN": {
        "label": "GitHub Token",
        "description": (
            "Optional. Increases rate limit from 10 to 30 requests/min. "
            "No scopes needed — public repo access only."
        ),
        "test_url": "https://api.github.com/rate_limit",
        "test_header": "Authorization",
        "test_prefix": "Bearer ",
    },
    "GITLAB_TOKEN": {
        "label": "GitLab Token",
        "description": (
            "Optional. Increases rate limit from ~15 to ~60 requests/min. "
            "No scopes needed — public repo access only. "
            "Create at gitlab.com/-/profile/personal_access_tokens."
        ),
        "test_url": "https://gitlab.com/api/v4/user",
        "test_header": "PRIVATE-TOKEN",
        "test_prefix": "",
    },
    "SECURITYTRAILS_API_KEY": {
        "label": "SecurityTrails API Key",
        "description": (
            "Optional. Enhanced DNS history and domain infrastructure data. "
            "Free tier: 50 queries/month."
        ),
        "test_url": "https://api.securitytrails.com/v1/ping",
        "test_header": "APIKEY",
        "test_prefix": "",
    },
    "ABUSEIPDB_API_KEY": {
        "label": "AbuseIPDB",
        "description": (
            "Community IP abuse reports. "
            "Free tier: 1000 checks/day. "
            "Sign up at abuseipdb.com/register"
        ),
        "test_url": "https://api.abuseipdb.com/api/v2/check?ipAddress=1.1.1.1&maxAgeInDays=1",
        "test_header": "Key",
        "test_prefix": "",
    },
    "GREYNOISE_API_KEY": {
        "label": "GreyNoise",
        "description": (
            "IP noise classification. "
            "Suppresses known benign scanners (Shodan, Censys, researchers) from results. "
            "Sign up at greynoise.io/pricing"
        ),
        "test_url": "https://api.greynoise.io/v3/community/8.8.8.8",
        "test_header": "key",
        "test_prefix": "",
    },
    "URLSCAN_API_KEY": {
        "label": "URLScan.io",
        "description": (
            "Domain scan data and malicious URL detection. "
            "Optional — free public data available without key. "
            "Sign up at urlscan.io/user/signup"
        ),
        "test_url": "https://urlscan.io/api/v1/search/?q=domain:example.com&size=1",
        "test_header": "API-Key",
        "test_prefix": "",
    },
    "HYBRID_ANALYSIS_API_KEY": {
        "label": "Hybrid Analysis",
        "description": (
            "Malware sandbox behavioral analysis. "
            "Extracts verdicts, malware families, AV detections, and network IOCs "
            "from file hashes. Free tier available."
        ),
        "test_url": "https://www.hybrid-analysis.com/api/v2/key/current",
        "test_header": "api-key",
        "test_prefix": "",
    },
    "HIBP_API_KEY": {
        "label": "HaveIBeenPwned",
        "description": (
            "Email breach lookup. Paid API ($3.50/month individual). "
            "Most valuable for threat actor attribution — breach history can expose "
            "password reuse patterns and reveal real identity across platforms. "
            "Sign up at haveibeenpwned.com/API/Key"
        ),
        # NOTE(review): test_api_key treats only HTTP 200/201 as valid; confirm this
        # endpoint answers 200 (not 404) for a valid key and an unbreached address.
        "test_url": "https://haveibeenpwned.com/api/v3/breachedaccount/test@example.com",
        "test_header": "hibp-api-key",
        "test_prefix": "",
    },
    "EMAILREP_API_KEY": {
        "label": "EmailRep",
        "description": (
            "Email reputation scoring, disposable detection, and platform presence. "
            "Works without a key at reduced rate limits. "
            "Key increases throughput for large investigations."
        ),
        "test_url": "https://emailrep.io/test@example.com",
        "test_header": "Key",
        "test_prefix": "",
    },
}
180
+
181
+
182
class ApiKeyItem(BaseModel):
    """One row of the GET /settings/api-keys listing."""

    # Name of the key as it appears in ALLOWED_KEY_NAMES.
    key_name: str
    # True when the requesting user has a stored UserApiKey row for this name.
    is_set: bool
    # True when the server-side config module exposes a truthy value for this name.
    server_configured: bool
    # Display name and help text copied from ALLOWED_KEY_NAMES.
    label: str
    description: str
188
+
189
+
190
class ApiKeyListResponse(BaseModel):
    """Response body of GET /settings/api-keys: one item per allowed key name."""

    keys: list[ApiKeyItem]
192
+
193
+
194
class UpsertKeyRequest(BaseModel):
    """Request body of POST /settings/api-keys."""

    # Must be one of the ALLOWED_KEY_NAMES keys; anything else is rejected with 400.
    key_name: str
    # Plaintext key; the route encrypts it with encrypt_api_key before storing.
    value: str
197
+
198
+
199
class UpsertKeyResponse(BaseModel):
    """Response body of POST /settings/api-keys; never echoes the key value."""

    key_name: str
    # The upsert route always returns True here after a successful commit.
    is_set: bool
202
+
203
+
204
class TestKeyRequest(BaseModel):
    """Request body of POST /settings/api-keys/test (the key is probed, not saved)."""

    # Must be one of the ALLOWED_KEY_NAMES keys; anything else is rejected with 400.
    key_name: str
    # Plaintext key to send to the provider's test endpoint.
    value: str
207
+
208
+
209
class TestKeyResponse(BaseModel):
    """Outcome of a key probe."""

    # True when the provider accepted the key.
    valid: bool
    # Human-readable result: a success note, the provider's error, or the exception text.
    message: str
212
+
213
+
214
@router.get("/api-keys", response_model=ApiKeyListResponse)
async def get_api_keys(current_user: CurrentUser = Depends(get_current_user)) -> ApiKeyListResponse:
    """List every allowed key name with its per-user and server-level status.

    Key values are never returned; the response only says whether the
    requesting user has stored a key and whether the server config module
    exposes a truthy value under the same name.
    """
    owner_id = current_user.user.id

    async with get_async_session() as session:
        query = sa_select(UserApiKey).where(UserApiKey.user_id == owner_id)
        rows = (await session.execute(query)).scalars().all()
        # Only the names matter for the listing.
        stored_names = {row.key_name for row in rows}

    import config as _config

    return ApiKeyListResponse(
        keys=[
            ApiKeyItem(
                key_name=name,
                is_set=name in stored_names,
                server_configured=bool(getattr(_config, name, None)),
                label=info["label"],
                description=info["description"],
            )
            for name, info in ALLOWED_KEY_NAMES.items()
        ]
    )
238
+
239
+
240
@router.post("/api-keys", response_model=UpsertKeyResponse)
async def upsert_api_key(
    body: UpsertKeyRequest, current_user: CurrentUser = Depends(require_password_not_reset_pending)
) -> UpsertKeyResponse:
    """Create or replace the requesting user's stored value for one API key.

    The value is stripped of surrounding whitespace (a common copy/paste
    artefact) and encrypted before it is written.

    Raises:
        HTTPException(400): ``key_name`` is not in ``ALLOWED_KEY_NAMES`` or the
            value is empty after stripping.
    """
    if body.key_name not in ALLOWED_KEY_NAMES:
        raise HTTPException(status_code=400, detail=f"Unknown key_name: {body.key_name}")

    # Storing a blank key would make the listing report is_set=True for a
    # credential that can never work.
    value = body.value.strip()
    if not value:
        raise HTTPException(status_code=400, detail="API key value cannot be empty")

    encrypted = encrypt_api_key(value)
    user_id = current_user.user.id

    async with get_async_session() as session:
        result = await session.execute(
            sa_select(UserApiKey).where(
                UserApiKey.user_id == user_id,
                UserApiKey.key_name == body.key_name,
            )
        )
        existing = result.scalar_one_or_none()

        if existing:
            existing.encrypted_value = encrypted
        else:
            session.add(
                UserApiKey(
                    user_id=user_id,
                    key_name=body.key_name,
                    encrypted_value=encrypted,
                )
            )

        await session.commit()

    # The cached model list was built from the previous key set; drop it so
    # GET /settings/models reflects the new key immediately instead of after
    # the cache TTL expires.
    _models_cache.pop(user_id, None)

    return UpsertKeyResponse(key_name=body.key_name, is_set=True)
271
+
272
+
273
@router.delete("/api-keys/{key_name}", status_code=204)
async def delete_api_key(key_name: str, current_user: CurrentUser = Depends(get_current_user)) -> Response:
    """Remove the requesting user's stored value for ``key_name``.

    Deleting a key that is not stored is not an error; the route still
    answers 204.

    Raises:
        HTTPException(400): ``key_name`` is not in ``ALLOWED_KEY_NAMES``.
    """
    if key_name not in ALLOWED_KEY_NAMES:
        raise HTTPException(status_code=400, detail=f"Unknown key_name: {key_name}")

    user_id = current_user.user.id

    async with get_async_session() as session:
        result = await session.execute(
            sa_select(UserApiKey).where(
                UserApiKey.user_id == user_id,
                UserApiKey.key_name == key_name,
            )
        )
        record = result.scalar_one_or_none()
        if record:
            await session.delete(record)
            await session.commit()

    # The cached model list may have been built with the key that was just
    # removed; drop it so GET /settings/models is recomputed on next request.
    _models_cache.pop(user_id, None)

    return Response(status_code=204)
291
+
292
+
293
async def test_github_token(token: str) -> dict:
    """Probe GitHub's /rate_limit endpoint with ``token``.

    Returns:
        A dict with ``valid`` (bool) and ``message`` (str). On success the
        message reports the core hourly request limit GitHub returned.
    """
    import aiohttp

    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://api.github.com/rate_limit",
            headers={
                "Authorization": f"Bearer {token}",
                "Accept": "application/vnd.github+json",
            },
            # Same bound as the GitLab probe; without it a stalled connection
            # would wait for aiohttp's much longer default total timeout.
            timeout=aiohttp.ClientTimeout(total=15),
        ) as resp:
            if resp.status == 200:
                data = await resp.json()
                limit = (
                    data.get("resources", {}).get("core", {}).get("limit", 0)
                )
                return {
                    "valid": True,
                    "message": f"GitHub token valid — {limit} requests/hour",
                }
            if resp.status == 401:
                return {"valid": False, "message": "Invalid GitHub token"}
            # Anything else (403, 5xx, ...) is not evidence of a bad token, so
            # report the status instead of blaming the credential.
            return {"valid": False, "message": f"GitHub returned HTTP {resp.status}"}
315
+
316
+
317
async def test_gitlab_token(token: str) -> dict:
    """Check a GitLab token against /api/v4/user and report who it belongs to.

    Returns:
        A dict with ``valid`` (bool) and ``message`` (str).
    """
    import aiohttp

    request_timeout = aiohttp.ClientTimeout(total=15)
    auth_headers = {"PRIVATE-TOKEN": token}

    async with aiohttp.ClientSession() as client:
        async with client.get(
            "https://gitlab.com/api/v4/user",
            headers=auth_headers,
            timeout=request_timeout,
        ) as reply:
            status = reply.status
            if status == 401:
                return {"valid": False, "message": "Invalid or expired GitLab token"}
            if status != 200:
                return {"valid": False, "message": f"GitLab returned HTTP {status}"}

            profile = await reply.json()
            handle = profile.get("username", "unknown")
            return {
                "valid": True,
                "message": f"GitLab token valid — authenticated as @{handle}",
            }
337
+
338
+
339
@router.post("/api-keys/test", response_model=TestKeyResponse)
async def test_api_key(
    body: TestKeyRequest,
    current_user: CurrentUser = Depends(get_current_user),
) -> TestKeyResponse:
    """Probe a provider with the supplied key without saving it.

    ``current_user`` is not read; the dependency exists so that this route,
    like every other route in this module, rejects unauthenticated callers
    instead of acting as an open relay to the providers' test endpoints.

    Returns:
        ``TestKeyResponse`` with ``valid=True`` when the provider answered
        200/201, otherwise ``valid=False`` and a message describing why.

    Raises:
        HTTPException(400): ``key_name`` is not in ``ALLOWED_KEY_NAMES``.
    """
    if body.key_name not in ALLOWED_KEY_NAMES:
        raise HTTPException(status_code=400, detail=f"Unknown key_name: {body.key_name}")

    # GitHub and GitLab have dedicated probes that return richer messages
    # (hourly quota / authenticated username).
    dedicated_probes = {
        "GITHUB_TOKEN": test_github_token,
        "GITLAB_TOKEN": test_gitlab_token,
    }
    probe = dedicated_probes.get(body.key_name)
    if probe is not None:
        try:
            result = await probe(body.value)
        except asyncio.TimeoutError:
            # str(TimeoutError()) is empty, which would give the user a blank message.
            return TestKeyResponse(valid=False, message="Connection failed: request timed out")
        except Exception as exc:
            return TestKeyResponse(valid=False, message=str(exc))
        return TestKeyResponse(valid=result["valid"], message=result["message"])

    meta = ALLOWED_KEY_NAMES[body.key_name]
    test_url = meta["test_url"]
    test_header = meta["test_header"]
    test_prefix = meta["test_prefix"]

    headers = {}
    if test_header is None:
        # The key travels in the query string; percent-encode it so characters
        # such as "&" or "#" cannot alter the request URL.
        from urllib.parse import quote

        test_url = test_url.replace("{key}", quote(body.value, safe=""))
    else:
        headers[test_header] = f"{test_prefix or ''}{body.value}"

    # Import before the try block: the except clause below names
    # aiohttp.ClientError, which must be bound even when the request fails.
    try:
        import aiohttp
    except ImportError as exc:
        return TestKeyResponse(valid=False, message=str(exc))

    try:
        timeout = aiohttp.ClientTimeout(total=15)
        async with aiohttp.ClientSession(timeout=timeout) as http_session:
            async with http_session.get(test_url, headers=headers) as resp:
                if resp.status in (200, 201):
                    return TestKeyResponse(valid=True, message="Connected successfully")
                text = await resp.text()
                return TestKeyResponse(
                    valid=False, message=f"API returned {resp.status}: {text[:200]}"
                )
    except asyncio.TimeoutError:
        return TestKeyResponse(valid=False, message="Connection failed: request timed out")
    except aiohttp.ClientError as exc:
        return TestKeyResponse(valid=False, message=f"Connection failed: {exc}")
    except Exception as exc:
        return TestKeyResponse(valid=False, message=str(exc))
390
+
391
+
392
+ # ---------------------------------------------------------------------------
393
+ # Model List — GET /settings/models
394
+ # ---------------------------------------------------------------------------
395
+
396
# Simple in-memory TTL cache: {user_id: (timestamp, result)}
# Written by get_models after each fresh build; entries older than
# _MODELS_CACHE_TTL seconds are ignored on read. Lives in this process only.
_models_cache: dict = {}
_MODELS_CACHE_TTL = 300  # 5 minutes

# Simple per-user validate rate-limit: {user_id: [timestamps]}
# validate_model keeps only timestamps younger than _VALIDATE_RATE_WINDOW and
# refuses the call once _VALIDATE_RATE_LIMIT of them remain.
_validate_rate: dict = {}
_VALIDATE_RATE_LIMIT = 10  # max calls per minute
_VALIDATE_RATE_WINDOW = 60  # seconds
404
+
405
+
406
class ModelInfo(BaseModel):
    """Description of one selectable model."""

    # Model identifier; the OpenRouter, Groq and Ollama fetchers prefix it
    # with "openrouter/", "groq/" and "ollama/" respectively.
    id: str
    # Display name.
    name: str
    # Provider display name, e.g. "OpenRouter", "Groq", "OpenAI".
    provider: str
    free_tier: bool = False
    recommended: bool = False
    # Context size when the provider reports one; only the OpenRouter fetcher sets it.
    context_window: Optional[int] = None
413
+
414
+
415
class ProviderInfo(BaseModel):
    """One provider section of the model list."""

    # Provider display name.
    name: str
    # API key name that enables this provider; get_models passes "" for Ollama.
    key_name: str
    # True when a key (or, for Ollama, a base URL) resolved to a non-empty value.
    configured: bool
    # Models offered by this provider; empty when not configured or when the fetch failed.
    models: List[ModelInfo]
420
+
421
+
422
class ModelListResponse(BaseModel):
    """Response body of GET /settings/models."""

    providers: List[ProviderInfo]
    # get_models always sends True.
    custom_model_allowed: bool = True
425
+
426
+
427
class ValidateModelRequest(BaseModel):
    """Request body of POST /settings/models/validate."""

    # Model identifier to probe; the route strips surrounding whitespace.
    model_id: str
429
+
430
+
431
class ValidateModelResponse(BaseModel):
    """Typed outcome of a model validation probe."""

    valid: bool
    # The (stripped) model ID that was tested.
    model_id: str
    # Provider display name inferred from the model ID, when available.
    provider: Optional[str] = None
    # Human-readable result.
    message: str
    # Machine-readable failure code set by validate_model: "rate_limited",
    # "model_not_found", "no_key_configured", "invalid_api_key" or "provider_error".
    error: Optional[str] = None
    # Optional hint on how to resolve the failure.
    suggestion: Optional[str] = None
438
+
439
+
440
+ def _infer_provider(model_id: str) -> str:
441
+ """Return a friendly provider name from a model ID."""
442
+ mc = model_id.lower()
443
+ if mc.startswith("openrouter/"):
444
+ return "OpenRouter"
445
+ if mc.startswith("groq/"):
446
+ return "Groq"
447
+ if mc.startswith("gpt-"):
448
+ return "OpenAI"
449
+ if mc.startswith("claude-"):
450
+ return "Anthropic"
451
+ if mc.startswith("gemini-"):
452
+ return "Google"
453
+ if mc.startswith("ollama/"):
454
+ return "Ollama"
455
+ return "OpenRouter"
456
+
457
+
458
+ async def _fetch_openrouter_models(api_key: str) -> List[ModelInfo]:
459
+ """Fetch models from OpenRouter API, capped at 100."""
460
+ import aiohttp
461
+ try:
462
+ timeout = aiohttp.ClientTimeout(total=10)
463
+ async with aiohttp.ClientSession(timeout=timeout) as s:
464
+ async with s.get(
465
+ "https://openrouter.ai/api/v1/models",
466
+ headers={"Authorization": f"Bearer {api_key}"},
467
+ ) as resp:
468
+ if resp.status != 200:
469
+ return []
470
+ data = await resp.json()
471
+ raw = data.get("data", [])
472
+ models = []
473
+ for m in raw[:100]:
474
+ mid = m.get("id", "")
475
+ if not mid:
476
+ continue
477
+ name = m.get("name") or mid.split("/")[-1]
478
+ ctx = m.get("context_length") or m.get("context_window")
479
+ is_free = ":free" in mid or "free" in (m.get("pricing", {}).get("prompt", "") or "0")
480
+ models.append(ModelInfo(
481
+ id=f"openrouter/{mid}",
482
+ name=name,
483
+ provider="OpenRouter",
484
+ free_tier=is_free,
485
+ recommended=is_free,
486
+ context_window=int(ctx) if ctx else None,
487
+ ))
488
+ return models
489
+ except Exception:
490
+ return []
491
+
492
+
493
async def _fetch_groq_models(api_key: str) -> List[ModelInfo]:
    """List the models Groq exposes for ``api_key``.

    Best-effort: a non-200 answer or any exception yields an empty list.
    """
    import aiohttp
    try:
        client_timeout = aiohttp.ClientTimeout(total=10)
        auth = {"Authorization": f"Bearer {api_key}"}
        async with aiohttp.ClientSession(timeout=client_timeout) as client:
            async with client.get(
                "https://api.groq.com/openai/v1/models", headers=auth
            ) as reply:
                if reply.status != 200:
                    return []
                payload = await reply.json()

        # Entries without an id are skipped; every Groq model is flagged free tier.
        return [
            ModelInfo(
                id=f"groq/{model_name}",
                name=model_name.replace("-", " ").title(),
                provider="Groq",
                free_tier=True,
                recommended="llama-3.3" in model_name or "70b" in model_name,
            )
            for entry in payload.get("data", [])
            if (model_name := entry.get("id", ""))
        ]
    except Exception:
        return []
522
+
523
+
524
async def _fetch_openai_models(api_key: str) -> List[ModelInfo]:
    """List OpenAI models whose ID starts with ``gpt-4`` or ``gpt-3.5``, sorted by ID.

    Best-effort: a non-200 answer or any exception yields an empty list.
    """
    import aiohttp
    try:
        client_timeout = aiohttp.ClientTimeout(total=10)
        auth = {"Authorization": f"Bearer {api_key}"}
        async with aiohttp.ClientSession(timeout=client_timeout) as client:
            async with client.get(
                "https://api.openai.com/v1/models", headers=auth
            ) as reply:
                if reply.status != 200:
                    return []
                payload = await reply.json()

        wanted_prefixes = ("gpt-4", "gpt-3.5")
        selected = []
        for entry in payload.get("data", []):
            model_name = entry.get("id", "")
            if model_name.startswith(wanted_prefixes):
                selected.append(ModelInfo(
                    id=model_name,
                    name=model_name,
                    provider="OpenAI",
                    recommended="gpt-4o" in model_name,
                ))
        selected.sort(key=lambda item: item.id)
        return selected
    except Exception:
        return []
552
+
553
+
554
async def _fetch_ollama_models(base_url: str) -> List[ModelInfo]:
    """List the models reported by the Ollama server's ``/api/tags`` endpoint.

    Best-effort: a non-200 answer or any exception yields an empty list.
    """
    import aiohttp
    try:
        tags_url = base_url.rstrip("/") + "/api/tags"
        client_timeout = aiohttp.ClientTimeout(total=5)
        async with aiohttp.ClientSession(timeout=client_timeout) as client:
            async with client.get(tags_url) as reply:
                if reply.status != 200:
                    return []
                payload = await reply.json()

        # Prefer the "name" field, fall back to "model"; skip entries with neither.
        return [
            ModelInfo(
                id=f"ollama/{tag}",
                name=tag,
                provider="Ollama",
                free_tier=True,
            )
            for entry in payload.get("models", [])
            if (tag := entry.get("name") or entry.get("model", ""))
        ]
    except Exception:
        return []
579
+
580
+
581
@router.get("/models", response_model=ModelListResponse)
async def get_models(
    current_user: CurrentUser = Depends(get_current_user),
) -> ModelListResponse:
    """Return the models available to the caller, grouped by provider.

    Provider keys are resolved per user via ``resolve_api_key``. OpenRouter,
    Groq, OpenAI and Ollama lists are fetched live and concurrently;
    Anthropic and Google use fixed lists. The result is cached per user for
    ``_MODELS_CACHE_TTL`` seconds.
    """
    uid = current_user.user.id

    # Serve from the per-user cache while the entry is still fresh.
    entry = _models_cache.get(uid)
    if entry and (time.time() - entry[0]) < _MODELS_CACHE_TTL:
        return entry[1]

    import config as _config

    # Resolve effective keys (user override > server)
    provider_key_names = (
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GOOGLE_API_KEY",
        "OPENROUTER_API_KEY",
        "GROQ_API_KEY",
    )
    async with get_async_session() as session:
        from utils.user_keys import resolve_api_key
        effective = {}
        for name in provider_key_names:
            effective[name] = await resolve_api_key(uid, name, session)

    openrouter_key = effective.get("OPENROUTER_API_KEY") or ""
    groq_key = effective.get("GROQ_API_KEY") or ""
    openai_key = effective.get("OPENAI_API_KEY") or ""
    anthropic_key = effective.get("ANTHROPIC_API_KEY") or ""
    google_key = effective.get("GOOGLE_API_KEY") or ""
    ollama_url = getattr(_config, "OLLAMA_BASE_URL", "") or ""

    # Queue one live fetch per provider that has a credential (or URL).
    pending = []
    if openrouter_key:
        pending.append(("openrouter", _fetch_openrouter_models(openrouter_key)))
    if groq_key:
        pending.append(("groq", _fetch_groq_models(groq_key)))
    if openai_key:
        pending.append(("openai", _fetch_openai_models(openai_key)))
    if ollama_url:
        pending.append(("ollama", _fetch_ollama_models(ollama_url)))

    live = {}
    if pending:
        outcomes = await asyncio.gather(
            *(coro for _, coro in pending), return_exceptions=True
        )
        for (label, _), outcome in zip(pending, outcomes):
            # An exception object in place of a list means that fetch failed.
            live[label] = outcome if isinstance(outcome, list) else []

    # Hardcoded model lists for providers without free list APIs
    anthropic_models = [
        ModelInfo(id="claude-opus-4-5", name="Claude Opus 4.5", provider="Anthropic", recommended=True),
        ModelInfo(id="claude-sonnet-4-5", name="Claude Sonnet 4.5", provider="Anthropic", recommended=True),
        ModelInfo(id="claude-haiku-4-5-20251001", name="Claude Haiku 4.5", provider="Anthropic"),
    ]
    google_models = [
        ModelInfo(id="gemini-2.0-flash", name="Gemini 2.0 Flash", provider="Google", recommended=True),
        ModelInfo(id="gemini-1.5-pro", name="Gemini 1.5 Pro", provider="Google"),
        ModelInfo(id="gemini-1.5-flash", name="Gemini 1.5 Flash", provider="Google"),
    ]

    # (display name, key name, configured flag, models) in response order.
    sections = (
        ("OpenRouter", "OPENROUTER_API_KEY", bool(openrouter_key), live.get("openrouter", [])),
        ("Groq", "GROQ_API_KEY", bool(groq_key), live.get("groq", [])),
        ("Anthropic", "ANTHROPIC_API_KEY", bool(anthropic_key), anthropic_models if anthropic_key else []),
        ("OpenAI", "OPENAI_API_KEY", bool(openai_key), live.get("openai", [])),
        ("Google", "GOOGLE_API_KEY", bool(google_key), google_models if google_key else []),
        ("Ollama", "", bool(ollama_url), live.get("ollama", [])),
    )
    providers = [
        ProviderInfo(name=title, key_name=key, configured=enabled, models=offered)
        for title, key, enabled, offered in sections
    ]

    response = ModelListResponse(providers=providers, custom_model_allowed=True)
    _models_cache[uid] = (time.time(), response)
    return response
687
+
688
+
689
+ # ---------------------------------------------------------------------------
690
+ # Model Validate — POST /settings/models/validate
691
+ # ---------------------------------------------------------------------------
692
+
693
def _provider_key_params(provider: str, api_keys: dict) -> dict:
    """Return the constructor kwarg that carries the API key for ``provider`` only.

    Several providers share the ``api_key`` kwarg, so injecting every
    configured key would let an unrelated provider's key overwrite the right
    one. Providers without a key (e.g. Ollama) and unset keys yield ``{}``.
    """
    binding = {
        "OpenAI": ("OPENAI_API_KEY", "api_key"),
        "OpenRouter": ("OPENROUTER_API_KEY", "api_key"),
        "Groq": ("GROQ_API_KEY", "api_key"),
        "Anthropic": ("ANTHROPIC_API_KEY", "anthropic_api_key"),
        "Google": ("GOOGLE_API_KEY", "google_api_key"),
    }.get(provider)
    if binding is None:
        return {}
    env_key, param_key = binding
    value = api_keys.get(env_key)
    return {param_key: value} if value else {}


@router.post("/models/validate", response_model=ValidateModelResponse)
async def validate_model(
    body: ValidateModelRequest,
    current_user: CurrentUser = Depends(get_current_user),
) -> ValidateModelResponse:
    """
    Test whether a model ID is accessible with the current keys.
    Makes a minimal 1-token API call and returns typed success/failure.
    Rate limited to 10 requests per minute per user.

    Failures are reported in the response body (``valid=False`` plus an
    ``error`` code) rather than as HTTP errors.
    """
    user_id = current_user.user.id
    model_id = body.model_id.strip()

    # Per-user rate limiting: keep only timestamps inside the sliding window.
    now = time.time()
    history = [
        t for t in _validate_rate.get(user_id, [])
        if now - t < _VALIDATE_RATE_WINDOW
    ]
    if len(history) >= _VALIDATE_RATE_LIMIT:
        return ValidateModelResponse(
            valid=False,
            model_id=model_id,
            error="rate_limited",
            message="Too many validation requests. Please wait a moment and try again.",
        )
    history.append(now)
    _validate_rate[user_id] = history

    if not model_id:
        return ValidateModelResponse(
            valid=False,
            model_id=model_id,
            error="model_not_found",
            message="Model ID cannot be empty.",
        )

    # Resolve user keys
    async with get_async_session() as session:
        from utils.user_keys import resolve_api_key
        api_keys = {}
        for key_name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GOOGLE_API_KEY",
                         "OPENROUTER_API_KEY", "GROQ_API_KEY"):
            api_keys[key_name] = await resolve_api_key(user_id, key_name, session)

    provider = _infer_provider(model_id)

    try:
        # Only what is used is imported here: importing every provider's
        # langchain package would make validation fail for all providers
        # whenever a single optional package is not installed.
        from voidaccess.llm_utils import resolve_model_config, _common_llm_params

        config = resolve_model_config(model_id)
        if config is None:
            return ValidateModelResponse(
                valid=False,
                model_id=model_id,
                provider=provider,
                error="model_not_found",
                message=f"Model '{model_id}' could not be resolved to any provider.",
                suggestion="Check the model ID or browse available models.",
            )

        llm_class = config["class"]
        ctor_params = dict(config.get("constructor_params", {}))

        # Inject the resolved key for this model's provider only.
        ctor_params.update(_provider_key_params(provider, api_keys))

        # Build minimal (non-streaming) LLM instance for probe
        probe_params = {k: v for k, v in _common_llm_params.items() if k != "streaming"}
        probe_params["streaming"] = False
        all_params = {**probe_params, **ctor_params, "max_tokens": 1}

        def _probe():
            llm = llm_class(**all_params)
            llm.invoke("hi")

        # The blocking call runs in a worker thread; wait_for stops waiting
        # after 15s, but the thread itself is not interrupted.
        await asyncio.wait_for(asyncio.to_thread(_probe), timeout=15)

        return ValidateModelResponse(
            valid=True,
            model_id=model_id,
            provider=provider,
            message="Model accessible",
        )

    except asyncio.TimeoutError:
        return ValidateModelResponse(
            valid=False,
            model_id=model_id,
            provider=provider,
            error="provider_error",
            message="Model validation timed out. The provider may be slow or unreachable.",
        )
    except ValueError as exc:
        msg = str(exc)
        if "No API key" in msg or "not set" in msg.lower():
            return ValidateModelResponse(
                valid=False,
                model_id=model_id,
                provider=provider,
                error="no_key_configured",
                message=msg,
                suggestion="Add the required API key in Settings.",
            )
        return ValidateModelResponse(
            valid=False,
            model_id=model_id,
            provider=provider,
            error="model_not_found",
            message=msg,
            suggestion="Browse available models or check the provider docs for valid IDs.",
        )
    except Exception as exc:
        # Provider SDKs raise heterogeneous exception types, so the failure
        # is classified by markers in the exception text.
        exc_str = str(exc)
        lowered = exc_str.lower()
        if "401" in exc_str or "authentication" in lowered or "invalid api key" in lowered:
            return ValidateModelResponse(
                valid=False,
                model_id=model_id,
                provider=provider,
                error="invalid_api_key",
                message="API key is invalid or expired. Check your key in Settings.",
            )
        if "404" in exc_str or "not found" in lowered or "does not exist" in lowered:
            return ValidateModelResponse(
                valid=False,
                model_id=model_id,
                provider=provider,
                error="model_not_found",
                message=f"Model '{model_id}' not found. Check the model ID and try again.",
                suggestion="Browse available models or check https://openrouter.ai/models for valid IDs.",
            )
        if "429" in exc_str or "rate limit" in lowered:
            return ValidateModelResponse(
                valid=False,
                model_id=model_id,
                provider=provider,
                error="rate_limited",
                message="Provider rate limit hit. Try again in a moment.",
            )
        return ValidateModelResponse(
            valid=False,
            model_id=model_id,
            provider=provider,
            error="provider_error",
            message=f"Provider returned an unexpected error: {exc_str[:200]}",
        )
851
+