voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
analysis/temporal.py ADDED
@@ -0,0 +1,201 @@
1
+ """
2
+ analysis/temporal.py — Time-series analysis of forum and actor behavior.
3
+
4
+ Detects anomalies that historically precede significant events (exit scams,
5
+ law enforcement actions, major releases).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ import math
12
+ from collections import defaultdict
13
+ from datetime import date, datetime, timedelta, timezone
14
+ from typing import Any, Optional
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def build_activity_timeline(
    entity_value: str,
    entity_type: str,
    since: Optional[datetime] = None,
) -> list[dict]:
    """
    Build a per-day appearance timeline for a single entity.

    Looks up every Entity row matching (entity_type, entity_value), loads the
    pages those rows reference and buckets the pages by the calendar day of
    their ``posted_at`` value. When ``since`` is given, only pages whose
    ``scrape_timestamp`` is at or after it are loaded.

    Returns a list of {"date": date, "count": int, "page_ids": list[str]}
    ordered by date. Pages without ``posted_at`` are left out (and counted in
    a debug log line). Any exception, including an unavailable DB, is logged
    at debug level and results in []. Never raises.
    """
    try:
        # Imported lazily so that a missing/unconfigured DB layer is handled
        # by the except clause below instead of failing at module import.
        from db.models import Entity, Page
        from db.session import get_session

        with get_session() as session:
            matches = (
                session.query(Entity)
                .filter(
                    Entity.entity_type == entity_type,
                    Entity.value == entity_value,
                )
                .all()
            )

            # De-duplicate page references; rows without a page are ignored.
            # An empty match list naturally yields an empty id list as well.
            unique_page_ids = list(
                {match.page_id for match in matches if match.page_id is not None}
            )
            if not unique_page_ids:
                return []

            page_query = session.query(Page).filter(Page.id.in_(unique_page_ids))
            if since is not None:
                page_query = page_query.filter(Page.scrape_timestamp >= since)

            buckets = defaultdict(list)
            undated = 0
            for page in page_query.all():
                posted = page.posted_at
                if posted is None:
                    undated += 1
                    continue
                # datetime values are reduced to their calendar day; anything
                # without a .date() method is used as the bucket key as-is.
                day = posted.date() if hasattr(posted, "date") else posted
                buckets[day].append(str(page.id))

            if undated:
                logger.debug(
                    "build_activity_timeline: skipped %d pages due to missing posted_at",
                    undated,
                )

            return [
                {"date": day, "count": len(buckets[day]), "page_ids": buckets[day]}
                for day in sorted(buckets)
            ]

    except Exception as exc:
        logger.debug("build_activity_timeline: DB unavailable (%s)", exc)
        return []
82
+
83
+
84
def compute_activity_stats(timeline: list[dict]) -> dict:
    """
    Compute summary statistics for an activity timeline.

    ``timeline`` is a list of entries shaped like
    {"date": date, "count": int, ...}, one per active day. The entries may be
    in any order: first_seen / last_seen are the earliest / latest dates
    present, not simply the first and last list elements.

    Returns a dict with mean_daily, std_daily, peak_day, peak_count,
    total_appearances, active_days, first_seen, last_seen. For an empty
    timeline the numeric fields are zero and the date fields are None.
    """
    if not timeline:
        return {
            "mean_daily": 0.0,
            "std_daily": 0.0,
            "peak_day": None,
            "peak_count": 0,
            "total_appearances": 0,
            "active_days": 0,
            "first_seen": None,
            "last_seen": None,
        }

    counts = [entry["count"] for entry in timeline]
    dates = [entry["date"] for entry in timeline]

    n = len(counts)
    total = sum(counts)
    mean_daily = total / n

    # Population (divide-by-n) variance over the days present in the timeline;
    # days with no entry are not counted as zeros.
    variance = sum((c - mean_daily) ** 2 for c in counts) / n
    std_daily = math.sqrt(variance)

    # On ties the first entry (in input order) with the maximum count wins.
    peak_idx = counts.index(max(counts))

    return {
        "mean_daily": float(mean_daily),
        "std_daily": float(std_daily),
        "peak_day": dates[peak_idx],
        "peak_count": int(counts[peak_idx]),
        "total_appearances": int(total),
        "active_days": n,
        # min/max instead of dates[0]/dates[-1] so an unsorted timeline still
        # reports the true first and last day.
        "first_seen": min(dates),
        "last_seen": max(dates),
    }
125
+
126
+
127
# Tunables for detect_anomalies.
Z_SCORE_THRESHOLD = 2.5  # default |z| a day must exceed to be flagged
MIN_DATA_POINTS = 10  # timelines with fewer entries are never analysed
MIN_ABSOLUTE_SPIKE = 5  # upward outliers with a raw count below this are ignored


def detect_anomalies(
    timeline: list[dict],
    z_threshold: float = Z_SCORE_THRESHOLD,
) -> list[dict]:
    """
    Flag days whose activity lies more than z_threshold standard deviations
    away from the timeline mean.

    Returns list of {"date": date, "count": int, "z_score": float, "type": str}
    in timeline order, where "type" is "spike" (above the mean) or "drop"
    (below the mean).

    Returns [] when the timeline has fewer than MIN_DATA_POINTS entries or when
    every day has the same count (zero standard deviation). A spike is only
    reported if its raw count is at least MIN_ABSOLUTE_SPIKE; drops are not
    subject to that minimum.
    """
    if len(timeline) < MIN_DATA_POINTS:
        return []

    summary = compute_activity_stats(timeline)
    baseline, spread = summary["mean_daily"], summary["std_daily"]

    # A flat timeline has no meaningful z-scores (and would divide by zero).
    if spread == 0.0:
        return []

    flagged: list[dict] = []
    for point in timeline:
        observed = point["count"]
        score = (observed - baseline) / spread
        is_spike = score > 0

        exceeds_threshold = abs(score) > z_threshold
        # Statistically large but numerically tiny upward moves are noise.
        negligible_spike = is_spike and observed < MIN_ABSOLUTE_SPIKE

        if exceeds_threshold and not negligible_spike:
            flagged.append(
                {
                    "date": point["date"],
                    "count": observed,
                    "z_score": float(score),
                    "type": "spike" if is_spike else "drop",
                }
            )

    return flagged
169
+
170
+
171
def detect_silence_breaks(
    timeline: list[dict],
    silence_days: int = 14,
) -> list[dict]:
    """
    Locate gaps of at least silence_days between consecutive active days.

    The timeline does not need to be pre-sorted; its dates are ordered here
    before neighbouring days are compared.

    Returns list of {"silent_from": date, "silent_to": date, "gap_days": int},
    oldest gap first, where silent_from is the last active day before the gap
    and silent_to is the day activity resumed. Timelines with fewer than two
    entries yield []. Useful for spotting actors that go quiet and reappear.
    """
    if len(timeline) < 2:
        return []

    ordered_days = sorted(entry["date"] for entry in timeline)

    # Walk neighbouring (earlier, later) pairs of active days.
    return [
        {
            "silent_from": earlier,
            "silent_to": later,
            "gap_days": (later - earlier).days,
        }
        for earlier, later in zip(ordered_days, ordered_days[1:])
        if (later - earlier).days >= silence_days
    ]
api/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # api package — Phase 5 FastAPI REST API
api/auth.py ADDED
@@ -0,0 +1,163 @@
1
+ """
2
+ JWT authentication for VoidAccess API.
3
+
4
+ Flow:
5
+ 1. POST /auth/login → returns access_token (JWT, 8hr expiry)
6
+ 2. All protected routes require: Authorization: Bearer {token}
7
+ 3. First login with default password → returns {must_reset: true}
8
+ 4. POST /auth/reset-password → sets new password, clears must_reset flag
9
+ """
10
+
11
+ import secrets
12
+ from datetime import datetime, timedelta, timezone
13
+ from typing import Optional
14
+
15
+ from fastapi import Depends, HTTPException, status
16
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
17
+ from jose import JWTError, jwt
18
+ from passlib.context import CryptContext
19
+ from pydantic import BaseModel
20
+ from sqlalchemy.orm import Session
21
+
22
+ from config import JWT_SECRET
23
+ from db.models import User
24
+ from db.session import get_session, get_db
25
+ from auth.token_blacklist import is_token_revoked
26
+
27
+ # Config — single canonical source from config.py
28
+ SECRET = JWT_SECRET
29
+ JWT_ALGORITHM = "HS256"
30
+ JWT_EXPIRY_HOURS = 8
31
+
32
+ pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
33
+ bearer_scheme = HTTPBearer()
34
+
35
+
36
+ # ─── Pydantic schemas ──────────────────────────────────────────────────────
37
+
38
class LoginRequest(BaseModel):
    # Credentials submitted by a client when logging in.
    # NOTE(review): presumably the request body of POST /auth/login described
    # in the module docstring — the route itself is not defined in this module.
    email: str
    password: str
41
+
42
class LoginResponse(BaseModel):
    # Result of a successful login: the JWT, its scheme label and a flag
    # telling the client whether a password reset is required first.
    access_token: str
    token_type: str = "bearer"  # sent as-is unless the caller overrides it
    must_reset_password: bool
46
+
47
class ResetPasswordRequest(BaseModel):
    # Payload for changing a password.
    # NOTE(review): presumably the body of POST /auth/reset-password; this
    # schema does not itself check that new_password == confirm_password, so
    # that comparison has to happen in the handler — confirm.
    current_password: str
    new_password: str
    confirm_password: str
51
+
52
class TokenData(BaseModel):
    # Identity fields carried by a token; jti is optional here.
    # NOTE(review): not referenced by the other definitions in this module
    # (decode_token returns TokenPayload) — check external importers before
    # removing or changing it.
    user_id: int
    email: str
    jti: Optional[str] = None
56
+
57
+
58
+ # ─── Core functions ────────────────────────────────────────────────────────
59
+
60
def verify_password(plain: str, hashed: str) -> bool:
    """Check a plaintext password against a stored hash via the module's passlib context."""
    is_match = pwd_context.verify(plain, hashed)
    return is_match
62
+
63
def hash_password(plain: str) -> str:
    """Hash a plaintext password with the module's passlib context and return the hash string."""
    hashed = pwd_context.hash(plain)
    return hashed
65
+
66
def create_access_token(user_id: int, email: str) -> tuple[str, str]:
    """
    Issue a signed JWT for the given user.

    The token carries the user id as the "sub" claim (stringified), the email,
    a random "jti" identifier, and "iat"/"exp" timestamps in UTC, with expiry
    JWT_EXPIRY_HOURS after issuance.

    Returns (token, jti) so the caller can keep the identifier of the token
    it just issued.
    """
    issued_at = datetime.now(timezone.utc)
    token_id = secrets.token_hex(16)  # 16 random bytes -> 32 hex characters

    claims = {
        "sub": str(user_id),
        "email": email,
        "jti": token_id,
        "exp": issued_at + timedelta(hours=JWT_EXPIRY_HOURS),
        "iat": issued_at,
    }
    return jwt.encode(claims, SECRET, algorithm=JWT_ALGORITHM), token_id
78
+
79
class TokenPayload(BaseModel):
    # Validated claims of a decoded JWT, as returned by decode_token.
    user_id: int  # taken from the "sub" claim
    email: str
    jti: str  # empty string when the token carried no "jti" claim
    exp: datetime  # taken from the "exp" claim
84
+
85
+
86
def decode_token(token: str) -> TokenPayload:
    """
    Verify a JWT and return its claims as a TokenPayload.

    Raises JWTError when jwt.decode rejects the token, and also when a
    correctly signed token lacks the "sub", "email" or "exp" claim or carries
    values that cannot be converted. Raising JWTError for the latter keeps
    every "bad token" case on the one exception type that get_current_user
    translates into a 401, instead of leaking KeyError / ValueError as a 500.
    """
    payload = jwt.decode(token, SECRET, algorithms=[JWT_ALGORITHM])
    try:
        return TokenPayload(
            user_id=int(payload["sub"]),
            email=payload["email"],
            # A token without "jti" yields "", which get_current_user treats
            # as "no revocation lookup".
            jti=payload.get("jti", ""),
            exp=payload["exp"],
        )
    except (KeyError, TypeError, ValueError) as exc:
        # ValueError also covers pydantic's ValidationError (a ValueError
        # subclass) raised while building TokenPayload.
        raise JWTError("Token claims are missing or malformed") from exc
93
+
94
+
95
+ from pydantic import BaseModel, ConfigDict
96
+
97
+
98
+ # ─── FastAPI dependency ────────────────────────────────────────────────────
99
+
100
class CurrentUser(BaseModel):
    # Authenticated-request context returned by get_current_user: the loaded
    # user row plus the id and expiry of the token that authenticated it.

    # User (from db.models) is not a pydantic type, so arbitrary types must be
    # allowed for the `user` field to be accepted.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    user: User
    jti: str  # may be an empty string for tokens issued without a "jti" claim
    exp: Optional[datetime] = None
106
+
107
+
108
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
    db: Session = Depends(get_db),
) -> CurrentUser:
    """
    Dependency for protected routes.
    Usage: current: CurrentUser = Depends(get_current_user)

    Decodes the bearer token, rejects it if its jti has been revoked, and
    loads the matching active user through the request-scoped ``db`` session
    so the returned User instance is not detached.

    Raises HTTPException(401) when the token cannot be decoded, has been
    revoked, or does not correspond to an active user.
    """

    def _unauthorized(detail: str) -> HTTPException:
        # All rejections share the same status code and challenge header.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    try:
        claims = decode_token(credentials.credentials)
    except JWTError:
        raise _unauthorized("Invalid or expired token")

    # Tokens without a jti (empty string) skip the revocation lookup.
    if claims.jti and await is_token_revoked(claims.jti):
        raise _unauthorized("Token has been revoked")

    account = (
        db.query(User)
        .filter(
            User.id == claims.user_id,
            User.is_active == True,  # noqa: E712 - SQLAlchemy column comparison
        )
        .first()
    )
    if not account:
        # Unknown or deactivated users get the same answer as a bad token.
        raise _unauthorized("Invalid or expired token")

    return CurrentUser(user=account, jti=claims.jti, exp=claims.exp)
145
+
146
+
147
async def require_password_not_reset_pending(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """
    Dependency for resource-creating endpoints that refuses users who still
    have to change their password.

    Returns the authenticated CurrentUser unchanged when no reset is pending;
    otherwise raises HTTPException(403) with a structured detail payload
    pointing the client at POST /auth/reset-password.
    """
    # A user object without the attribute is treated as "no reset pending".
    reset_pending = getattr(current_user.user, "must_reset_password", False)
    if not reset_pending:
        return current_user

    raise HTTPException(
        status_code=403,
        detail={
            "error": "password_reset_required",
            "message": (
                "You must change your password before continuing. "
                "Use POST /auth/reset-password"
            ),
            "code": "PASSWORD_RESET_REQUIRED",
        },
    )