voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
analysis/temporal.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
analysis/temporal.py — Time-series analysis of forum and actor behavior.
|
|
3
|
+
|
|
4
|
+
Detects anomalies that historically precede significant events (exit scams,
|
|
5
|
+
law enforcement actions, major releases).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import math
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
from datetime import date, datetime, timedelta, timezone
|
|
14
|
+
from typing import Any, Optional
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def build_activity_timeline(
    entity_value: str,
    entity_type: str,
    since: Optional[datetime] = None,
) -> list[dict]:
    """
    Build a per-day appearance timeline for a single entity.

    Looks up every Entity row matching ``entity_type`` / ``entity_value``,
    loads the pages those rows point at, and buckets the pages by the
    calendar day of ``Page.posted_at``.

    Args:
        entity_value: Exact entity value to match.
        entity_type: Exact entity type to match.
        since: Optional lower bound, applied to ``Page.scrape_timestamp``.
            NOTE(review): the bound is on scrape time while bucketing uses
            ``posted_at`` — confirm this asymmetry is intended.

    Returns:
        A list of {"date": date, "count": int, "page_ids": list[str]} sorted
        by date. Returns [] when nothing matches or when any exception is
        raised (the failure is logged at DEBUG level). Never raises.
    """
    try:
        # Imported lazily so a missing/broken DB layer lands in the except below.
        from db.models import Entity, Page
        from db.session import get_session

        with get_session() as session:
            matches = (
                session.query(Entity)
                .filter(
                    Entity.entity_type == entity_type,
                    Entity.value == entity_value,
                )
                .all()
            )
            if not matches:
                return []

            # De-duplicate page references; rows without a page are ignored.
            page_ids = list({m.page_id for m in matches if m.page_id is not None})
            if not page_ids:
                return []

            page_query = session.query(Page).filter(Page.id.in_(page_ids))
            if since is not None:
                page_query = page_query.filter(Page.scrape_timestamp >= since)
            pages = page_query.all()
            if not pages:
                return []

            pages_by_day: dict[date, list[str]] = defaultdict(list)
            undated = 0
            for page in pages:
                posted = page.posted_at
                if posted is not None:
                    # datetime exposes .date(); a plain date is used as-is.
                    day = posted.date() if hasattr(posted, "date") else posted
                    pages_by_day[day].append(str(page.id))
                else:
                    undated += 1

            if undated > 0:
                logger.debug(
                    "build_activity_timeline: skipped %d pages due to missing posted_at",
                    undated,
                )

            timeline = []
            for day in sorted(pages_by_day):
                ids = pages_by_day[day]
                timeline.append({"date": day, "count": len(ids), "page_ids": ids})
            return timeline

    except Exception as exc:
        logger.debug("build_activity_timeline: DB unavailable (%s)", exc)
        return []
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def compute_activity_stats(timeline: list[dict]) -> dict:
    """
    Compute summary statistics for an activity timeline.

    Args:
        timeline: Entries shaped like {"date": ..., "count": int, ...}.
            The entries do not need to be sorted by date.

    Returns:
        A dict with the keys:
          - mean_daily / std_daily: mean and population standard deviation of
            the per-entry counts (averaged over timeline entries, i.e. days
            that appear in the timeline, not over every calendar day).
          - peak_day / peak_count: date and count of the busiest entry; the
            earliest-listed entry wins on ties.
          - total_appearances: sum of all counts.
          - active_days: number of timeline entries.
          - first_seen / last_seen: earliest and latest date in the timeline.
        For an empty timeline every numeric field is zero and every date
        field is None.
    """
    if not timeline:
        return {
            "mean_daily": 0.0,
            "std_daily": 0.0,
            "peak_day": None,
            "peak_count": 0,
            "total_appearances": 0,
            "active_days": 0,
            "first_seen": None,
            "last_seen": None,
        }

    counts = [entry["count"] for entry in timeline]
    dates = [entry["date"] for entry in timeline]

    n = len(counts)
    total = sum(counts)
    mean_daily = total / n

    # Population variance (divide by n, not n - 1).
    variance = sum((c - mean_daily) ** 2 for c in counts) / n
    std_daily = math.sqrt(variance)

    # list.index returns the first occurrence, so ties resolve to the
    # earliest-listed entry.
    peak_idx = counts.index(max(counts))

    return {
        "mean_daily": float(mean_daily),
        "std_daily": float(std_daily),
        "peak_day": dates[peak_idx],
        "peak_count": int(counts[peak_idx]),
        "total_appearances": int(total),
        "active_days": n,
        # min/max rather than dates[0]/dates[-1]: callers may pass a timeline
        # that is not sorted by date.
        "first_seen": min(dates),
        "last_seen": max(dates),
    }
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# Default number of standard deviations from the mean before a day is flagged.
Z_SCORE_THRESHOLD = 2.5
# Timelines with fewer entries than this are never analysed for anomalies.
MIN_DATA_POINTS = 10
# Positive deviations with a raw count below this are not reported as spikes.
MIN_ABSOLUTE_SPIKE = 5
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def detect_anomalies(
    timeline: list[dict],
    z_threshold: float = Z_SCORE_THRESHOLD,
) -> list[dict]:
    """
    Flag timeline entries whose count deviates strongly from the mean.

    An entry is reported when the absolute z-score of its count exceeds
    ``z_threshold``. Positive deviations whose raw count is below
    MIN_ABSOLUTE_SPIKE are suppressed.

    Returns list of {"date": date, "count": int, "z_score": float, "type": str}
    where type is "spike" or "drop", in timeline order.
    Returns [] when the timeline has fewer than MIN_DATA_POINTS entries or
    when all counts are identical (zero standard deviation).
    """
    if len(timeline) < MIN_DATA_POINTS:
        return []

    summary = compute_activity_stats(timeline)
    baseline, spread = summary["mean_daily"], summary["std_daily"]

    # A flat series has no meaningful z-score (and would divide by zero).
    if spread == 0.0:
        return []

    flagged: list[dict] = []
    for point in timeline:
        observed = point["count"]
        score = (observed - baseline) / spread
        if not abs(score) > z_threshold:
            continue

        is_spike = score > 0
        # Ignore statistically large but numerically tiny upticks.
        if is_spike and observed < MIN_ABSOLUTE_SPIKE:
            continue

        flagged.append(
            {
                "date": point["date"],
                "count": observed,
                "z_score": float(score),
                "type": "spike" if is_spike else "drop",
            }
        )

    return flagged
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def detect_silence_breaks(
    timeline: list[dict],
    silence_days: int = 14,
) -> list[dict]:
    """
    Locate gaps of at least ``silence_days`` between consecutive active dates.

    The timeline is ordered by date before scanning, so input order does not
    matter. Each gap is measured between one entry's date and the next
    entry's date.

    Returns list of {"silent_from": date, "silent_to": date, "gap_days": int},
    where silent_from is the last active date before the gap and silent_to is
    the date activity resumed. Returns [] for timelines with fewer than two
    entries.
    """
    if len(timeline) < 2:
        return []

    ordered_days = [
        entry["date"] for entry in sorted(timeline, key=lambda entry: entry["date"])
    ]

    gaps: list[dict] = []
    # Walk adjacent pairs: (day[0], day[1]), (day[1], day[2]), ...
    for earlier, later in zip(ordered_days, ordered_days[1:]):
        quiet_days = (later - earlier).days
        if quiet_days < silence_days:
            continue
        gaps.append(
            {
                "silent_from": earlier,
                "silent_to": later,
                "gap_days": quiet_days,
            }
        )

    return gaps
|
api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# api package — Phase 5 FastAPI REST API
|
api/auth.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JWT authentication for VoidAccess API.
|
|
3
|
+
|
|
4
|
+
Flow:
|
|
5
|
+
1. POST /auth/login → returns access_token (JWT, 8hr expiry)
|
|
6
|
+
2. All protected routes require: Authorization: Bearer {token}
|
|
7
|
+
3. First login with default password → returns {must_reset_password: true}
|
|
8
|
+
4. POST /auth/reset-password → sets new password, clears must_reset flag
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import secrets
|
|
12
|
+
from datetime import datetime, timedelta, timezone
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
from fastapi import Depends, HTTPException, status
|
|
16
|
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
|
17
|
+
from jose import JWTError, jwt
|
|
18
|
+
from passlib.context import CryptContext
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
from sqlalchemy.orm import Session
|
|
21
|
+
|
|
22
|
+
from config import JWT_SECRET
|
|
23
|
+
from db.models import User
|
|
24
|
+
from db.session import get_session, get_db
|
|
25
|
+
from auth.token_blacklist import is_token_revoked
|
|
26
|
+
|
|
27
|
+
# Config — the signing secret has a single canonical source: config.py
SECRET = JWT_SECRET
# Signing algorithm used for both encoding and decoding tokens.
JWT_ALGORITHM = "HS256"
# Lifetime of an issued access token, in hours.
JWT_EXPIRY_HOURS = 8

# Password hashing context (bcrypt) shared by verify_password/hash_password.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
# Extracts the "Authorization: Bearer <token>" credentials for dependencies.
bearer_scheme = HTTPBearer()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ─── Pydantic schemas ──────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
class LoginRequest(BaseModel):
    # Request body for the login endpoint: the account's email address and
    # the plaintext password to check.
    email: str
    password: str
|
|
41
|
+
|
|
42
|
+
class LoginResponse(BaseModel):
    # Response body for a successful login.
    # access_token: the encoded JWT.
    access_token: str
    # token_type: scheme the client should use in the Authorization header.
    token_type: str = "bearer"
    # must_reset_password: whether the user has to change their password.
    must_reset_password: bool
|
|
46
|
+
|
|
47
|
+
class ResetPasswordRequest(BaseModel):
    # Request body for a password reset: the current password plus the new
    # password and its confirmation. No cross-field validation happens here.
    current_password: str
    new_password: str
    confirm_password: str
|
|
51
|
+
|
|
52
|
+
class TokenData(BaseModel):
    # Identity carried by a token: user id, email, and an optional token id.
    user_id: int
    email: str
    jti: Optional[str] = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ─── Core functions ────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
def verify_password(plain: str, hashed: str) -> bool:
    """Return the result of checking ``plain`` against ``hashed`` via pwd_context."""
    matches = pwd_context.verify(plain, hashed)
    return matches
|
|
62
|
+
|
|
63
|
+
def hash_password(plain: str) -> str:
    """Hash ``plain`` with pwd_context and return the resulting hash string."""
    digest = pwd_context.hash(plain)
    return digest
|
|
65
|
+
|
|
66
|
+
def create_access_token(user_id: int, email: str) -> tuple[str, str]:
    """
    Issue a signed JWT for the given user.

    The token carries the user id (as the string ``sub`` claim), the email,
    a random token id (``jti``), and ``exp``/``iat`` timestamps; it expires
    JWT_EXPIRY_HOURS after issuance.

    Returns:
        (token, jti) — the encoded token and its token id.
    """
    token_id = secrets.token_hex(16)
    issued_at = datetime.now(timezone.utc)
    expires_at = issued_at + timedelta(hours=JWT_EXPIRY_HOURS)

    claims = {
        "sub": str(user_id),
        "email": email,
        "jti": token_id,
        "exp": expires_at,
        "iat": issued_at,
    }
    encoded = jwt.encode(claims, SECRET, algorithm=JWT_ALGORITHM)
    return encoded, token_id
|
|
78
|
+
|
|
79
|
+
class TokenPayload(BaseModel):
    # Decoded token claims as produced by decode_token().
    # user_id: integer form of the token's "sub" claim.
    user_id: int
    email: str
    # jti: token id; decode_token() supplies "" when the claim is absent.
    jti: str
    # exp: expiry taken from the token's "exp" claim.
    exp: datetime
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def decode_token(token: str) -> TokenPayload:
    """
    Decode a JWT with the configured secret/algorithm and map its claims.

    Reads the ``sub``, ``email`` and ``exp`` claims (required here) and the
    ``jti`` claim (defaults to "" when absent).

    Raises:
        Whatever jwt.decode raises for a token it rejects; KeyError when a
        required claim is missing; ValueError when ``sub`` is not an integer
        string.
    """
    claims = jwt.decode(token, SECRET, algorithms=[JWT_ALGORITHM])
    return TokenPayload(
        user_id=int(claims["sub"]),
        email=claims["email"],
        jti=claims.get("jti", ""),
        exp=claims["exp"],
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
from pydantic import BaseModel, ConfigDict
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ─── FastAPI dependency ────────────────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
class CurrentUser(BaseModel):
    # Allows the non-pydantic User model to be used as a field type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # user: the User row loaded for the authenticated request.
    user: User
    # jti: token id of the token that authenticated the request.
    jti: str
    # exp: expiry of that token, when known.
    exp: Optional[datetime] = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme),
    db: Session = Depends(get_db),
) -> CurrentUser:
    """
    Dependency for protected routes.
    Usage: current: CurrentUser = Depends(get_current_user)

    Decodes the bearer token, rejects revoked tokens, and loads the active
    user through the request-scoped ``db`` session so the returned User is
    not detached.

    Raises:
        HTTPException(401): the token cannot be decoded or is missing /
            carries malformed required claims, the token's jti has been
            revoked, or no active user matches the token's user id.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid or expired token",
        headers={"WWW-Authenticate": "Bearer"},
    )
    revoked_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Token has been revoked",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        token_payload = decode_token(credentials.credentials)
    except (JWTError, KeyError, TypeError, ValueError) as exc:
        # decode_token indexes required claims and coerces types, so a
        # correctly signed token with missing or malformed claims raises
        # KeyError / ValueError (pydantic's ValidationError is a ValueError)
        # rather than JWTError. Treat all of these as an invalid token
        # instead of letting them surface as a 500.
        raise credentials_exception from exc

    # Tokens without a jti cannot be looked up in the revocation list, so the
    # check is skipped for them.
    if token_payload.jti:
        if await is_token_revoked(token_payload.jti):
            raise revoked_exception

    user = db.query(User).filter(
        User.id == token_payload.user_id,
        User.is_active == True,  # noqa: E712 — SQLAlchemy column comparison
    ).first()

    if not user:
        raise credentials_exception

    return CurrentUser(user=user, jti=token_payload.jti, exp=token_payload.exp)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
async def require_password_not_reset_pending(
    current_user: CurrentUser = Depends(get_current_user),
) -> CurrentUser:
    """
    Dependency for resource-creating endpoints.

    Passes the authenticated user through unchanged unless the user's
    ``must_reset_password`` attribute is truthy (a missing attribute counts
    as False), in which case a 403 with a structured detail is raised.
    """
    reset_pending = getattr(current_user.user, "must_reset_password", False)
    if not reset_pending:
        return current_user

    message = (
        "You must change your password before continuing. "
        "Use POST /auth/reset-password"
    )
    raise HTTPException(
        status_code=403,
        detail={
            "error": "password_reset_required",
            "message": message,
            "code": "PASSWORD_RESET_REQUIRED",
        },
    )
|