voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
db/models.py
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLAlchemy ORM models for VoidAccess's persistent storage layer.
|
|
3
|
+
|
|
4
|
+
Tables
|
|
5
|
+
------
|
|
6
|
+
investigations — one record per pipeline run
|
|
7
|
+
sources — canonical .onion domain registry (global, deduped by address)
|
|
8
|
+
investigation_sources — many-to-many: which sources appeared in which investigation
|
|
9
|
+
pages — individual scraped pages (URL-level, one per unique URL)
|
|
10
|
+
entities — structured intelligence artifacts extracted from pages
|
|
11
|
+
entity_relationships — directed edges between two entities
|
|
12
|
+
|
|
13
|
+
Design notes
|
|
14
|
+
------------
|
|
15
|
+
- Primary keys are UUID4, generated in Python so they're globally unique and safe
|
|
16
|
+
to produce offline before insertion.
|
|
17
|
+
- Enum-valued columns are plain String (VARCHAR) columns validated at the application level,
|
|
18
|
+
for portability between PostgreSQL (production) and SQLite (tests) and to avoid DDL-level ENUM management.
|
|
19
|
+
- DateTime columns are timezone-aware (UTC throughout).
|
|
20
|
+
- Soft cascade rules: deleting a Page cascades to its Entities and their Relationships.
|
|
21
|
+
Deleting an Investigation does NOT delete its Sources (they are global).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import enum
|
|
25
|
+
import uuid
|
|
26
|
+
from datetime import datetime, timezone
|
|
27
|
+
from typing import Any, List, Optional
|
|
28
|
+
|
|
29
|
+
import sqlalchemy as sa
|
|
30
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
31
|
+
from sqlalchemy.schema import UniqueConstraint
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Enums (application-level validation; stored as VARCHAR in the DB)
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
class SourceStatus(str, enum.Enum):
    """String-valued status of a source; members compare equal to their raw values."""

    ACTIVE = "active"
    DOWN = "down"
    UNKNOWN = "unknown"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SourceType(str, enum.Enum):
    """String-valued type tag of a source; members compare equal to their raw values."""

    SEARCH_RESULT = "search_result"
    CRAWLED = "crawled"
    SEED = "seed"
    TELEGRAM = "telegram"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class EntityType(str, enum.Enum):
    """Entity types stored as VARCHAR in the DB."""

    # Identifiers and contact points
    CRYPTO_WALLET = "crypto_wallet"
    EMAIL = "email"
    PGP_KEY = "pgp_key"
    ONION_URL = "onion_url"
    CVE = "cve"
    IP_ADDRESS = "ip_address"
    PHONE = "phone"
    HANDLE = "handle"

    # Threat naming
    MALWARE = "malware"
    RANSOMWARE_GROUP = "ransomware_group"
    DOMAIN = "domain"
    OTHER = "other"

    # File hashes, one member per digest algorithm
    FILE_HASH_MD5 = "file_hash_md5"
    FILE_HASH_SHA1 = "file_hash_sha1"
    FILE_HASH_SHA256 = "file_hash_sha256"

    MITRE_TECHNIQUE = "mitre_technique"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class RelationshipType(str, enum.Enum):
    """Edge types for the entity graph (Phase 3 will query these)."""

    # Each member's value is identical to its name.
    CO_APPEARED_ON = "CO_APPEARED_ON"
    POSTED_BY = "POSTED_BY"
    LINKED_TO = "LINKED_TO"
    PAID_TO = "PAID_TO"
    MEMBER_OF = "MEMBER_OF"
    USED = "USED"
    CLAIMED = "CLAIMED"
    LIKELY_SAME_ACTOR = "LIKELY_SAME_ACTOR"
    CONFIRMED_SAME_ACTOR = "CONFIRMED_SAME_ACTOR"
    FUNDED_BY = "FUNDED_BY"
    POSSIBLE_SAME_AUTHOR = "POSSIBLE_SAME_AUTHOR"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
# Declarative base
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
class Base(DeclarativeBase):
    """Declarative base class; every model in this module registers on its metadata."""
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
# Junction table: Investigation <-> Source (many-to-many)
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
# Association table for the Investigation <-> Source many-to-many link.
# The two foreign keys together form the composite primary key, and each
# cascades on delete of the row it references.
investigation_sources = sa.Table(
    "investigation_sources",
    Base.metadata,
    sa.Column(
        "investigation_id",
        sa.UUID(as_uuid=True),
        sa.ForeignKey("investigations.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    sa.Column(
        "source_id",
        sa.UUID(as_uuid=True),
        sa.ForeignKey("sources.id", ondelete="CASCADE"),
        primary_key=True,
    ),
    # Timezone-aware UTC timestamp filled in by the Python-side default.
    sa.Column(
        "added_at",
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    ),
)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ---------------------------------------------------------------------------
|
|
123
|
+
# Models
|
|
124
|
+
# ---------------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
class Investigation(Base):
    """
    A single pipeline run.

    Holds the submitted query and run parameters, status/progress tracking
    fields, the final summary, and the many-to-many link to the sources
    associated with the run.
    """

    __tablename__ = "investigations"

    # --- identity ---------------------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    run_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        default=uuid.uuid4,
        nullable=False,
        unique=True,
        index=True,
    )

    # --- query and run parameters -----------------------------------------
    query: Mapped[str] = mapped_column(sa.Text, nullable=False)
    refined_query: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True)
    model_used: Mapped[Optional[str]] = mapped_column(sa.String(100), nullable=True)
    preset: Mapped[Optional[str]] = mapped_column(sa.String(50), nullable=True)
    summary: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True)

    # --- lifecycle --------------------------------------------------------
    # Defaults are declared both Python-side and server-side.
    status: Mapped[str] = mapped_column(
        sa.String(20),
        default="pending",
        server_default="pending",
        nullable=False,
    )
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
    is_seed: Mapped[bool] = mapped_column(
        sa.Boolean,
        default=False,
        server_default="false",
        nullable=False,
    )
    graph_status: Mapped[str] = mapped_column(
        sa.String(20),
        default="pending",
        server_default="pending",
        nullable=False,
    )

    # --- progress reporting -----------------------------------------------
    current_step: Mapped[int] = mapped_column(
        sa.Integer,
        default=0,
        server_default="0",
        nullable=False,
    )
    current_step_label: Mapped[str] = mapped_column(
        sa.String(200),
        default="",
        server_default="",
        nullable=False,
    )
    entity_count: Mapped[int] = mapped_column(
        sa.Integer,
        default=0,
        server_default="0",
        nullable=False,
    )
    page_count: Mapped[int] = mapped_column(
        sa.Integer,
        default=0,
        server_default="0",
        nullable=False,
    )

    # --- ownership --------------------------------------------------------
    # Nullable; the foreign key sets it to NULL when the user row is deleted.
    user_id: Mapped[Optional[int]] = mapped_column(
        sa.Integer,
        sa.ForeignKey("users.id", ondelete="SET NULL"),
        index=True,
        nullable=True,
    )

    # --- relationships ----------------------------------------------------
    sources: Mapped[List["Source"]] = relationship(
        "Source",
        secondary=investigation_sources,
        back_populates="investigations",
        lazy="select",
    )
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class MonitorAlertSeverity(str, enum.Enum):
    """Stored as VARCHAR in ``monitor_alerts.severity``."""

    # Member names are lowercase and equal to their values.
    info = "info"
    warning = "warning"
    critical = "critical"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class MonitorAlert(Base):
    """
    Persisted record of every alert fired by the monitoring system.

    One row is written each time a monitor detects a change significant
    enough to alert on.
    """

    __tablename__ = "monitor_alerts"

    # Composite index over (monitor_name, triggered_at), in addition to the
    # single-column indexes declared on those columns below.
    __table_args__ = (
        sa.Index(
            "ix_monitor_alerts_monitor_triggered",
            "monitor_name",
            "triggered_at",
        ),
    )

    id: Mapped[int] = mapped_column(
        sa.Integer, autoincrement=True, primary_key=True
    )
    monitor_name: Mapped[str] = mapped_column(
        sa.String, index=True, nullable=False
    )
    triggered_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        index=True,
        nullable=False,
    )

    # What changed
    change_type: Mapped[str] = mapped_column(sa.String(50), nullable=False)
    summary: Mapped[str] = mapped_column(sa.Text, default="", nullable=False)
    diff_data: Mapped[Optional[dict[str, Any]]] = mapped_column(
        sa.JSON, nullable=True
    )
    # Plain string column; the default is the value of MonitorAlertSeverity.info.
    severity: Mapped[str] = mapped_column(
        sa.String(20),
        default=MonitorAlertSeverity.info.value,
        nullable=False,
    )
    entity_count_delta: Mapped[int] = mapped_column(
        sa.Integer, default=0, nullable=False
    )

    # Delivery and acknowledgement state
    delivered: Mapped[bool] = mapped_column(
        sa.Boolean, default=False, nullable=False
    )
    delivery_channels: Mapped[Optional[List[Any]]] = mapped_column(
        sa.JSON, nullable=True
    )
    acknowledged: Mapped[bool] = mapped_column(
        sa.Boolean, default=False, nullable=False
    )
    acknowledged_at: Mapped[Optional[datetime]] = mapped_column(
        sa.DateTime(timezone=True), nullable=True
    )
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class InvestigationEntityLink(Base):
    """
    Links an entity to additional investigations beyond its origin.

    Enables cross-investigation deduplication without moving entity ownership.
    """

    __tablename__ = "investigation_entity_links"

    # At most one link row per (entity, investigation) pair.
    __table_args__ = (
        UniqueConstraint("entity_id", "investigation_id"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    # Both foreign keys cascade on delete of the referenced row.
    entity_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("entities.id", ondelete="CASCADE"),
        nullable=False,
    )
    investigation_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("investigations.id", ondelete="CASCADE"),
        nullable=False,
    )
    linked_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
    )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class ActorStyleProfile(Base):
    """
    Stores aggregated writing style fingerprints for unique actors.

    Updated incrementally as new text samples are discovered.
    """

    __tablename__ = "actor_style_profiles"

    # One profile per (canonical_value, entity_type) pair.
    __table_args__ = (
        sa.UniqueConstraint("canonical_value", "entity_type"),
    )

    id: Mapped[int] = mapped_column(
        sa.Integer, autoincrement=True, primary_key=True
    )
    canonical_value: Mapped[str] = mapped_column(
        sa.String, index=True, nullable=False
    )
    entity_type: Mapped[str] = mapped_column(sa.String, nullable=False)
    style_vector: Mapped[dict[str, Any]] = mapped_column(sa.JSON, nullable=False)

    # Counters default to zero both Python-side and server-side.
    sample_count: Mapped[int] = mapped_column(
        sa.Integer, server_default="0", default=0
    )
    total_chars: Mapped[int] = mapped_column(
        sa.Integer, server_default="0", default=0
    )
    last_updated: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
|
|
282
|
+
|
|
283
|
+
class User(Base):
    """
    VoidAccess system user. Handles authentication and access control.
    """

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(
        sa.Integer, autoincrement=True, primary_key=True
    )
    # Unique and indexed.
    email: Mapped[str] = mapped_column(
        sa.String(255), index=True, unique=True, nullable=False
    )
    hashed_password: Mapped[str] = mapped_column(sa.String, nullable=False)
    is_active: Mapped[bool] = mapped_column(
        sa.Boolean, nullable=False, default=True
    )

    # Forces password reset on next login
    # Set to True for the default admin account
    must_reset_password: Mapped[bool] = mapped_column(
        sa.Boolean, nullable=False, default=False
    )

    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
    last_login_at: Mapped[Optional[datetime]] = mapped_column(
        sa.DateTime(timezone=True), nullable=True
    )

    def __repr__(self) -> str:
        """Return a short debug representation showing the user's email."""
        return f"<User {self.email!r}>"
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class UserApiKey(Base):
    """
    Per-user encrypted API key storage.

    Keys are encrypted at rest using Fernet (AES-128) with a key derived from JWT_SECRET.
    """

    __tablename__ = "user_api_keys"

    # Each user may hold at most one value per key name.
    __table_args__ = (
        UniqueConstraint("user_id", "key_name"),
    )

    id: Mapped[int] = mapped_column(
        sa.Integer, autoincrement=True, primary_key=True
    )
    # Cascades on delete of the referenced user row.
    user_id: Mapped[int] = mapped_column(
        sa.Integer,
        sa.ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    key_name: Mapped[str] = mapped_column(sa.String(64), nullable=False)
    encrypted_value: Mapped[str] = mapped_column(sa.Text, nullable=False)

    # Both timestamps take a server-side NOW() default; updated_at also
    # declares an onupdate of NOW().
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
        onupdate=sa.func.now(),
    )
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
class ContentSafetyEvent(Base):
    """
    Audit log for content safety block events.

    Never stores actual prohibited content — only event metadata and a hash
    prefix for correlation.
    """

    __tablename__ = "content_safety_events"

    id: Mapped[int] = mapped_column(
        sa.Integer, autoincrement=True, primary_key=True
    )
    # "query_blocked", "url_blocked", "content_blocked"
    event_type: Mapped[str] = mapped_column(sa.String(50), nullable=False)
    # Plain nullable integer; no foreign key is declared on this column.
    user_id: Mapped[Optional[int]] = mapped_column(sa.Integer, nullable=True)
    # Hash prefix for correlation — never the actual content
    content_hash: Mapped[Optional[str]] = mapped_column(
        sa.String(64), nullable=True
    )
    # Filled in by the server-side NOW() default.
    timestamp: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
    )
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
class Entity(Base):
    """
    Structured intelligence artifacts extracted from pages.

    Each row belongs to exactly one page (required, cascades on page delete)
    and optionally to an investigation (set to NULL when that investigation
    is deleted).
    """

    __tablename__ = "entities"

    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    page_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("pages.id", ondelete="CASCADE"),
        nullable=False,
    )
    investigation_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("investigations.id", ondelete="SET NULL"),
        nullable=True,
    )
    entity_type: Mapped[str] = mapped_column(sa.String(50), nullable=False)
    value: Mapped[str] = mapped_column(sa.Text, nullable=False)
    # The Python-side default mirrors the server default, so the value is
    # known on the instance straight after INSERT without a re-SELECT of a
    # server-generated column. The emitted DDL is unchanged.
    confidence: Mapped[float] = mapped_column(
        sa.Float(), nullable=False, default=1.0, server_default="1.0"
    )
    # DB column is context_snippet; `context` is a backward-compat Python alias
    context_snippet: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True)
    canonical_value: Mapped[Optional[str]] = mapped_column(
        sa.String, nullable=True, index=True
    )
    historical_context: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True)
    extraction_method: Mapped[Optional[str]] = mapped_column(
        sa.String(10), nullable=True
    )
    first_seen: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    last_seen: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    source_count: Mapped[int] = mapped_column(
        sa.Integer, server_default="1", default=1
    )
    corroborating_sources: Mapped[Optional[str]] = mapped_column(
        sa.Text, nullable=True
    )
    # NOTE(review): first_seen_at / last_seen_at sit alongside the non-null
    # first_seen / last_seen columns above; which pair is authoritative is
    # not visible from this module — confirm against the callers.
    first_seen_at: Mapped[Optional[datetime]] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
        nullable=True,
    )
    last_seen_at: Mapped[Optional[datetime]] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
        nullable=True,
    )

    @property
    def context(self) -> Optional[str]:
        """Backward-compat alias for context_snippet (AGENTS.md: do not remove)."""
        return self.context_snippet

    @context.setter
    def context(self, value: Optional[str]) -> None:
        """Write through to context_snippet."""
        self.context_snippet = value

    __table_args__ = (
        sa.Index("ix_entities_page_id", "page_id"),
        sa.Index("ix_entities_investigation_id", "investigation_id"),
        sa.Index("ix_entities_entity_type", "entity_type"),
        sa.Index("ix_entity_canonical", "entity_type", "canonical_value"),
    )

    page: Mapped["Page"] = relationship("Page", back_populates="entities")
    # Two relationships target EntityRelationship, so each names the foreign
    # key it follows explicitly.
    relationships_as_entity_a: Mapped[List["EntityRelationship"]] = relationship(
        "EntityRelationship",
        foreign_keys="EntityRelationship.entity_a_id",
        back_populates="entity_a",
    )
    relationships_as_entity_b: Mapped[List["EntityRelationship"]] = relationship(
        "EntityRelationship",
        foreign_keys="EntityRelationship.entity_b_id",
        back_populates="entity_b",
    )
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class Page(Base):
    """
    Individual scraped page from a source.

    URLs are unique across the table. The source link is optional and is
    set to NULL by the foreign key when the source row is deleted.
    """

    __tablename__ = "pages"

    __table_args__ = (
        UniqueConstraint("url"),
        sa.Index("ix_pages_source_id", "source_id"),
        sa.Index("ix_pages_raw_content_hash", "raw_content_hash"),
        sa.Index("ix_pages_posted_at", "posted_at"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    source_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("sources.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Content
    url: Mapped[str] = mapped_column(sa.Text, nullable=False)
    raw_content_hash: Mapped[Optional[str]] = mapped_column(
        sa.String(64), nullable=True
    )
    cleaned_text: Mapped[Optional[str]] = mapped_column(sa.Text, nullable=True)
    scrape_timestamp: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
    language: Mapped[Optional[str]] = mapped_column(sa.String(10), nullable=True)
    byte_size: Mapped[Optional[int]] = mapped_column(sa.Integer, nullable=True)
    posted_at: Mapped[Optional[datetime]] = mapped_column(
        sa.DateTime(timezone=True), nullable=True
    )
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )

    # Relationships
    source: Mapped[Optional["Source"]] = relationship(
        "Source",
        back_populates="pages",
    )
    # ORM-level cascade: entities are deleted together with their page.
    entities: Mapped[List["Entity"]] = relationship(
        "Entity",
        back_populates="page",
        cascade="all, delete-orphan",
    )
    relationships_as_source: Mapped[List["EntityRelationship"]] = relationship(
        "EntityRelationship",
        foreign_keys="EntityRelationship.source_page_id",
        back_populates="source_page",
    )
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class Source(Base):
    """
    Canonical .onion domain registry.

    One row per unique onion address; investigations attach to sources
    through the ``investigation_sources`` association table.
    """

    __tablename__ = "sources"

    # NOTE(review): onion_address is already declared unique=True below, so
    # this explicit index may duplicate the one backing the unique
    # constraint — confirm against the migrations before removing it.
    __table_args__ = (
        sa.Index("ix_sources_onion_address", "onion_address"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        default=uuid.uuid4,
        primary_key=True,
    )
    onion_address: Mapped[str] = mapped_column(
        sa.String(255), unique=True, nullable=False
    )
    first_seen: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
    last_seen: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        default=lambda: datetime.now(tz=timezone.utc),
        nullable=False,
    )
    # Plain string columns with matching Python-side and server-side defaults.
    status: Mapped[str] = mapped_column(
        sa.String(20),
        default="unknown",
        server_default="unknown",
        nullable=False,
    )
    source_type: Mapped[str] = mapped_column(
        sa.String(30),
        default="search_result",
        server_default="search_result",
        nullable=False,
    )

    # NOTE(review): the ORM cascade here deletes pages together with their
    # source, whereas the pages.source_id foreign key is nullable with
    # ondelete="SET NULL" — confirm which behaviour is intended.
    pages: Mapped[List["Page"]] = relationship(
        "Page",
        back_populates="source",
        cascade="all, delete-orphan",
    )
    investigations: Mapped[List["Investigation"]] = relationship(
        "Investigation",
        secondary=investigation_sources,
        back_populates="sources",
        lazy="select",
    )
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
class EntityRelationship(Base):
    """
    Directed edge between two entities.

    ``entity_a_id`` and ``entity_b_id`` are the two endpoints (both required,
    both cascade on entity delete). The originating page and investigation
    are optional and are set to NULL when the referenced row is deleted.
    """

    __tablename__ = "entity_relationships"

    id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
    )
    entity_a_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("entities.id", ondelete="CASCADE"),
        nullable=False,
    )
    entity_b_id: Mapped[uuid.UUID] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("entities.id", ondelete="CASCADE"),
        nullable=False,
    )
    relationship_type: Mapped[str] = mapped_column(sa.String(50), nullable=False)
    source_page_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("pages.id", ondelete="SET NULL"),
        nullable=True,
    )
    # The Python-side default mirrors the server default, so the value is
    # known on the instance straight after INSERT without a re-SELECT of a
    # server-generated column. The emitted DDL is unchanged.
    confidence: Mapped[float] = mapped_column(
        sa.Float(), nullable=False, default=1.0, server_default="1.0"
    )
    first_seen: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        default=lambda: datetime.now(timezone.utc),
    )
    investigation_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        sa.UUID(as_uuid=True),
        sa.ForeignKey("investigations.id", ondelete="SET NULL"),
        nullable=True,
    )

    # NOTE(review): ix_entity_relationships_source_target covers a leading
    # prefix of ix_entity_relationships_lookup; kept as declared.
    __table_args__ = (
        sa.Index(
            "ix_entity_relationships_lookup",
            "entity_a_id",
            "entity_b_id",
            "relationship_type",
        ),
        sa.Index("ix_entity_relationships_investigation_id", "investigation_id"),
        sa.Index("ix_entity_relationships_source_target", "entity_a_id", "entity_b_id"),
    )

    # Two foreign keys point at entities.id, so each relationship names the
    # column it follows.
    entity_a: Mapped["Entity"] = relationship(
        "Entity",
        foreign_keys=[entity_a_id],
        back_populates="relationships_as_entity_a",
    )
    entity_b: Mapped["Entity"] = relationship(
        "Entity",
        foreign_keys=[entity_b_id],
        back_populates="relationships_as_entity_b",
    )
    source_page: Mapped[Optional["Page"]] = relationship(
        "Page",
        foreign_keys=[source_page_id],
        back_populates="relationships_as_source",
    )
|
|
618
|
+
|