voidaccess 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +49 -0
- analysis/opsec.py +454 -0
- analysis/patterns.py +202 -0
- analysis/temporal.py +201 -0
- api/__init__.py +1 -0
- api/auth.py +163 -0
- api/main.py +509 -0
- api/routes/__init__.py +1 -0
- api/routes/admin.py +214 -0
- api/routes/auth.py +157 -0
- api/routes/entities.py +871 -0
- api/routes/export.py +359 -0
- api/routes/investigations.py +2567 -0
- api/routes/monitors.py +405 -0
- api/routes/search.py +157 -0
- api/routes/settings.py +851 -0
- auth/__init__.py +1 -0
- auth/token_blacklist.py +108 -0
- cli/__init__.py +3 -0
- cli/adapters/__init__.py +1 -0
- cli/adapters/sqlite.py +273 -0
- cli/browser.py +376 -0
- cli/commands/__init__.py +1 -0
- cli/commands/configure.py +185 -0
- cli/commands/enrich.py +154 -0
- cli/commands/export.py +158 -0
- cli/commands/investigate.py +601 -0
- cli/commands/show.py +87 -0
- cli/config.py +180 -0
- cli/display.py +212 -0
- cli/main.py +154 -0
- cli/tor_detect.py +71 -0
- config.py +180 -0
- crawler/__init__.py +28 -0
- crawler/dedup.py +97 -0
- crawler/frontier.py +115 -0
- crawler/spider.py +462 -0
- crawler/utils.py +122 -0
- db/__init__.py +47 -0
- db/migrations/__init__.py +0 -0
- db/migrations/env.py +80 -0
- db/migrations/versions/0001_initial_schema.py +270 -0
- db/migrations/versions/0002_add_investigation_status_column.py +27 -0
- db/migrations/versions/0002_add_missing_tables.py +33 -0
- db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
- db/migrations/versions/0004_add_page_posted_at.py +41 -0
- db/migrations/versions/0005_add_extraction_method.py +32 -0
- db/migrations/versions/0006_add_monitor_alerts.py +26 -0
- db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
- db/migrations/versions/0008_add_users_table.py +47 -0
- db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
- db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
- db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
- db/migrations/versions/0013_add_graph_status.py +31 -0
- db/migrations/versions/0015_add_progress_fields.py +41 -0
- db/migrations/versions/0016_backfill_graph_status.py +33 -0
- db/migrations/versions/0017_add_user_api_keys.py +44 -0
- db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
- db/migrations/versions/0019_add_content_safety_log.py +46 -0
- db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
- db/models.py +618 -0
- db/queries.py +841 -0
- db/session.py +270 -0
- export/__init__.py +34 -0
- export/misp.py +257 -0
- export/sigma.py +342 -0
- export/stix.py +418 -0
- extractor/__init__.py +21 -0
- extractor/llm_extract.py +372 -0
- extractor/ner.py +512 -0
- extractor/normalizer.py +638 -0
- extractor/pipeline.py +401 -0
- extractor/regex_patterns.py +325 -0
- fingerprint/__init__.py +33 -0
- fingerprint/profiler.py +240 -0
- fingerprint/stylometry.py +249 -0
- graph/__init__.py +73 -0
- graph/builder.py +894 -0
- graph/export.py +225 -0
- graph/model.py +83 -0
- graph/queries.py +297 -0
- graph/visualize.py +178 -0
- i18n/__init__.py +24 -0
- i18n/detect.py +76 -0
- i18n/query_expand.py +72 -0
- i18n/translate.py +210 -0
- monitor/__init__.py +27 -0
- monitor/_db.py +74 -0
- monitor/alerts.py +345 -0
- monitor/config.py +118 -0
- monitor/diff.py +75 -0
- monitor/jobs.py +247 -0
- monitor/scheduler.py +184 -0
- scraper/__init__.py +0 -0
- scraper/scrape.py +857 -0
- scraper/scrape_js.py +272 -0
- search/__init__.py +318 -0
- search/circuit_breaker.py +240 -0
- search/search.py +334 -0
- sources/__init__.py +96 -0
- sources/blockchain.py +444 -0
- sources/cache.py +93 -0
- sources/cisa.py +108 -0
- sources/dns_enrichment.py +557 -0
- sources/domain_reputation.py +643 -0
- sources/email_reputation.py +635 -0
- sources/engines.py +244 -0
- sources/enrichment.py +1244 -0
- sources/github_scraper.py +589 -0
- sources/gitlab_scraper.py +624 -0
- sources/hash_reputation.py +856 -0
- sources/historical_intel.py +253 -0
- sources/ip_reputation.py +521 -0
- sources/paste_scraper.py +484 -0
- sources/pastes.py +278 -0
- sources/rss_scraper.py +576 -0
- sources/seed_manager.py +373 -0
- sources/seeds.py +368 -0
- sources/shodan.py +103 -0
- sources/telegram.py +199 -0
- sources/virustotal.py +113 -0
- utils/__init__.py +0 -0
- utils/async_utils.py +89 -0
- utils/content_safety.py +193 -0
- utils/defang.py +94 -0
- utils/encryption.py +34 -0
- utils/ioc_freshness.py +124 -0
- utils/user_keys.py +33 -0
- vector/__init__.py +39 -0
- vector/embedder.py +100 -0
- vector/model_singleton.py +49 -0
- vector/search.py +87 -0
- vector/store.py +514 -0
- voidaccess/__init__.py +0 -0
- voidaccess/llm.py +717 -0
- voidaccess/llm_utils.py +696 -0
- voidaccess-1.3.0.dist-info/METADATA +395 -0
- voidaccess-1.3.0.dist-info/RECORD +142 -0
- voidaccess-1.3.0.dist-info/WHEEL +5 -0
- voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
- voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
- voidaccess-1.3.0.dist-info/top_level.txt +19 -0
db/migrations/env.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Alembic migration environment.
|
|
3
|
+
|
|
4
|
+
DATABASE_URL is read from the environment (via config.py) so credentials
|
|
5
|
+
are never stored in version control. The models' Base.metadata is imported
|
|
6
|
+
here so `alembic revision --autogenerate` can diff the ORM against the DB.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
from logging.config import fileConfig
|
|
12
|
+
|
|
13
|
+
from alembic import context
|
|
14
|
+
from sqlalchemy import engine_from_config, pool
|
|
15
|
+
|
|
16
|
+
# Make sure the project root is on sys.path so `from db.models import Base`
|
|
17
|
+
# resolves correctly regardless of where alembic is invoked from.
|
|
18
|
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
|
19
|
+
|
|
20
|
+
from config import DATABASE_URL # noqa: E402
|
|
21
|
+
from db.models import Base # noqa: E402 — imports all mapped classes
|
|
22
|
+
|
|
23
|
+
# Alembic Config object (gives access to alembic.ini values)
config = context.config

# Override sqlalchemy.url with the value from the environment.
# This means alembic.ini never needs a real connection string.
#
# set_main_option() stores the value through ConfigParser, which treats "%"
# as the start of an interpolation token. A URL-encoded password such as
# "p%40ss" would therefore raise (or be mangled) when the option is read back,
# so every literal "%" is escaped as "%%". get_main_option()/get_section()
# undo the escaping when they interpolate the value.
# NOTE(review): assumes DATABASE_URL is a plain str — confirm against config.py.
if DATABASE_URL:
    config.set_main_option("sqlalchemy.url", DATABASE_URL.replace("%", "%%"))

# Set up Python logging from alembic.ini
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# This is what autogenerate inspects to build migration scripts.
target_metadata = Base.metadata
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def run_migrations_offline() -> None:
    """
    Run migrations without a live DB connection.

    The Alembic context is configured from the ``sqlalchemy.url`` main option
    alone (no Engine is created) and binds are rendered inline as literals.
    Emits SQL to stdout — useful for review or for DBAs who apply migrations
    manually.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def run_migrations_online() -> None:
    """
    Run migrations against a live DB connection.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the active
    ini section, opens one connection and runs the migrations inside a
    transaction. This is the normal path for `alembic upgrade head`.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        # no pooling needed for one-shot migration runs
        poolclass=pool.NullPool,
    )

    with engine.connect() as live_connection:
        context.configure(
            connection=live_connection,
            target_metadata=target_metadata,
            # detect column type changes in autogenerate
            compare_type=True,
            compare_server_default=True,
        )
        with context.begin_transaction():
            context.run_migrations()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Entry point: pick the runner that matches the mode Alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Initial schema — all Phase 1A tables.
|
|
2
|
+
|
|
3
|
+
Revision ID: 0001
|
|
4
|
+
Revises: (none — first migration)
|
|
5
|
+
Create Date: 2026-04-14
|
|
6
|
+
|
|
7
|
+
Tables created
|
|
8
|
+
--------------
|
|
9
|
+
investigations
|
|
10
|
+
sources
|
|
11
|
+
investigation_sources (junction)
|
|
12
|
+
pages
|
|
13
|
+
entities
|
|
14
|
+
entity_relationships
|
|
15
|
+
users
|
|
16
|
+
monitor_alerts
|
|
17
|
+
investigation_entity_links
|
|
18
|
+
actor_style_profiles
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Sequence, Union
|
|
22
|
+
|
|
23
|
+
import sqlalchemy as sa
|
|
24
|
+
from alembic import op
|
|
25
|
+
|
|
26
|
+
revision: str = "0001_initial_schema"
|
|
27
|
+
down_revision: Union[str, None] = None
|
|
28
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
29
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def upgrade() -> None:
    """Create every Phase 1A table together with its secondary indexes.

    Tables are created in dependency order so that each foreign key refers to
    a table that already exists; the indexes of a table are created right
    after the table itself.
    """

    def uuid_id() -> sa.Column:
        # UUID "id" column; the primary-key constraint is declared per table.
        return sa.Column("id", sa.UUID(), nullable=False)

    def serial_id() -> sa.Column:
        # Auto-incrementing integer "id" column.
        return sa.Column("id", sa.Integer(), nullable=False, autoincrement=True)

    def timestamp(name: str, nullable: bool = False) -> sa.Column:
        # Timezone-aware timestamp column.
        return sa.Column(name, sa.DateTime(timezone=True), nullable=nullable)

    def uuid_ref(name: str, target: str, ondelete: str, nullable: bool) -> sa.Column:
        # UUID column carrying a foreign key to ``target``.
        return sa.Column(
            name,
            sa.UUID(),
            sa.ForeignKey(target, ondelete=ondelete),
            nullable=nullable,
        )

    def flag(name: str, default: str) -> sa.Column:
        # Non-nullable boolean with a server-side default.
        return sa.Column(name, sa.Boolean(), nullable=False, server_default=default)

    def add_indexes(table: str, *specs) -> None:
        # Each spec is an (index_name, [column, ...]) pair.
        for index_name, columns in specs:
            op.create_index(index_name, table, columns)

    # ---- investigations ------------------------------------------------
    op.create_table(
        "investigations",
        uuid_id(),
        sa.Column("run_id", sa.UUID(), nullable=False),
        sa.Column("query", sa.Text(), nullable=False),
        sa.Column("refined_query", sa.Text(), nullable=True),
        sa.Column("model_used", sa.String(100), nullable=True),
        sa.Column("preset", sa.String(50), nullable=True),
        sa.Column("summary", sa.Text(), nullable=True),
        timestamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("run_id"),
    )
    add_indexes("investigations", ("ix_investigations_run_id", ["run_id"]))

    # ---- sources -------------------------------------------------------
    op.create_table(
        "sources",
        uuid_id(),
        sa.Column("onion_address", sa.String(255), nullable=False),
        timestamp("first_seen"),
        timestamp("last_seen"),
        sa.Column("status", sa.String(20), nullable=False, server_default="unknown"),
        sa.Column("source_type", sa.String(30), nullable=False, server_default="search_result"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("onion_address"),
    )
    add_indexes("sources", ("ix_sources_onion_address", ["onion_address"]))

    # ---- investigation_sources (many-to-many junction) -----------------
    op.create_table(
        "investigation_sources",
        uuid_ref("investigation_id", "investigations.id", "CASCADE", False),
        uuid_ref("source_id", "sources.id", "CASCADE", False),
        timestamp("added_at"),
        sa.PrimaryKeyConstraint("investigation_id", "source_id"),
    )

    # ---- pages ---------------------------------------------------------
    op.create_table(
        "pages",
        uuid_id(),
        uuid_ref("source_id", "sources.id", "SET NULL", True),
        sa.Column("url", sa.Text(), nullable=False),
        sa.Column("raw_content_hash", sa.String(64), nullable=True),
        sa.Column("cleaned_text", sa.Text(), nullable=True),
        timestamp("scrape_timestamp"),
        sa.Column("language", sa.String(10), nullable=True),
        sa.Column("byte_size", sa.Integer(), nullable=True),
        timestamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("url"),
    )
    add_indexes(
        "pages",
        ("ix_pages_source_id", ["source_id"]),
        ("ix_pages_raw_content_hash", ["raw_content_hash"]),
    )

    # ---- entities ------------------------------------------------------
    op.create_table(
        "entities",
        uuid_id(),
        uuid_ref("page_id", "pages.id", "CASCADE", False),
        uuid_ref("investigation_id", "investigations.id", "SET NULL", True),
        sa.Column("entity_type", sa.String(50), nullable=False),
        sa.Column("value", sa.Text(), nullable=False),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="1.0"),
        sa.Column("context", sa.Text(), nullable=True),
        timestamp("first_seen"),
        timestamp("last_seen"),
        timestamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
    )
    add_indexes(
        "entities",
        ("ix_entities_page_id", ["page_id"]),
        ("ix_entities_investigation_id", ["investigation_id"]),
        ("ix_entities_entity_type", ["entity_type"]),
    )

    # ---- entity_relationships ------------------------------------------
    op.create_table(
        "entity_relationships",
        uuid_id(),
        uuid_ref("entity_a_id", "entities.id", "CASCADE", False),
        uuid_ref("entity_b_id", "entities.id", "CASCADE", False),
        sa.Column("relationship_type", sa.String(50), nullable=False),
        uuid_ref("source_page_id", "pages.id", "SET NULL", True),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="1.0"),
        timestamp("first_seen"),
        sa.PrimaryKeyConstraint("id"),
    )
    add_indexes(
        "entity_relationships",
        ("ix_entity_relationships_entity_a_id", ["entity_a_id"]),
        ("ix_entity_relationships_entity_b_id", ["entity_b_id"]),
        ("ix_entity_relationships_relationship_type", ["relationship_type"]),
    )

    # ---- users ---------------------------------------------------------
    op.create_table(
        "users",
        serial_id(),
        sa.Column("email", sa.String(255), nullable=False),
        sa.Column("hashed_password", sa.String(), nullable=False),
        flag("is_active", "true"),
        flag("must_reset_password", "false"),
        timestamp("created_at"),
        timestamp("last_login_at", nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("email"),
    )
    add_indexes("users", ("ix_users_email", ["email"]))

    # ---- monitor_alerts ------------------------------------------------
    op.create_table(
        "monitor_alerts",
        serial_id(),
        sa.Column("monitor_name", sa.String(), nullable=False),
        timestamp("triggered_at"),
        sa.Column("change_type", sa.String(50), nullable=False),
        sa.Column("summary", sa.Text(), nullable=False),
        sa.Column("diff_data", sa.JSON(), nullable=True),
        sa.Column("severity", sa.String(20), nullable=False, server_default="info"),
        sa.Column("entity_count_delta", sa.Integer(), nullable=False, server_default="0"),
        flag("delivered", "false"),
        sa.Column("delivery_channels", sa.JSON(), nullable=True),
        flag("acknowledged", "false"),
        timestamp("acknowledged_at", nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    add_indexes(
        "monitor_alerts",
        ("ix_monitor_alerts_monitor_name", ["monitor_name"]),
        ("ix_monitor_alerts_triggered_at", ["triggered_at"]),
        ("ix_monitor_alerts_monitor_triggered", ["monitor_name", "triggered_at"]),
    )

    # ---- investigation_entity_links ------------------------------------
    op.create_table(
        "investigation_entity_links",
        uuid_id(),
        uuid_ref("entity_id", "entities.id", "CASCADE", False),
        uuid_ref("investigation_id", "investigations.id", "CASCADE", False),
        timestamp("linked_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("entity_id", "investigation_id"),
    )
    add_indexes(
        "investigation_entity_links",
        ("ix_investigation_entity_links_entity_id", ["entity_id"]),
        ("ix_investigation_entity_links_investigation_id", ["investigation_id"]),
    )

    # ---- actor_style_profiles ------------------------------------------
    op.create_table(
        "actor_style_profiles",
        serial_id(),
        sa.Column("canonical_value", sa.String(), nullable=False),
        sa.Column("entity_type", sa.String(), nullable=False),
        sa.Column("style_vector", sa.JSON(), nullable=False),
        sa.Column("sample_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("total_chars", sa.Integer(), nullable=False, server_default="0"),
        timestamp("last_updated"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("canonical_value", "entity_type"),
    )
    add_indexes(
        "actor_style_profiles",
        ("ix_actor_style_profiles_canonical_value", ["canonical_value"]),
    )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def downgrade() -> None:
    """Drop every table created by this revision, dependants before parents."""
    tables_in_drop_order = (
        "actor_style_profiles",
        "investigation_entity_links",
        "monitor_alerts",
        "users",
        "entity_relationships",
        "entities",
        "pages",
        "investigation_sources",
        "sources",
        "investigations",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Add status column to investigations.
|
|
2
|
+
|
|
3
|
+
Revision ID: 0003_add_investigation_status
|
|
4
|
+
Revises: 0002_add_missing_tables
|
|
5
|
+
Create Date: 2026-04-14
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Sequence, Union
|
|
9
|
+
|
|
10
|
+
import sqlalchemy as sa
|
|
11
|
+
from alembic import op
|
|
12
|
+
|
|
13
|
+
revision: str = "0003_add_investigation_status"
|
|
14
|
+
down_revision: Union[str, None] = "0002_add_missing_tables"
|
|
15
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
16
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def upgrade() -> None:
    """Add a non-nullable ``status`` column (server default "pending") to ``investigations``."""
    status_column = sa.Column(
        "status",
        sa.String(length=20),
        nullable=False,
        server_default="pending",
    )
    op.add_column("investigations", status_column)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def downgrade() -> None:
    """Remove the ``status`` column from ``investigations``."""
    table_name, column_name = "investigations", "status"
    op.drop_column(table_name, column_name)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Add missing tables — users, monitor_alerts, investigation_entity_links, actor_style_profiles.
|
|
2
|
+
|
|
3
|
+
Revision ID: 0002
|
|
4
|
+
Revises: 0001
|
|
5
|
+
Create Date: 2026-04-20
|
|
6
|
+
|
|
7
|
+
Tables created
|
|
8
|
+
--------------
|
|
9
|
+
users
|
|
10
|
+
monitor_alerts
|
|
11
|
+
investigation_entity_links
|
|
12
|
+
actor_style_profiles
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Sequence, Union
|
|
16
|
+
|
|
17
|
+
import sqlalchemy as sa
|
|
18
|
+
from alembic import op
|
|
19
|
+
|
|
20
|
+
revision: str = "0002_add_missing_tables"
|
|
21
|
+
down_revision: Union[str, None] = "0001_initial_schema"
|
|
22
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
23
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def upgrade() -> None:
    """Do nothing; this revision only keeps the migration chain linked.

    Tables handled in 0001_initial_schema:
    users, monitor_alerts, investigation_entity_links, actor_style_profiles.
    """
    return None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def downgrade() -> None:
    """Do nothing; the matching upgrade makes no schema changes."""
    return None
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""add canonical value and entity links
|
|
2
|
+
|
|
3
|
+
Revision ID: 0004_add_canonical_val_links
|
|
4
|
+
Revises: 0003_add_investigation_status
|
|
5
|
+
Create Date: 2026-04-16
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from typing import Sequence, Union
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from alembic import op
|
|
11
|
+
|
|
12
|
+
revision: str = "0004_add_canonical_val_links"
|
|
13
|
+
down_revision: Union[str, None] = "0003_add_investigation_status"
|
|
14
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
15
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def upgrade() -> None:
    """Add canonical-value support to ``entities`` and ``is_seed`` to ``investigations``.

    Each column and index is only created when the inspector does not already
    report it. The backfill statements at the end always run.
    """
    inspector = sa.inspect(op.get_bind())
    entity_columns = {col['name'] for col in inspector.get_columns('entities')}
    investigation_columns = {col['name'] for col in inspector.get_columns('investigations')}

    # 1. New nullable columns on entities (skipped when already present).
    new_entity_columns = (
        ('canonical_value', sa.String()),
        ('historical_context', sa.Text()),
    )
    for column_name, column_type in new_entity_columns:
        if column_name not in entity_columns:
            op.add_column('entities', sa.Column(column_name, column_type, nullable=True))

    # Rename context -> context_snippet unless the rename already happened.
    rename_pending = 'context' in entity_columns and 'context_snippet' not in entity_columns
    if rename_pending:
        op.alter_column('entities', 'context', new_column_name='context_snippet')

    # 2. is_seed flag on investigations.
    if 'is_seed' not in investigation_columns:
        is_seed = sa.Column('is_seed', sa.Boolean(), server_default='false', nullable=False)
        op.add_column('investigations', is_seed)

    # 3. investigation_entity_links is handled in 0001_initial_schema, so
    #    there is nothing to create for it here.

    # 4. Indexes on entities (skipped when an index of that name exists).
    entity_indexes = {ix['name'] for ix in inspector.get_indexes('entities')}
    wanted_indexes = (
        ('ix_entities_canonical_value', ['canonical_value']),
        ('ix_entity_canonical', ['entity_type', 'canonical_value']),
    )
    for index_name, index_columns in wanted_indexes:
        if index_name not in entity_indexes:
            op.create_index(index_name, 'entities', index_columns)

    # 5. Backfill canonical_value with size limits to avoid B-tree index row
    #    size errors. Order matters: the last statement only fills rows the
    #    first two left NULL.
    backfill_statements = (
        "UPDATE entities SET canonical_value = substring(lower(regexp_replace(value, '[\\s\\-_\\.]', '', 'g')), 1, 1024) WHERE entity_type IN ('THREAT_ACTOR', 'MALWARE', 'FORUM', 'THREAT_ACTOR_HANDLE', 'MALWARE_FAMILY', 'RANSOMWARE_GROUP', 'handle', 'malware', 'ransomware_group');",
        "UPDATE entities SET canonical_value = substring(lower(value), 1, 1024) WHERE entity_type IN ('EMAIL', 'ONION_URL', 'EMAIL_ADDRESS', 'email', 'onion_url');",
        "UPDATE entities SET canonical_value = substring(value, 1, 1024) WHERE canonical_value IS NULL;",
    )
    for statement in backfill_statements:
        op.execute(statement)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def downgrade() -> None:
    """Revert the schema changes made by this revision's ``upgrade``.

    Drops the two canonical-value indexes, removes ``investigations.is_seed``,
    renames ``entities.context_snippet`` back to ``context`` and drops the
    ``historical_context`` and ``canonical_value`` columns.

    The ``investigation_entity_links`` table is deliberately left alone: this
    revision's ``upgrade`` does not create it (it is created by
    0001_initial_schema). Dropping it here would leave the table missing after
    a downgrade/upgrade round trip and would make the 0001 downgrade fail when
    it tries to drop the same table.
    """
    op.drop_index('ix_entity_canonical', table_name='entities')
    op.drop_index('ix_entities_canonical_value', table_name='entities')
    op.drop_column('investigations', 'is_seed')
    op.alter_column('entities', 'context_snippet', new_column_name='context')
    op.drop_column('entities', 'historical_context')
    op.drop_column('entities', 'canonical_value')
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Add posted_at column to pages table
|
|
2
|
+
|
|
3
|
+
Revision ID: 0005_add_page_posted_at
|
|
4
|
+
Revises: 0004_add_canonical_val_links
|
|
5
|
+
Create Date: 2026-04-16
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "0005_add_page_posted_at"
|
|
15
|
+
down_revision: Union[str, None] = "0004_add_canonical_val_links"
|
|
16
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
17
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def upgrade() -> None:
    """Add the nullable ``pages.posted_at`` timestamp and its index if missing."""
    bind = op.get_bind()

    page_columns = {col["name"] for col in sa.inspect(bind).get_columns("pages")}
    if "posted_at" not in page_columns:
        posted_at = sa.Column("posted_at", sa.DateTime(timezone=True), nullable=True)
        op.add_column("pages", posted_at)

    # A second inspector is built after the column step, before reading indexes.
    page_indexes = {ix["name"] for ix in sa.inspect(bind).get_indexes("pages")}
    if "ix_pages_posted_at" in page_indexes:
        return
    op.create_index("ix_pages_posted_at", "pages", ["posted_at"])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def downgrade() -> None:
    """Drop the ``ix_pages_posted_at`` index, then the ``posted_at`` column."""
    table = "pages"
    op.drop_index("ix_pages_posted_at", table_name=table)
    op.drop_column(table, "posted_at")
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Add extraction_method to entities
|
|
2
|
+
|
|
3
|
+
Revision ID: 0006_add_extraction_method
|
|
4
|
+
Revises: 0005_add_page_posted_at
|
|
5
|
+
Create Date: 2026-04-16
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "0006_add_extraction_method"
|
|
15
|
+
down_revision: Union[str, None] = "0005_add_page_posted_at"
|
|
16
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
17
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def upgrade() -> None:
    """Add the nullable ``entities.extraction_method`` column if it is missing."""
    inspector = sa.inspect(op.get_bind())
    entity_columns = {col["name"] for col in inspector.get_columns("entities")}
    if "extraction_method" in entity_columns:
        # Column is already there; nothing to do.
        return

    extraction_method = sa.Column(
        "extraction_method",
        sa.String(length=10),
        nullable=True,
    )
    op.add_column("entities", extraction_method)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def downgrade() -> None:
    """Remove the ``extraction_method`` column from ``entities``."""
    table_name, column_name = "entities", "extraction_method"
    op.drop_column(table_name, column_name)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Add monitor_alerts table
|
|
2
|
+
|
|
3
|
+
Revision ID: 0007_add_monitor_alerts
|
|
4
|
+
Revises: 0006_add_extraction_method
|
|
5
|
+
Create Date: 2026-04-17
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from alembic import op
|
|
13
|
+
|
|
14
|
+
revision: str = "0007_add_monitor_alerts"
|
|
15
|
+
down_revision: Union[str, None] = "0006_add_extraction_method"
|
|
16
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
17
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def upgrade() -> None:
    """Do nothing; table 'monitor_alerts' is already created in 0001_initial_schema."""
    return None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def downgrade() -> None:
    """Do nothing; the matching upgrade makes no schema changes."""
    return None
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Add actor_style_profiles table
|
|
2
|
+
|
|
3
|
+
Revision ID: 0008_add_actor_style_profiles
|
|
4
|
+
Revises: 0007_add_monitor_alerts
|
|
5
|
+
Create Date: 2026-04-17 19:55:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from alembic import op
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
|
|
12
|
+
# revision identifiers, used by Alembic.
|
|
13
|
+
revision = '0008_add_actor_style_profiles'
|
|
14
|
+
down_revision = '0007_add_monitor_alerts'
|
|
15
|
+
branch_labels = None
|
|
16
|
+
depends_on = None
|
|
17
|
+
|
|
18
|
+
def upgrade():
    """Do nothing; table actor_style_profiles is already created in 0001_initial_schema."""
    return None
|
|
21
|
+
|
|
22
|
+
def downgrade():
    """Do nothing; the matching upgrade makes no schema changes."""
    return None
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Add users table and seed default admin
|
|
2
|
+
|
|
3
|
+
Revision ID: 0009_add_users_table
|
|
4
|
+
Revises: 0008_add_actor_style_profiles
|
|
5
|
+
Create Date: 2026-04-17 22:45:00.000000
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from alembic import op
|
|
9
|
+
import sqlalchemy as sa
|
|
10
|
+
from passlib.context import CryptContext
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
|
|
13
|
+
# revision identifiers, used by Alembic.
|
|
14
|
+
revision = '0009_add_users_table'
|
|
15
|
+
down_revision = '0008_add_actor_style_profiles'
|
|
16
|
+
branch_labels = None
|
|
17
|
+
depends_on = None
|
|
18
|
+
|
|
19
|
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
|
20
|
+
|
|
21
|
+
def upgrade():
    """Seed the default admin account into the existing ``users`` table.

    The table itself is already created in 0001_initial_schema. The row is
    inserted with ``is_active`` and ``must_reset_password`` both true and with
    the hash of a random placeholder password that is never stored or printed,
    so the password must be set during setup or changed on first login.

    The values are passed as bound parameters instead of being formatted into
    the SQL string, so no value can be misread as SQL syntax or as a
    ``:name`` bind marker.
    """
    import secrets

    placeholder_password = secrets.token_urlsafe(32)
    hashed_pwd = pwd_context.hash(placeholder_password)
    created_at = datetime.now(timezone.utc)

    seed_admin = sa.text(
        """
        INSERT INTO users (email, hashed_password, is_active, must_reset_password, created_at)
        VALUES (:email, :hashed_password, true, true, :created_at)
        """
    ).bindparams(
        # Typed explicitly so the driver sends a timestamp rather than text.
        sa.bindparam("created_at", value=created_at, type_=sa.DateTime(timezone=True)),
        email="admin@voidaccess.tech",
        hashed_password=hashed_pwd,
    )
    op.execute(seed_admin)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def downgrade():
    """Delete the seeded admin row.

    The ``users`` table is not dropped here because it belongs to
    0001_initial_schema.
    """
    delete_seeded_admin = "DELETE FROM users WHERE email = 'admin@voidaccess.tech'"
    op.execute(delete_seeded_admin)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Add investigation_id to entity_relationships"""
|
|
2
|
+
|
|
3
|
+
from alembic import op
|
|
4
|
+
import sqlalchemy as sa
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
revision = "0010_add_investigation_id_rel"
|
|
8
|
+
down_revision = "0009_add_users_table"
|
|
9
|
+
branch_labels = None
|
|
10
|
+
depends_on = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def upgrade():
    """Add a nullable, indexed ``investigation_id`` foreign key to ``entity_relationships``."""
    investigation_fk = sa.ForeignKey("investigations.id", ondelete="SET NULL")
    investigation_id = sa.Column(
        "investigation_id",
        sa.UUID(as_uuid=True),
        investigation_fk,
        nullable=True,
        # Per the original author's note, index=True makes add_column create
        # 'ix_entity_relationships_investigation_id', so no create_index call.
        index=True,
    )
    op.add_column("entity_relationships", investigation_id)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def downgrade():
    """Remove the ``investigation_id`` column from ``entity_relationships``."""
    table_name, column_name = "entity_relationships", "investigation_id"
    op.drop_column(table_name, column_name)
|