voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
db/migrations/env.py ADDED
@@ -0,0 +1,80 @@
1
+ """
2
+ Alembic migration environment.
3
+
4
+ DATABASE_URL is read from the environment (via config.py) so credentials
5
+ are never stored in version control. The models' Base.metadata is imported
6
+ here so `alembic revision --autogenerate` can diff the ORM against the DB.
7
+ """
8
+
9
+ import os
10
+ import sys
11
+ from logging.config import fileConfig
12
+
13
+ from alembic import context
14
+ from sqlalchemy import engine_from_config, pool
15
+
16
+ # Make sure the project root is on sys.path so `from db.models import Base`
17
+ # resolves correctly regardless of where alembic is invoked from.
18
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
19
+
20
+ from config import DATABASE_URL # noqa: E402
21
+ from db.models import Base # noqa: E402 — imports all mapped classes
22
+
23
# Alembic Config object (gives access to alembic.ini values)
config = context.config

# Override sqlalchemy.url with the value from the environment.
# This means alembic.ini never needs a real connection string.
# Config values pass through ConfigParser interpolation, so a literal "%"
# (for example a URL-encoded password such as "p%40ss") has to be doubled;
# otherwise set_main_option raises "invalid interpolation syntax".
# get_main_option / get_section return the value with single "%" again.
if DATABASE_URL:
    config.set_main_option("sqlalchemy.url", DATABASE_URL.replace("%", "%%"))

# Set up Python logging from alembic.ini
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# This is what autogenerate inspects to build migration scripts.
target_metadata = Base.metadata
37
+
38
+
39
def run_migrations_offline() -> None:
    """
    Run the migrations in offline mode (no live DB connection).

    The context is configured from the ``sqlalchemy.url`` option only, with
    literal binds, so the migration SQL is emitted instead of executed.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
        "compare_type": True,
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
54
+
55
+
56
def run_migrations_online() -> None:
    """
    Run the migrations in online mode, against a live DB connection.

    An engine is built from the ``sqlalchemy.``-prefixed options of the
    alembic.ini main section, and the migrations run inside one transaction
    on a connection obtained from it.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        # NullPool: a one-shot migration run has no use for pooled connections.
        poolclass=pool.NullPool,
    )

    with engine.connect() as live_connection:
        context.configure(
            connection=live_connection,
            target_metadata=target_metadata,
            # Let autogenerate notice changed column types and server defaults.
            compare_type=True,
            compare_server_default=True,
        )
        with context.begin_transaction():
            context.run_migrations()
75
+
76
+
77
# Alembic executes this module directly: pick the runner matching the mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
@@ -0,0 +1,270 @@
1
+ """Initial schema — all Phase 1A tables.
2
+
3
+ Revision ID: 0001
4
+ Revises: (none — first migration)
5
+ Create Date: 2026-04-14
6
+
7
+ Tables created
8
+ --------------
9
+ investigations
10
+ sources
11
+ investigation_sources (junction)
12
+ pages
13
+ entities
14
+ entity_relationships
15
+ users
16
+ monitor_alerts
17
+ investigation_entity_links
18
+ actor_style_profiles
19
+ """
20
+
21
+ from typing import Sequence, Union
22
+
23
+ import sqlalchemy as sa
24
+ from alembic import op
25
+
26
+ revision: str = "0001_initial_schema"
27
+ down_revision: Union[str, None] = None
28
+ branch_labels: Union[str, Sequence[str], None] = None
29
+ depends_on: Union[str, Sequence[str], None] = None
30
+
31
+
32
def upgrade() -> None:
    """Create every table of the initial schema together with its indexes.

    Tables are created parents-first, so each foreign-key target exists
    before the table that references it.
    """

    def uuid_pk() -> sa.Column:
        # Non-nullable UUID "id" column (the PK constraint is declared per table).
        return sa.Column("id", sa.UUID(), nullable=False)

    def serial_pk() -> sa.Column:
        # Non-nullable auto-incrementing integer "id" column.
        return sa.Column("id", sa.Integer(), nullable=False, autoincrement=True)

    def stamp(name: str, *, nullable: bool = False) -> sa.Column:
        # Timezone-aware timestamp column.
        return sa.Column(name, sa.DateTime(timezone=True), nullable=nullable)

    def uuid_ref(name: str, target: str, *, ondelete: str, nullable: bool) -> sa.Column:
        # UUID foreign-key column pointing at ``target``.
        return sa.Column(
            name,
            sa.UUID(),
            sa.ForeignKey(target, ondelete=ondelete),
            nullable=nullable,
        )

    def switch(name: str, *, default: str) -> sa.Column:
        # Non-nullable boolean column with a server-side default.
        return sa.Column(name, sa.Boolean(), nullable=False, server_default=default)

    # --- investigations ---------------------------------------------------
    op.create_table(
        "investigations",
        uuid_pk(),
        sa.Column("run_id", sa.UUID(), nullable=False),
        sa.Column("query", sa.Text(), nullable=False),
        sa.Column("refined_query", sa.Text(), nullable=True),
        sa.Column("model_used", sa.String(100), nullable=True),
        sa.Column("preset", sa.String(50), nullable=True),
        sa.Column("summary", sa.Text(), nullable=True),
        stamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("run_id"),
    )
    op.create_index("ix_investigations_run_id", "investigations", ["run_id"])

    # --- sources ----------------------------------------------------------
    op.create_table(
        "sources",
        uuid_pk(),
        sa.Column("onion_address", sa.String(255), nullable=False),
        stamp("first_seen"),
        stamp("last_seen"),
        sa.Column("status", sa.String(20), nullable=False, server_default="unknown"),
        sa.Column("source_type", sa.String(30), nullable=False, server_default="search_result"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("onion_address"),
    )
    op.create_index("ix_sources_onion_address", "sources", ["onion_address"])

    # --- investigation_sources (many-to-many junction) --------------------
    op.create_table(
        "investigation_sources",
        uuid_ref("investigation_id", "investigations.id", ondelete="CASCADE", nullable=False),
        uuid_ref("source_id", "sources.id", ondelete="CASCADE", nullable=False),
        stamp("added_at"),
        sa.PrimaryKeyConstraint("investigation_id", "source_id"),
    )

    # --- pages ------------------------------------------------------------
    op.create_table(
        "pages",
        uuid_pk(),
        uuid_ref("source_id", "sources.id", ondelete="SET NULL", nullable=True),
        sa.Column("url", sa.Text(), nullable=False),
        sa.Column("raw_content_hash", sa.String(64), nullable=True),
        sa.Column("cleaned_text", sa.Text(), nullable=True),
        stamp("scrape_timestamp"),
        sa.Column("language", sa.String(10), nullable=True),
        sa.Column("byte_size", sa.Integer(), nullable=True),
        stamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("url"),
    )
    op.create_index("ix_pages_source_id", "pages", ["source_id"])
    op.create_index("ix_pages_raw_content_hash", "pages", ["raw_content_hash"])

    # --- entities ---------------------------------------------------------
    op.create_table(
        "entities",
        uuid_pk(),
        uuid_ref("page_id", "pages.id", ondelete="CASCADE", nullable=False),
        uuid_ref("investigation_id", "investigations.id", ondelete="SET NULL", nullable=True),
        sa.Column("entity_type", sa.String(50), nullable=False),
        sa.Column("value", sa.Text(), nullable=False),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="1.0"),
        sa.Column("context", sa.Text(), nullable=True),
        stamp("first_seen"),
        stamp("last_seen"),
        stamp("created_at"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_entities_page_id", "entities", ["page_id"])
    op.create_index("ix_entities_investigation_id", "entities", ["investigation_id"])
    op.create_index("ix_entities_entity_type", "entities", ["entity_type"])

    # --- entity_relationships ---------------------------------------------
    op.create_table(
        "entity_relationships",
        uuid_pk(),
        uuid_ref("entity_a_id", "entities.id", ondelete="CASCADE", nullable=False),
        uuid_ref("entity_b_id", "entities.id", ondelete="CASCADE", nullable=False),
        sa.Column("relationship_type", sa.String(50), nullable=False),
        uuid_ref("source_page_id", "pages.id", ondelete="SET NULL", nullable=True),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="1.0"),
        stamp("first_seen"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_entity_relationships_entity_a_id", "entity_relationships", ["entity_a_id"])
    op.create_index("ix_entity_relationships_entity_b_id", "entity_relationships", ["entity_b_id"])
    op.create_index("ix_entity_relationships_relationship_type", "entity_relationships", ["relationship_type"])

    # --- users ------------------------------------------------------------
    op.create_table(
        "users",
        serial_pk(),
        sa.Column("email", sa.String(255), nullable=False),
        sa.Column("hashed_password", sa.String(), nullable=False),
        switch("is_active", default="true"),
        switch("must_reset_password", default="false"),
        stamp("created_at"),
        stamp("last_login_at", nullable=True),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("email"),
    )
    op.create_index("ix_users_email", "users", ["email"])

    # --- monitor_alerts ---------------------------------------------------
    op.create_table(
        "monitor_alerts",
        serial_pk(),
        sa.Column("monitor_name", sa.String(), nullable=False),
        stamp("triggered_at"),
        sa.Column("change_type", sa.String(50), nullable=False),
        sa.Column("summary", sa.Text(), nullable=False),
        sa.Column("diff_data", sa.JSON(), nullable=True),
        sa.Column("severity", sa.String(20), nullable=False, server_default="info"),
        sa.Column("entity_count_delta", sa.Integer(), nullable=False, server_default="0"),
        switch("delivered", default="false"),
        sa.Column("delivery_channels", sa.JSON(), nullable=True),
        switch("acknowledged", default="false"),
        stamp("acknowledged_at", nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_monitor_alerts_monitor_name", "monitor_alerts", ["monitor_name"])
    op.create_index("ix_monitor_alerts_triggered_at", "monitor_alerts", ["triggered_at"])
    op.create_index("ix_monitor_alerts_monitor_triggered", "monitor_alerts", ["monitor_name", "triggered_at"])

    # --- investigation_entity_links ---------------------------------------
    op.create_table(
        "investigation_entity_links",
        uuid_pk(),
        uuid_ref("entity_id", "entities.id", ondelete="CASCADE", nullable=False),
        uuid_ref("investigation_id", "investigations.id", ondelete="CASCADE", nullable=False),
        stamp("linked_at"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("entity_id", "investigation_id"),
    )
    op.create_index("ix_investigation_entity_links_entity_id", "investigation_entity_links", ["entity_id"])
    op.create_index("ix_investigation_entity_links_investigation_id", "investigation_entity_links", ["investigation_id"])

    # --- actor_style_profiles ---------------------------------------------
    op.create_table(
        "actor_style_profiles",
        serial_pk(),
        sa.Column("canonical_value", sa.String(), nullable=False),
        sa.Column("entity_type", sa.String(), nullable=False),
        sa.Column("style_vector", sa.JSON(), nullable=False),
        sa.Column("sample_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("total_chars", sa.Integer(), nullable=False, server_default="0"),
        stamp("last_updated"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("canonical_value", "entity_type"),
    )
    op.create_index("ix_actor_style_profiles_canonical_value", "actor_style_profiles", ["canonical_value"])
258
+
259
+
260
def downgrade() -> None:
    """Drop every table created by upgrade(), referencing tables before their targets."""
    tables_in_drop_order = (
        "actor_style_profiles",
        "investigation_entity_links",
        "monitor_alerts",
        "users",
        "entity_relationships",
        "entities",
        "pages",
        "investigation_sources",
        "sources",
        "investigations",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
@@ -0,0 +1,27 @@
1
+ """Add status column to investigations.
2
+
3
+ Revision ID: 0003_add_investigation_status
4
+ Revises: 0002_add_missing_tables
5
+ Create Date: 2026-04-14
6
+ """
7
+
8
+ from typing import Sequence, Union
9
+
10
+ import sqlalchemy as sa
11
+ from alembic import op
12
+
13
+ revision: str = "0003_add_investigation_status"
14
+ down_revision: Union[str, None] = "0002_add_missing_tables"
15
+ branch_labels: Union[str, Sequence[str], None] = None
16
+ depends_on: Union[str, Sequence[str], None] = None
17
+
18
+
19
def upgrade() -> None:
    """Add the non-nullable ``status`` column (server default "pending") to investigations."""
    status_column = sa.Column(
        "status",
        sa.String(length=20),
        nullable=False,
        server_default="pending",
    )
    op.add_column("investigations", status_column)
24
+
25
+
26
def downgrade() -> None:
    """Remove the ``status`` column from investigations."""
    op.drop_column(table_name="investigations", column_name="status")
@@ -0,0 +1,33 @@
1
+ """Add missing tables — users, monitor_alerts, investigation_entity_links, actor_style_profiles.
2
+
3
+ Revision ID: 0002
4
+ Revises: 0001
5
+ Create Date: 2026-04-20
6
+
7
+ Tables created
8
+ --------------
9
+ users
10
+ monitor_alerts
11
+ investigation_entity_links
12
+ actor_style_profiles
13
+ """
14
+
15
+ from typing import Sequence, Union
16
+
17
+ import sqlalchemy as sa
18
+ from alembic import op
19
+
20
+ revision: str = "0002_add_missing_tables"
21
+ down_revision: Union[str, None] = "0001_initial_schema"
22
+ branch_labels: Union[str, Sequence[str], None] = None
23
+ depends_on: Union[str, Sequence[str], None] = None
24
+
25
+
26
def upgrade() -> None:
    """Do nothing; this revision only keeps the migration chain intact.

    users, monitor_alerts, investigation_entity_links and
    actor_style_profiles are handled in 0001_initial_schema.
    """
    return None
30
+
31
+
32
def downgrade() -> None:
    """Do nothing; upgrade() made no schema change to revert."""
    return None
@@ -0,0 +1,61 @@
1
+ """add canonical value and entity links
2
+
3
+ Revision ID: 0004_add_canonical_val_links
4
+ Revises: 0003_add_investigation_status
5
+ Create Date: 2026-04-16
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+ import sqlalchemy as sa
10
+ from alembic import op
11
+
12
+ revision: str = "0004_add_canonical_val_links"
13
+ down_revision: Union[str, None] = "0003_add_investigation_status"
14
+ branch_labels: Union[str, Sequence[str], None] = None
15
+ depends_on: Union[str, Sequence[str], None] = None
16
+
17
+
18
def upgrade() -> None:
    """Add canonical-value support to entities and ``is_seed`` to investigations.

    Every schema change is guarded by an inspection of the live database, so
    columns and indexes that already exist are left alone. The
    investigation_entity_links table is handled in 0001_initial_schema.
    """
    inspector = sa.inspect(op.get_bind())
    entity_cols = {col['name'] for col in inspector.get_columns('entities')}
    investigation_cols = {col['name'] for col in inspector.get_columns('investigations')}

    # 1. New nullable columns on entities, plus the context -> context_snippet rename.
    for col_name, col_type in (
        ('canonical_value', sa.String()),
        ('historical_context', sa.Text()),
    ):
        if col_name not in entity_cols:
            op.add_column('entities', sa.Column(col_name, col_type, nullable=True))
    if 'context' in entity_cols and 'context_snippet' not in entity_cols:
        op.alter_column('entities', 'context', new_column_name='context_snippet')

    # 2. is_seed flag on investigations.
    if 'is_seed' not in investigation_cols:
        op.add_column('investigations', sa.Column('is_seed', sa.Boolean(), server_default='false', nullable=False))

    # 3. Indexes on the canonical value (skipped when already present).
    entity_indexes = {idx['name'] for idx in inspector.get_indexes('entities')}
    for index_name, index_cols in (
        ('ix_entities_canonical_value', ['canonical_value']),
        ('ix_entity_canonical', ['entity_type', 'canonical_value']),
    ):
        if index_name not in entity_indexes:
            op.create_index(index_name, 'entities', index_cols)

    # 4. Backfill canonical_value, capped at 1024 characters to avoid
    #    B-tree index row size errors. The last statement fills whatever the
    #    two type-specific statements left NULL.
    backfill_statements = (
        "UPDATE entities SET canonical_value = substring(lower(regexp_replace(value, '[\\s\\-_\\.]', '', 'g')), 1, 1024) WHERE entity_type IN ('THREAT_ACTOR', 'MALWARE', 'FORUM', 'THREAT_ACTOR_HANDLE', 'MALWARE_FAMILY', 'RANSOMWARE_GROUP', 'handle', 'malware', 'ransomware_group');",
        "UPDATE entities SET canonical_value = substring(lower(value), 1, 1024) WHERE entity_type IN ('EMAIL', 'ONION_URL', 'EMAIL_ADDRESS', 'email', 'onion_url');",
        "UPDATE entities SET canonical_value = substring(value, 1, 1024) WHERE canonical_value IS NULL;",
    )
    for statement in backfill_statements:
        op.execute(statement)
52
+
53
+
54
def downgrade() -> None:
    """Revert the canonical-value columns, indexes and the ``is_seed`` flag.

    investigation_entity_links is deliberately NOT dropped here: this
    revision's upgrade() does not create it (0001_initial_schema does), so
    dropping it would lose the table on a downgrade/upgrade round trip and
    make the later drop in 0001's downgrade fail.
    """
    # Indexes first, then the columns they cover.
    op.drop_index('ix_entity_canonical', table_name='entities')
    op.drop_index('ix_entities_canonical_value', table_name='entities')
    op.drop_column('investigations', 'is_seed')
    # Restore the original column name used by 0001_initial_schema.
    op.alter_column('entities', 'context_snippet', new_column_name='context')
    op.drop_column('entities', 'historical_context')
    op.drop_column('entities', 'canonical_value')
@@ -0,0 +1,41 @@
1
+ """Add posted_at column to pages table
2
+
3
+ Revision ID: 0005_add_page_posted_at
4
+ Revises: 0004_add_canonical_val_links
5
+ Create Date: 2026-04-16
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
+ revision: str = "0005_add_page_posted_at"
15
+ down_revision: Union[str, None] = "0004_add_canonical_val_links"
16
+ branch_labels: Union[str, Sequence[str], None] = None
17
+ depends_on: Union[str, Sequence[str], None] = None
18
+
19
+
20
def upgrade() -> None:
    """Add the nullable ``posted_at`` timestamp to pages and index it.

    Both steps are skipped when the column / index already exists.
    """
    bind = op.get_bind()

    page_columns = {col["name"] for col in sa.inspect(bind).get_columns("pages")}
    if "posted_at" not in page_columns:
        posted_at = sa.Column("posted_at", sa.DateTime(timezone=True), nullable=True)
        op.add_column("pages", posted_at)

    # A fresh inspector is used for the index lookup, after the column change.
    page_indexes = {idx["name"] for idx in sa.inspect(bind).get_indexes("pages")}
    if "ix_pages_posted_at" not in page_indexes:
        op.create_index("ix_pages_posted_at", "pages", ["posted_at"])
37
+
38
+
39
def downgrade() -> None:
    """Drop the posted_at index, then the posted_at column, from pages."""
    op.drop_index(index_name="ix_pages_posted_at", table_name="pages")
    op.drop_column(table_name="pages", column_name="posted_at")
@@ -0,0 +1,32 @@
1
+ """Add extraction_method to entities
2
+
3
+ Revision ID: 0006_add_extraction_method
4
+ Revises: 0005_add_page_posted_at
5
+ Create Date: 2026-04-16
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
+ revision: str = "0006_add_extraction_method"
15
+ down_revision: Union[str, None] = "0005_add_page_posted_at"
16
+ branch_labels: Union[str, Sequence[str], None] = None
17
+ depends_on: Union[str, Sequence[str], None] = None
18
+
19
+
20
def upgrade() -> None:
    """Add the nullable ``extraction_method`` column to entities unless it already exists."""
    inspector = sa.inspect(op.get_bind())
    entity_columns = {col["name"] for col in inspector.get_columns("entities")}
    if "extraction_method" in entity_columns:
        return

    extraction_method = sa.Column("extraction_method", sa.String(length=10), nullable=True)
    op.add_column("entities", extraction_method)
29
+
30
+
31
def downgrade() -> None:
    """Remove the ``extraction_method`` column from entities."""
    op.drop_column(table_name="entities", column_name="extraction_method")
@@ -0,0 +1,26 @@
1
+ """Add monitor_alerts table
2
+
3
+ Revision ID: 0007_add_monitor_alerts
4
+ Revises: 0006_add_extraction_method
5
+ Create Date: 2026-04-17
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ import sqlalchemy as sa
12
+ from alembic import op
13
+
14
+ revision: str = "0007_add_monitor_alerts"
15
+ down_revision: Union[str, None] = "0006_add_extraction_method"
16
+ branch_labels: Union[str, Sequence[str], None] = None
17
+ depends_on: Union[str, Sequence[str], None] = None
18
+
19
+
20
def upgrade() -> None:
    """Do nothing; 'monitor_alerts' is already created in 0001_initial_schema."""
    return None
23
+
24
+
25
def downgrade() -> None:
    """Do nothing; upgrade() made no schema change to revert."""
    return None
@@ -0,0 +1,23 @@
1
+ """Add actor_style_profiles table
2
+
3
+ Revision ID: 0008_add_actor_style_profiles
4
+ Revises: 0007_add_monitor_alerts
5
+ Create Date: 2026-04-17 19:55:00.000000
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ from datetime import datetime, timezone
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = '0008_add_actor_style_profiles'
14
+ down_revision = '0007_add_monitor_alerts'
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
def upgrade():
    """Do nothing; actor_style_profiles is already created in 0001_initial_schema."""
    return None
21
+
22
def downgrade():
    """Do nothing; upgrade() made no schema change to revert."""
    return None
@@ -0,0 +1,47 @@
1
+ """Add users table and seed default admin
2
+
3
+ Revision ID: 0009_add_users_table
4
+ Revises: 0008_add_actor_style_profiles
5
+ Create Date: 2026-04-17 22:45:00.000000
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ from passlib.context import CryptContext
11
+ from datetime import datetime, timezone
12
+
13
+ # revision identifiers, used by Alembic.
14
+ revision = '0009_add_users_table'
15
+ down_revision = '0008_add_actor_style_profiles'
16
+ branch_labels = None
17
+ depends_on = None
18
+
19
+ pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
20
+
21
def upgrade():
    """Seed the default admin account.

    The 'users' table itself is already created in 0001_initial_schema.
    The account is stored with the bcrypt hash of a random, never-disclosed
    placeholder password and ``must_reset_password`` set, so a real password
    must be set during setup or on first login.

    The row is inserted with bound parameters rather than string-built SQL,
    and only when no user with that email exists yet, so the migration does
    not abort on the unique(email) constraint.
    """
    import secrets

    placeholder_password = secrets.token_urlsafe(32)

    seed_admin = sa.text(
        """
        INSERT INTO users (email, hashed_password, is_active, must_reset_password, created_at)
        SELECT :email, :hashed_password, true, true, :created_at
        WHERE NOT EXISTS (SELECT 1 FROM users WHERE email = :email)
        """
    ).bindparams(
        email='admin@voidaccess.tech',
        hashed_password=pwd_context.hash(placeholder_password),
        created_at=datetime.now(timezone.utc),
    )
    op.execute(seed_admin)
42
+
43
+
44
def downgrade():
    """Delete the seeded admin account.

    The users table is left in place because it belongs to
    0001_initial_schema.
    """
    remove_seeded_admin = "DELETE FROM users WHERE email = 'admin@voidaccess.tech'"
    op.execute(remove_seeded_admin)
@@ -0,0 +1,29 @@
1
+ """Add investigation_id to entity_relationships"""
2
+
3
+ from alembic import op
4
+ import sqlalchemy as sa
5
+
6
+
7
+ revision = "0010_add_investigation_id_rel"
8
+ down_revision = "0009_add_users_table"
9
+ branch_labels = None
10
+ depends_on = None
11
+
12
+
13
def upgrade():
    """Add a nullable ``investigation_id`` foreign key to entity_relationships."""
    # index=True on the column means add_column already creates the index
    # 'ix_entity_relationships_investigation_id'; no separate create_index.
    investigation_ref = sa.Column(
        "investigation_id",
        sa.UUID(as_uuid=True),
        sa.ForeignKey("investigations.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    op.add_column("entity_relationships", investigation_ref)
26
+
27
+
28
def downgrade():
    """Remove the ``investigation_id`` column from entity_relationships."""
    op.drop_column(table_name="entity_relationships", column_name="investigation_id")