voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. analysis/__init__.py +49 -0
  2. analysis/opsec.py +454 -0
  3. analysis/patterns.py +202 -0
  4. analysis/temporal.py +201 -0
  5. api/__init__.py +1 -0
  6. api/auth.py +163 -0
  7. api/main.py +509 -0
  8. api/routes/__init__.py +1 -0
  9. api/routes/admin.py +214 -0
  10. api/routes/auth.py +157 -0
  11. api/routes/entities.py +871 -0
  12. api/routes/export.py +359 -0
  13. api/routes/investigations.py +2567 -0
  14. api/routes/monitors.py +405 -0
  15. api/routes/search.py +157 -0
  16. api/routes/settings.py +851 -0
  17. auth/__init__.py +1 -0
  18. auth/token_blacklist.py +108 -0
  19. cli/__init__.py +3 -0
  20. cli/adapters/__init__.py +1 -0
  21. cli/adapters/sqlite.py +273 -0
  22. cli/browser.py +376 -0
  23. cli/commands/__init__.py +1 -0
  24. cli/commands/configure.py +185 -0
  25. cli/commands/enrich.py +154 -0
  26. cli/commands/export.py +158 -0
  27. cli/commands/investigate.py +601 -0
  28. cli/commands/show.py +87 -0
  29. cli/config.py +180 -0
  30. cli/display.py +212 -0
  31. cli/main.py +154 -0
  32. cli/tor_detect.py +71 -0
  33. config.py +180 -0
  34. crawler/__init__.py +28 -0
  35. crawler/dedup.py +97 -0
  36. crawler/frontier.py +115 -0
  37. crawler/spider.py +462 -0
  38. crawler/utils.py +122 -0
  39. db/__init__.py +47 -0
  40. db/migrations/__init__.py +0 -0
  41. db/migrations/env.py +80 -0
  42. db/migrations/versions/0001_initial_schema.py +270 -0
  43. db/migrations/versions/0002_add_investigation_status_column.py +27 -0
  44. db/migrations/versions/0002_add_missing_tables.py +33 -0
  45. db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
  46. db/migrations/versions/0004_add_page_posted_at.py +41 -0
  47. db/migrations/versions/0005_add_extraction_method.py +32 -0
  48. db/migrations/versions/0006_add_monitor_alerts.py +26 -0
  49. db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
  50. db/migrations/versions/0008_add_users_table.py +47 -0
  51. db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
  52. db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
  53. db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
  54. db/migrations/versions/0013_add_graph_status.py +31 -0
  55. db/migrations/versions/0015_add_progress_fields.py +41 -0
  56. db/migrations/versions/0016_backfill_graph_status.py +33 -0
  57. db/migrations/versions/0017_add_user_api_keys.py +44 -0
  58. db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
  59. db/migrations/versions/0019_add_content_safety_log.py +46 -0
  60. db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
  61. db/models.py +618 -0
  62. db/queries.py +841 -0
  63. db/session.py +270 -0
  64. export/__init__.py +34 -0
  65. export/misp.py +257 -0
  66. export/sigma.py +342 -0
  67. export/stix.py +418 -0
  68. extractor/__init__.py +21 -0
  69. extractor/llm_extract.py +372 -0
  70. extractor/ner.py +512 -0
  71. extractor/normalizer.py +638 -0
  72. extractor/pipeline.py +401 -0
  73. extractor/regex_patterns.py +325 -0
  74. fingerprint/__init__.py +33 -0
  75. fingerprint/profiler.py +240 -0
  76. fingerprint/stylometry.py +249 -0
  77. graph/__init__.py +73 -0
  78. graph/builder.py +894 -0
  79. graph/export.py +225 -0
  80. graph/model.py +83 -0
  81. graph/queries.py +297 -0
  82. graph/visualize.py +178 -0
  83. i18n/__init__.py +24 -0
  84. i18n/detect.py +76 -0
  85. i18n/query_expand.py +72 -0
  86. i18n/translate.py +210 -0
  87. monitor/__init__.py +27 -0
  88. monitor/_db.py +74 -0
  89. monitor/alerts.py +345 -0
  90. monitor/config.py +118 -0
  91. monitor/diff.py +75 -0
  92. monitor/jobs.py +247 -0
  93. monitor/scheduler.py +184 -0
  94. scraper/__init__.py +0 -0
  95. scraper/scrape.py +857 -0
  96. scraper/scrape_js.py +272 -0
  97. search/__init__.py +318 -0
  98. search/circuit_breaker.py +240 -0
  99. search/search.py +334 -0
  100. sources/__init__.py +96 -0
  101. sources/blockchain.py +444 -0
  102. sources/cache.py +93 -0
  103. sources/cisa.py +108 -0
  104. sources/dns_enrichment.py +557 -0
  105. sources/domain_reputation.py +643 -0
  106. sources/email_reputation.py +635 -0
  107. sources/engines.py +244 -0
  108. sources/enrichment.py +1244 -0
  109. sources/github_scraper.py +589 -0
  110. sources/gitlab_scraper.py +624 -0
  111. sources/hash_reputation.py +856 -0
  112. sources/historical_intel.py +253 -0
  113. sources/ip_reputation.py +521 -0
  114. sources/paste_scraper.py +484 -0
  115. sources/pastes.py +278 -0
  116. sources/rss_scraper.py +576 -0
  117. sources/seed_manager.py +373 -0
  118. sources/seeds.py +368 -0
  119. sources/shodan.py +103 -0
  120. sources/telegram.py +199 -0
  121. sources/virustotal.py +113 -0
  122. utils/__init__.py +0 -0
  123. utils/async_utils.py +89 -0
  124. utils/content_safety.py +193 -0
  125. utils/defang.py +94 -0
  126. utils/encryption.py +34 -0
  127. utils/ioc_freshness.py +124 -0
  128. utils/user_keys.py +33 -0
  129. vector/__init__.py +39 -0
  130. vector/embedder.py +100 -0
  131. vector/model_singleton.py +49 -0
  132. vector/search.py +87 -0
  133. vector/store.py +514 -0
  134. voidaccess/__init__.py +0 -0
  135. voidaccess/llm.py +717 -0
  136. voidaccess/llm_utils.py +696 -0
  137. voidaccess-1.3.0.dist-info/METADATA +395 -0
  138. voidaccess-1.3.0.dist-info/RECORD +142 -0
  139. voidaccess-1.3.0.dist-info/WHEEL +5 -0
  140. voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
  141. voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
  142. voidaccess-1.3.0.dist-info/top_level.txt +19 -0
@@ -0,0 +1,22 @@
1
+ """Add composite index on (entity_a_id, entity_b_id) for neighbor lookups (revision 0011_add_composite_index, revises 0010_add_investigation_id_rel)."""
2
+
3
+ from alembic import op
4
+ import sqlalchemy as sa
5
+
6
+
7
+ revision = "0011_add_composite_index"
8
+ down_revision = "0010_add_investigation_id_rel"
9
+ branch_labels = None
10
+ depends_on = None
11
+
12
+
13
+ def upgrade():
14
+ op.create_index(
15
+ "ix_entity_relationships_source_target",
16
+ "entity_relationships",
17
+ ["entity_a_id", "entity_b_id"],
18
+ )
19
+
20
+
21
+ def downgrade():
22
+ op.drop_index("ix_entity_relationships_source_target", "entity_relationships")
@@ -0,0 +1,52 @@
1
+ """Add page_extraction_cache table for LLM extraction caching.
2
+
3
+ Revision ID: 0012_add_page_extract_cache
4
+ Revises: 0011_add_composite_index
5
+ Create Date: 2026-04-21
6
+ """
7
+
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ revision = "0012_add_page_extract_cache"
13
+ down_revision = "0011_add_composite_index"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
+ def upgrade() -> None:
19
+ op.create_table(
20
+ "page_extraction_cache",
21
+ sa.Column(
22
+ "page_hash",
23
+ sa.String(64),
24
+ primary_key=True,
25
+ ),
26
+ sa.Column(
27
+ "entities_json",
28
+ sa.Text,
29
+ nullable=False,
30
+ ),
31
+ sa.Column(
32
+ "extracted_at",
33
+ sa.DateTime(timezone=True),
34
+ nullable=False,
35
+ server_default=sa.func.now(),
36
+ ),
37
+ sa.Column(
38
+ "expires_at",
39
+ sa.DateTime(timezone=True),
40
+ nullable=False,
41
+ ),
42
+ )
43
+ op.create_index(
44
+ "ix_page_extraction_cache_expires",
45
+ "page_extraction_cache",
46
+ ["expires_at"],
47
+ )
48
+
49
+
50
+ def downgrade() -> None:
51
+ op.drop_index("ix_page_extraction_cache_expires", table_name="page_extraction_cache")
52
+ op.drop_table("page_extraction_cache")
@@ -0,0 +1,31 @@
1
+ """Add graph_status column to investigations table.
2
+
3
+ Revision ID: 0013_add_graph_status
4
+ Revises: 0012_add_page_extract_cache
5
+ Create Date: 2026-04-23
6
+ """
7
+
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ revision = "0013_add_graph_status"
13
+ down_revision = "0012_add_page_extract_cache"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
+ def upgrade() -> None:
19
+ op.add_column(
20
+ "investigations",
21
+ sa.Column(
22
+ "graph_status",
23
+ sa.String(20),
24
+ nullable=False,
25
+ server_default="pending",
26
+ ),
27
+ )
28
+
29
+
30
+ def downgrade() -> None:
31
+ op.drop_column("investigations", "graph_status")
@@ -0,0 +1,41 @@
1
+ """Add pipeline progress tracking fields to investigations table.
2
+
3
+ Revision ID: 0015_add_progress_fields
4
+ Revises: 0013_add_graph_status
5
+ Create Date: 2026-04-23
6
+ """
7
+
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ revision = "0015_add_progress_fields"
13
+ down_revision = "0013_add_graph_status"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
+ def upgrade() -> None:
19
+ op.add_column(
20
+ "investigations",
21
+ sa.Column("current_step", sa.Integer, nullable=False, server_default="0"),
22
+ )
23
+ op.add_column(
24
+ "investigations",
25
+ sa.Column("current_step_label", sa.String(200), nullable=False, server_default=""),
26
+ )
27
+ op.add_column(
28
+ "investigations",
29
+ sa.Column("entity_count", sa.Integer, nullable=False, server_default="0"),
30
+ )
31
+ op.add_column(
32
+ "investigations",
33
+ sa.Column("page_count", sa.Integer, nullable=False, server_default="0"),
34
+ )
35
+
36
+
37
+ def downgrade() -> None:
38
+ op.drop_column("investigations", "page_count")
39
+ op.drop_column("investigations", "entity_count")
40
+ op.drop_column("investigations", "current_step_label")
41
+ op.drop_column("investigations", "current_step")
@@ -0,0 +1,33 @@
1
+ """Backfill graph_status for historical investigations.
2
+
3
+ Revision ID: 0016_backfill_graph_status
4
+ Revises: 0015_add_progress_fields
5
+ Create Date: 2026-04-24
6
+ """
7
+
8
+ from alembic import op
9
+
10
+
11
+ revision = "0016_backfill_graph_status"
12
+ down_revision = "0015_add_progress_fields"
13
+ branch_labels = None
14
+ depends_on = None
15
+
16
+
17
+ def upgrade() -> None:
18
+ op.execute("""
19
+ UPDATE investigations
20
+ SET graph_status = 'built'
21
+ WHERE status = 'completed'
22
+ AND graph_status = 'pending'
23
+ """)
24
+ op.execute("""
25
+ UPDATE investigations
26
+ SET graph_status = 'no_data'
27
+ WHERE status IN ('completed_no_results', 'failed')
28
+ AND graph_status = 'pending'
29
+ """)
30
+
31
+
32
+ def downgrade() -> None:
33
+ pass # data migration, no safe rollback
@@ -0,0 +1,44 @@
1
+ """Add user_api_keys table.
2
+
3
+ Revision ID: 0017_add_user_api_keys
4
+ Revises: 0016_backfill_graph_status
5
+ Create Date: 2026-04-25
6
+ """
7
+
8
+ import sqlalchemy as sa
9
+ from alembic import op
10
+
11
+
12
+ revision = "0017_add_user_api_keys"
13
+ down_revision = "0016_backfill_graph_status"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
+ def upgrade() -> None:
19
+ op.create_table(
20
+ "user_api_keys",
21
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
22
+ sa.Column("user_id", sa.Integer(), nullable=False),
23
+ sa.Column("key_name", sa.String(length=64), nullable=False),
24
+ sa.Column("encrypted_value", sa.Text(), nullable=False),
25
+ sa.Column(
26
+ "created_at",
27
+ sa.DateTime(timezone=True),
28
+ server_default=sa.text("CURRENT_TIMESTAMP"),
29
+ nullable=False,
30
+ ),
31
+ sa.Column(
32
+ "updated_at",
33
+ sa.DateTime(timezone=True),
34
+ server_default=sa.text("CURRENT_TIMESTAMP"),
35
+ nullable=False,
36
+ ),
37
+ sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
38
+ sa.PrimaryKeyConstraint("id"),
39
+ sa.UniqueConstraint("user_id", "key_name"),
40
+ )
41
+
42
+
43
+ def downgrade() -> None:
44
+ op.drop_table("user_api_keys")
@@ -0,0 +1,33 @@
1
+ """Add user_id to investigations.
2
+
3
+ Revision ID: 0018_user_id_investigations
4
+ Revises: 0017_add_user_api_keys
5
+ Create Date: 2026-04-25
6
+ """
7
+
8
+ import sqlalchemy as sa
9
+ from alembic import op
10
+
11
+
12
+ revision = "0018_user_id_investigations"
13
+ down_revision = "0017_add_user_api_keys"
14
+ branch_labels = None
15
+ depends_on = None
16
+
17
+
18
+ def upgrade() -> None:
19
+ op.add_column(
20
+ "investigations",
21
+ sa.Column(
22
+ "user_id",
23
+ sa.Integer(),
24
+ sa.ForeignKey("users.id", ondelete="SET NULL"),
25
+ nullable=True,
26
+ ),
27
+ )
28
+ op.create_index("ix_investigations_user_id", "investigations", ["user_id"])
29
+
30
+
31
+ def downgrade() -> None:
32
+ op.drop_index("ix_investigations_user_id", table_name="investigations")
33
+ op.drop_column("investigations", "user_id")
@@ -0,0 +1,46 @@
1
+ """Add content_safety_events table.
2
+
3
+ Revision ID: 0019_add_content_safety_log
4
+ Revises: 0018_user_id_investigations
5
+ Create Date: 2026-04-30
6
+ """
7
+
8
+ import sqlalchemy as sa
9
+ from alembic import op
10
+
11
+ revision = "0019_add_content_safety_log"
12
+ down_revision = "0018_user_id_investigations"
13
+ branch_labels = None
14
+ depends_on = None
15
+
16
+
17
+ def upgrade() -> None:
18
+ op.create_table(
19
+ "content_safety_events",
20
+ sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True),
21
+ sa.Column("event_type", sa.String(50), nullable=False),
22
+ sa.Column("user_id", sa.Integer(), nullable=True),
23
+ sa.Column("content_hash", sa.String(64), nullable=True),
24
+ sa.Column(
25
+ "timestamp",
26
+ sa.DateTime(timezone=True),
27
+ server_default=sa.func.now(),
28
+ nullable=False,
29
+ ),
30
+ )
31
+ op.create_index(
32
+ "ix_content_safety_events_event_type",
33
+ "content_safety_events",
34
+ ["event_type"],
35
+ )
36
+ op.create_index(
37
+ "ix_content_safety_events_timestamp",
38
+ "content_safety_events",
39
+ ["timestamp"],
40
+ )
41
+
42
+
43
+ def downgrade() -> None:
44
+ op.drop_index("ix_content_safety_events_timestamp", table_name="content_safety_events")
45
+ op.drop_index("ix_content_safety_events_event_type", table_name="content_safety_events")
46
+ op.drop_table("content_safety_events")
@@ -0,0 +1,50 @@
1
+ """Add entity source tracking and freshness fields.
2
+
3
+ Revision ID: 0020_add_entity_source_tracking
4
+ Revises: 0019_add_content_safety_log
5
+ Create Date: 2026-04-30
6
+ """
7
+ from alembic import op
8
+ import sqlalchemy as sa
9
+
10
+
11
+ revision = "0020_add_entity_source_tracking"
12
+ down_revision = "0019_add_content_safety_log"
13
+ branch_labels = None
14
+ depends_on = None
15
+
16
+
17
+ def upgrade() -> None:
18
+ op.add_column(
19
+ "entities",
20
+ sa.Column("source_count", sa.Integer(), server_default="1", nullable=False),
21
+ )
22
+ op.add_column(
23
+ "entities",
24
+ sa.Column("corroborating_sources", sa.Text(), nullable=True),
25
+ )
26
+ op.add_column(
27
+ "entities",
28
+ sa.Column(
29
+ "first_seen_at",
30
+ sa.DateTime(timezone=True),
31
+ server_default=sa.func.now(),
32
+ nullable=True,
33
+ ),
34
+ )
35
+ op.add_column(
36
+ "entities",
37
+ sa.Column(
38
+ "last_seen_at",
39
+ sa.DateTime(timezone=True),
40
+ server_default=sa.func.now(),
41
+ nullable=True,
42
+ ),
43
+ )
44
+
45
+
46
+ def downgrade() -> None:
47
+ op.drop_column("entities", "last_seen_at")
48
+ op.drop_column("entities", "first_seen_at")
49
+ op.drop_column("entities", "corroborating_sources")
50
+ op.drop_column("entities", "source_count")