PyPI - voidaccess - Versions diffs - 1.3.0__py3-none-any.whl - Mend

voidaccess 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (142)

analysis/__init__.py +49 -0
analysis/opsec.py +454 -0
analysis/patterns.py +202 -0
analysis/temporal.py +201 -0
api/__init__.py +1 -0
api/auth.py +163 -0
api/main.py +509 -0
api/routes/__init__.py +1 -0
api/routes/admin.py +214 -0
api/routes/auth.py +157 -0
api/routes/entities.py +871 -0
api/routes/export.py +359 -0
api/routes/investigations.py +2567 -0
api/routes/monitors.py +405 -0
api/routes/search.py +157 -0
api/routes/settings.py +851 -0
auth/__init__.py +1 -0
auth/token_blacklist.py +108 -0
cli/__init__.py +3 -0
cli/adapters/__init__.py +1 -0
cli/adapters/sqlite.py +273 -0
cli/browser.py +376 -0
cli/commands/__init__.py +1 -0
cli/commands/configure.py +185 -0
cli/commands/enrich.py +154 -0
cli/commands/export.py +158 -0
cli/commands/investigate.py +601 -0
cli/commands/show.py +87 -0
cli/config.py +180 -0
cli/display.py +212 -0
cli/main.py +154 -0
cli/tor_detect.py +71 -0
config.py +180 -0
crawler/__init__.py +28 -0
crawler/dedup.py +97 -0
crawler/frontier.py +115 -0
crawler/spider.py +462 -0
crawler/utils.py +122 -0
db/__init__.py +47 -0
db/migrations/__init__.py +0 -0
db/migrations/env.py +80 -0
db/migrations/versions/0001_initial_schema.py +270 -0
db/migrations/versions/0002_add_investigation_status_column.py +27 -0
db/migrations/versions/0002_add_missing_tables.py +33 -0
db/migrations/versions/0003_add_canonical_value_and_entity_links.py +61 -0
db/migrations/versions/0004_add_page_posted_at.py +41 -0
db/migrations/versions/0005_add_extraction_method.py +32 -0
db/migrations/versions/0006_add_monitor_alerts.py +26 -0
db/migrations/versions/0007_add_actor_style_profiles.py +23 -0
db/migrations/versions/0008_add_users_table.py +47 -0
db/migrations/versions/0009_add_investigation_id_to_relationships.py +29 -0
db/migrations/versions/0010_add_composite_index_entity_relationships.py +22 -0
db/migrations/versions/0011_add_page_extraction_cache.py +52 -0
db/migrations/versions/0013_add_graph_status.py +31 -0
db/migrations/versions/0015_add_progress_fields.py +41 -0
db/migrations/versions/0016_backfill_graph_status.py +33 -0
db/migrations/versions/0017_add_user_api_keys.py +44 -0
db/migrations/versions/0018_add_user_id_to_investigations.py +33 -0
db/migrations/versions/0019_add_content_safety_log.py +46 -0
db/migrations/versions/0020_add_entity_source_tracking.py +50 -0
db/models.py +618 -0
db/queries.py +841 -0
db/session.py +270 -0
export/__init__.py +34 -0
export/misp.py +257 -0
export/sigma.py +342 -0
export/stix.py +418 -0
extractor/__init__.py +21 -0
extractor/llm_extract.py +372 -0
extractor/ner.py +512 -0
extractor/normalizer.py +638 -0
extractor/pipeline.py +401 -0
extractor/regex_patterns.py +325 -0
fingerprint/__init__.py +33 -0
fingerprint/profiler.py +240 -0
fingerprint/stylometry.py +249 -0
graph/__init__.py +73 -0
graph/builder.py +894 -0
graph/export.py +225 -0
graph/model.py +83 -0
graph/queries.py +297 -0
graph/visualize.py +178 -0
i18n/__init__.py +24 -0
i18n/detect.py +76 -0
i18n/query_expand.py +72 -0
i18n/translate.py +210 -0
monitor/__init__.py +27 -0
monitor/_db.py +74 -0
monitor/alerts.py +345 -0
monitor/config.py +118 -0
monitor/diff.py +75 -0
monitor/jobs.py +247 -0
monitor/scheduler.py +184 -0
scraper/__init__.py +0 -0
scraper/scrape.py +857 -0
scraper/scrape_js.py +272 -0
search/__init__.py +318 -0
search/circuit_breaker.py +240 -0
search/search.py +334 -0
sources/__init__.py +96 -0
sources/blockchain.py +444 -0
sources/cache.py +93 -0
sources/cisa.py +108 -0
sources/dns_enrichment.py +557 -0
sources/domain_reputation.py +643 -0
sources/email_reputation.py +635 -0
sources/engines.py +244 -0
sources/enrichment.py +1244 -0
sources/github_scraper.py +589 -0
sources/gitlab_scraper.py +624 -0
sources/hash_reputation.py +856 -0
sources/historical_intel.py +253 -0
sources/ip_reputation.py +521 -0
sources/paste_scraper.py +484 -0
sources/pastes.py +278 -0
sources/rss_scraper.py +576 -0
sources/seed_manager.py +373 -0
sources/seeds.py +368 -0
sources/shodan.py +103 -0
sources/telegram.py +199 -0
sources/virustotal.py +113 -0
utils/__init__.py +0 -0
utils/async_utils.py +89 -0
utils/content_safety.py +193 -0
utils/defang.py +94 -0
utils/encryption.py +34 -0
utils/ioc_freshness.py +124 -0
utils/user_keys.py +33 -0
vector/__init__.py +39 -0
vector/embedder.py +100 -0
vector/model_singleton.py +49 -0
vector/search.py +87 -0
vector/store.py +514 -0
voidaccess/__init__.py +0 -0
voidaccess/llm.py +717 -0
voidaccess/llm_utils.py +696 -0
voidaccess-1.3.0.dist-info/METADATA +395 -0
voidaccess-1.3.0.dist-info/RECORD +142 -0
voidaccess-1.3.0.dist-info/WHEEL +5 -0
voidaccess-1.3.0.dist-info/entry_points.txt +2 -0
voidaccess-1.3.0.dist-info/licenses/LICENSE +21 -0
voidaccess-1.3.0.dist-info/top_level.txt +19 -0

export/sigma.py ADDED Viewed

@@ -0,0 +1,342 @@
+"""
+export/sigma.py — Generates draft Sigma detection rules from investigation entities.
+Sigma rules are YAML-formatted SIEM-agnostic detection rules.
+LLM assistance is optional; if provided, enriches description, tags, and falsepositives.
+Public interface
+----------------
+entities_to_sigma_rules(entities, llm)     → list[dict]
+sigma_rule_to_yaml(rule)                   → str
+export_sigma_rules(investigation_id, output_dir, llm) → list[str]
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import uuid as _uuid_module
+from pathlib import Path
+from typing import Any, Optional
+import yaml
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Entity types that produce Sigma rules
+# ---------------------------------------------------------------------------
+_SIGMA_ENTITY_TYPES = frozenset({"IP_ADDRESS", "ONION_URL", "CVE_NUMBER", "MALWARE_FAMILY", "RANSOMWARE_GROUP"})
+# ---------------------------------------------------------------------------
+# Base rule builders per entity type
+# ---------------------------------------------------------------------------
+def _base_rule_for_ip(entity: Any) -> dict:
+    return {
+        "title": f"Network connection to suspicious IP: {entity.value}",
+        "id": str(_uuid_module.uuid4()),
+        "status": "experimental",
+        "description": f"Detects outbound network connection to IP address {entity.value} "
+                       "associated with dark web activity.",
+        "references": [entity.source_url] if entity.source_url else [],
+        "tags": ["attack.initial_access"],
+        "logsource": {"category": "network", "product": "any"},
+        "detection": {
+            "selection": {"DestinationIp": entity.value},
+            "condition": "selection",
+        },
+        "falsepositives": ["Unknown"],
+        "level": "medium",
+    }
+def _base_rule_for_onion(entity: Any) -> dict:
+    return {
+        "title": f"DNS query or connection to .onion address: {entity.value[:60]}",
+        "id": str(_uuid_module.uuid4()),
+        "status": "experimental",
+        "description": f"Detects connection attempt to Tor hidden service {entity.value}.",
+        "references": [entity.source_url] if entity.source_url else [],
+        "tags": ["attack.command_and_control"],
+        "logsource": {"category": "network", "product": "any"},
+        "detection": {
+            "selection": {"DestinationHostname|contains": ".onion"},
+            "condition": "selection",
+        },
+        "falsepositives": ["Legitimate Tor browser usage"],
+        "level": "medium",
+    }
+def _base_rule_for_cve(entity: Any) -> dict:
+    return {
+        "title": f"Exploitation attempt for {entity.value}",
+        "id": str(_uuid_module.uuid4()),
+        "status": "experimental",
+        "description": f"Detects activity patterns related to exploitation of {entity.value} "
+                       "observed in dark web intelligence.",
+        "references": [entity.source_url] if entity.source_url else [],
+        "tags": ["attack.initial_access", "attack.exploitation"],
+        "logsource": {"category": "network", "product": "any"},
+        "detection": {
+            "selection": {"CommandLine|contains": entity.value},
+            "condition": "selection",
+        },
+        "falsepositives": ["Security scanners", "Penetration testing tools"],
+        "level": "high",
+    }
+def _base_rule_for_malware(entity: Any) -> dict:
+    name = entity.value
+    return {
+        "title": f"Malware family activity: {name}",
+        "id": str(_uuid_module.uuid4()),
+        "status": "experimental",
+        "description": f"Detects activity associated with {name} malware family "
+                       "as observed in dark web intelligence.",
+        "references": [entity.source_url] if entity.source_url else [],
+        "tags": ["attack.execution"],
+        "logsource": {"category": "process_creation", "product": "windows"},
+        "detection": {
+            "selection": {"CommandLine|contains": name},
+            "condition": "selection",
+        },
+        "falsepositives": ["Unknown"],
+        "level": "high",
+    }
+def _build_base_rule(entity: Any) -> Optional[dict]:
+    """Return a base Sigma rule dict for the entity, or None if unsupported type."""
+    etype = entity.entity_type
+    if etype == "IP_ADDRESS":
+        return _base_rule_for_ip(entity)
+    if etype == "ONION_URL":
+        return _base_rule_for_onion(entity)
+    if etype == "CVE_NUMBER":
+        return _base_rule_for_cve(entity)
+    if etype in ("MALWARE_FAMILY", "RANSOMWARE_GROUP"):
+        return _base_rule_for_malware(entity)
+    return None
+# ---------------------------------------------------------------------------
+# LLM enrichment
+# ---------------------------------------------------------------------------
+_LLM_PROMPT_TEMPLATE = """You are a threat intelligence analyst writing Sigma detection rules.
+Given the following base Sigma rule as JSON, enrich three fields:
+1. "description" — make it more precise and actionable
+2. "tags" — use MITRE ATT&CK tactic/technique tags (e.g. attack.t1071)
+3. "falsepositives" — list realistic false positive scenarios
+Return ONLY a JSON object with exactly these three keys: description, tags, falsepositives.
+Do not include any other text.
+Base rule:
+{base_rule_json}
+"""
+def _enrich_with_llm(rule: dict, llm: Any) -> dict:
+    """
+    Send base rule to LLM to enrich description, tags, and falsepositives.
+    Returns the original rule unchanged if LLM fails or returns invalid JSON.
+    """
+    try:
+        base_json = json.dumps(rule, indent=2)
+        prompt = _LLM_PROMPT_TEMPLATE.format(base_rule_json=base_json)
+        # Support both LangChain-style (invoke) and simple (predict/call) interfaces
+        if hasattr(llm, "invoke"):
+            response = llm.invoke(prompt)
+            # LangChain returns an AIMessage; get .content
+            content = getattr(response, "content", str(response))
+        elif callable(llm):
+            content = str(llm(prompt))
+        else:
+            return rule
+        # Strip markdown code fences if present
+        content = content.strip()
+        if content.startswith("```"):
+            lines = content.split("\n")
+            lines = [l for l in lines if not l.startswith("```")]
+            content = "\n".join(lines).strip()
+        enriched = json.loads(content)
+        if not isinstance(enriched, dict):
+            return rule
+        updated = dict(rule)
+        if "description" in enriched and isinstance(enriched["description"], str):
+            updated["description"] = enriched["description"]
+        if "tags" in enriched and isinstance(enriched["tags"], list):
+            updated["tags"] = enriched["tags"]
+        if "falsepositives" in enriched and isinstance(enriched["falsepositives"], list):
+            updated["falsepositives"] = enriched["falsepositives"]
+        return updated
+    except Exception as exc:
+        logger.warning("LLM enrichment failed for Sigma rule %r: %s", rule.get("id"), exc)
+        return rule
+# ---------------------------------------------------------------------------
+# Public interface
+# ---------------------------------------------------------------------------
+def entities_to_sigma_rules(
+    entities: list[Any],
+    llm: Optional[Any] = None,
+) -> list[dict]:
+    """
+    Generate Sigma rule dicts for relevant entities.
+    Entity types that produce rules: IP_ADDRESS, ONION_URL, CVE_NUMBER,
+    MALWARE_FAMILY, RANSOMWARE_GROUP.
+    If llm is provided, enriches description, tags, and falsepositives via LLM.
+    Falls back to base rule if LLM fails.
+    """
+    rules: list[dict] = []
+    for entity in entities:
+        if entity.entity_type not in _SIGMA_ENTITY_TYPES:
+            continue
+        base = _build_base_rule(entity)
+        if base is None:
+            continue
+        if llm is not None:
+            base = _enrich_with_llm(base, llm)
+        rules.append(base)
+    return rules
+def sigma_rule_to_yaml(rule: dict) -> str:
+    """Convert a Sigma rule dict to a valid YAML string."""
+    try:
+        return yaml.dump(rule, default_flow_style=False, allow_unicode=True, sort_keys=False)
+    except Exception as exc:
+        logger.warning("sigma_rule_to_yaml failed: %s", exc)
+        return ""
+def export_sigma_rules(
+    investigation_id: Any,
+    output_dir: str,
+    llm: Optional[Any] = None,
+) -> list[str]:
+    """
+    Load entities for an investigation, generate Sigma rules, and write each to
+    {output_dir}/{uuid}.yml.
+    Returns list of file paths written. Creates output_dir if it doesn't exist.
+    Returns [] if investigation not found or DATABASE_URL not set.
+    """
+    entities = _load_entities_for_investigation(investigation_id)
+    if not entities:
+        return []
+    rules = entities_to_sigma_rules(entities, llm=llm)
+    if not rules:
+        return []
+    out_path = Path(output_dir)
+    out_path.mkdir(parents=True, exist_ok=True)
+    written: list[str] = []
+    for rule in rules:
+        rule_id = rule.get("id") or str(_uuid_module.uuid4())
+        filename = out_path / f"{rule_id}.yml"
+        try:
+            yaml_content = sigma_rule_to_yaml(rule)
+            filename.write_text(yaml_content, encoding="utf-8")
+            written.append(str(filename))
+        except Exception as exc:
+            logger.warning("Failed to write Sigma rule %r: %s", rule_id, exc)
+    return written
+# ---------------------------------------------------------------------------
+# Internal DB helper
+# ---------------------------------------------------------------------------
+def _load_entities_for_investigation(investigation_id: Any) -> list[Any]:
+    """Load NormalizedEntity list from DB for this investigation.
+    Includes entities owned directly AND entities linked via InvestigationEntityLink.
+    Returns [] on error.
+    """
+    if not os.getenv("DATABASE_URL"):
+        return []
+    try:
+        from db.session import get_session  # noqa: PLC0415
+        from db.queries import get_investigation_by_id_or_run  # noqa: PLC0415
+        from db.models import Entity, InvestigationEntityLink  # noqa: PLC0415
+        from extractor.normalizer import NormalizedEntity  # noqa: PLC0415
+        inv_uuid = _coerce_uuid(investigation_id)
+        if inv_uuid is None:
+            return []
+        with get_session() as session:
+            inv = get_investigation_by_id_or_run(session, inv_uuid)
+            if inv is None:
+                return []
+            linked_ids_subq = (
+                session.query(InvestigationEntityLink.entity_id)
+                .filter(InvestigationEntityLink.investigation_id == inv.id)
+                .subquery()
+            )
+            db_entities = (
+                session.query(Entity)
+                .filter(
+                    (Entity.investigation_id == inv.id)
+                    | Entity.id.in_(linked_ids_subq)
+                )
+                .all()
+            )
+            result: list[NormalizedEntity] = []
+            for e in db_entities:
+                source_url = ""
+                try:
+                    if e.page:
+                        source_url = e.page.url or ""
+                except Exception:
+                    pass
+                result.append(NormalizedEntity(
+                    entity_type=e.entity_type,
+                    value=e.canonical_value or e.value,
+                    confidence=e.confidence,
+                    source_url=source_url,
+                    page_id=e.page_id,
+                    context_snippet=e.context_snippet or "",
+                ))
+            return result
+    except Exception as exc:
+        logger.warning("sigma _load_entities_for_investigation failed: %s", exc)
+        return []
+def _coerce_uuid(value: Any):
+    """Coerce value to uuid.UUID. Returns None on failure."""
+    import uuid as _uuid
+    if isinstance(value, _uuid.UUID):
+        return value
+    try:
+        return _uuid.UUID(str(value))
+    except (ValueError, AttributeError):
+        return None