PyPI - ventra - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ventra 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

collector/__init__.py +11 -0
collector/__main__.py +8 -0
collector/aws/__init__.py +5 -0
collector/aws/client_factory.py +183 -0
collector/aws/control_plane/cloudtrail.py +331 -0
collector/aws/control_plane/cloudtrail_s3.py +312 -0
collector/aws/control_plane/config.py +96 -0
collector/aws/detections/detective.py +102 -0
collector/aws/detections/guardduty.py +116 -0
collector/aws/detections/macie.py +103 -0
collector/aws/detections/securityhub.py +101 -0
collector/aws/identity/account.py +71 -0
collector/aws/identity/iam.py +386 -0
collector/aws/identity/kms.py +74 -0
collector/aws/identity/secrets.py +56 -0
collector/aws/identity/sts.py +74 -0
collector/aws/network/vpc_flow.py +176 -0
collector/aws/network/waf.py +144 -0
collector/aws/registry.py +60 -0
collector/aws/runner/runner.py +224 -0
collector/aws/workloads/ec2.py +217 -0
collector/aws/workloads/lambda_.py +75 -0
collector/aws/workloads/s3.py +112 -0
collector/azure/__init__.py +17 -0
collector/cli.py +388 -0
collector/devgui.py +350 -0
collector/gcp/__init__.py +17 -0
collector/lib/__init__.py +1 -0
collector/lib/base.py +167 -0
collector/lib/chain_of_custody/__init__.py +6 -0
collector/lib/chain_of_custody/hashing.py +21 -0
collector/lib/chain_of_custody/signing.py +67 -0
collector/lib/models.py +232 -0
collector/lib/packaging/__init__.py +5 -0
collector/lib/packaging/packager.py +77 -0
collector/lib/transport/__init__.py +5 -0
collector/lib/transport/base.py +90 -0
collector/tools/__init__.py +1 -0
collector/tools/verify_readonly.py +66 -0
ventra-0.1.0.dist-info/METADATA +178 -0
ventra-0.1.0.dist-info/RECORD +45 -0
ventra-0.1.0.dist-info/WHEEL +5 -0
ventra-0.1.0.dist-info/entry_points.txt +3 -0
ventra-0.1.0.dist-info/licenses/LICENSE +202 -0
ventra-0.1.0.dist-info/top_level.txt +1 -0

collector/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Ventra collector — read-only cloud forensic triage acquisition.
+The collector runs in the client's cloud shell, gathers exactly the logs and artifacts
+incident responders need, and seals them into a signed evidence package described by the
+Ventra Evidence Package Format (EPF).
+Forensic invariant: nothing in this package may call a mutating cloud API. See
+``collector.tools.verify_readonly`` and the ``readonly-guard`` CI check.
+"""
+# Version string of the collector package.
+__version__ = "0.1.0"

collector/__main__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Allow ``python -m collector dev`` from a fresh clone before ``pip install``."""
+from __future__ import annotations
+from .cli import main
+# Only run the CLI when this module is executed directly; whatever main() returns is
+# handed to SystemExit and so becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

collector/aws/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""AWS orchestration — registry, runner, and boto3 client factory.
+Collector modules (identity, control_plane, network, …) live in this package.
+Import ``collector.aws.registry`` to populate and access the registry.
+"""

collector/aws/client_factory.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""AWS client management used by every collector.
+Wraps boto3 so collectors don't each reinvent region handling, pagination, and the
+all-important AccessDenied detection (an AccessDenied is a *gap*, recorded as evidence, not a
+crash). All clients are created from a single session so credentials are resolved once.
+"""
+from __future__ import annotations
+from collections.abc import Iterator
+from dataclasses import dataclass
+from typing import Any
+import boto3
+from botocore.config import Config
+from botocore.exceptions import ClientError, EndpointConnectionError, NoCredentialsError
+# Error codes that mean "you can't see this" rather than "something broke".
+# _raise_typed() turns a ClientError carrying one of these codes into AccessDenied.
+ACCESS_DENIED_CODES = frozenset(
+    {
+        "AccessDenied",
+        "AccessDeniedException",
+        "UnauthorizedOperation",
+        "AuthorizationError",
+        "AuthFailure",
+        "ForbiddenException",
+    }
+)
+# Error codes that mean "this service/feature isn't enabled here".
+# _raise_typed() turns a ClientError carrying one of these codes into ServiceNotEnabled;
+# it checks ACCESS_DENIED_CODES first, so a code must not be listed in both sets.
+NOT_ENABLED_CODES = frozenset(
+    {
+        "ResourceNotFoundException",
+        "BadRequestException",
+        "InvalidInputException",
+        "SubscriptionRequiredException",
+        "OptInRequired",
+        # Security Hub raises this when the account is not subscribed to the hub.
+        "InvalidAccessException",
+        # WAFv2 raises this when e.g. a Web ACL has no logging configuration.
+        "WAFNonexistentItemException",
+        # Standalone accounts that are not part of an AWS Organization.
+        "AWSOrganizationsNotInUseException",
+        # CloudTrail trails without Insights enabled.
+        "InsightNotEnabledException",
+        # IAM raises this when e.g. no custom password policy exists on the account.
+        "NoSuchEntity",
+    }
+)
+class AccessDenied(Exception):
+    """Signals that an API call came back with an access-denied style error.
+    ``action`` identifies the refused call and ``message`` holds the service's explanation;
+    ``str(exc)`` renders as ``"<action>: <message>"``.
+    """
+    def __init__(self, action: str, message: str) -> None:
+        self.action, self.message = action, message
+        rendered = f"{action}: {message}"
+        super().__init__(rendered)
+class ServiceNotEnabled(Exception):
+    """Raised by helpers when a service or feature is not enabled / not reachable here.
+    ``service`` keeps its original name for existing callers, but the helpers in this module
+    pass a ``"service:operation"`` string for it, so the same value is also exposed as
+    ``action`` to mirror ``AccessDenied`` and let handlers treat both gap types alike.
+    ``str(exc)`` renders as ``"<service>: <message>"``.
+    """
+    def __init__(self, service: str, message: str) -> None:
+        super().__init__(f"{service}: {message}")
+        self.service = service
+        # Alias with the same attribute name AccessDenied uses.
+        self.action = service
+        self.message = message
+@dataclass
+class CallerIdentity:
+    """Identity of the credentials in use, as assembled by ``AwsClientFactory.caller_identity``."""
+    # "Account" field of the STS GetCallerIdentity response.
+    account_id: str
+    # "Arn" field of the same response.
+    arn: str
+    # "UserId" field, or "" when the response does not include it.
+    user_id: str
+    # Second ":"-separated segment of the ARN; "aws" when the ARN lacks the "arn:" prefix.
+    partition: str
+class AwsClientFactory:
+    """Hands out boto3 clients, one per (service, region) pair, all built from one session.
+    Also hosts the shared call / pagination helpers; ``call`` and ``paginate`` route every
+    ``ClientError`` through ``_raise_typed``.
+    """
+    def __init__(self, session: boto3.Session | None = None) -> None:
+        self._cache: dict[tuple[str, str | None], Any] = {}
+        self._session = session or boto3.Session()
+        # Up to 5 attempts in adaptive retry mode, with "ventra" appended to the user agent.
+        self._cfg = Config(retries={"max_attempts": 5, "mode": "adaptive"}, user_agent_extra="ventra")
+    def client(self, service: str, region: str | None = None) -> Any:
+        """Return the client for ``(service, region)``, creating and caching it on first use."""
+        cache_key = (service, region)
+        if cache_key in self._cache:
+            return self._cache[cache_key]
+        created = self._session.client(service, region_name=region, config=self._cfg)
+        self._cache[cache_key] = created
+        return created
+    # -- identity / region discovery -----------------------------------------------------
+    def caller_identity(self) -> CallerIdentity:
+        """Ask STS who we are; missing credentials surface as a ``RuntimeError``."""
+        try:
+            sts_reply = self.client("sts").get_caller_identity()
+        except NoCredentialsError as exc:  # pragma: no cover
+            raise RuntimeError(
+                "No AWS credentials found. Run inside CloudShell or configure a profile."
+            ) from exc
+        caller_arn = sts_reply["Arn"]
+        if caller_arn.startswith("arn:"):
+            partition = caller_arn.split(":")[1]
+        else:
+            partition = "aws"
+        return CallerIdentity(
+            account_id=sts_reply["Account"],
+            arn=caller_arn,
+            user_id=sts_reply.get("UserId", ""),
+            partition=partition,
+        )
+    def enabled_regions(self) -> list[str]:
+        """Sorted names of the regions enabled for this account (active opt-in regions included)."""
+        opt_in_filter = {"Name": "opt-in-status", "Values": ["opt-in-not-required", "opted-in"]}
+        try:
+            reply = self.client("ec2", "us-east-1").describe_regions(Filters=[opt_in_filter])
+        except ClientError:
+            # Fall back to the SDK's static partition list.
+            return sorted(self._session.get_available_regions("ec2"))
+        else:
+            names = [entry["RegionName"] for entry in reply["Regions"]]
+            names.sort()
+            return names
+    # -- safe call helpers ---------------------------------------------------------------
+    def paginate(
+        self, service: str, region: str | None, operation: str, result_key: str, **kwargs: Any
+    ) -> Iterator[dict[str, Any]]:
+        """Stream ``result_key`` items from every page of ``operation``.
+        A ``ClientError`` is handed to ``_raise_typed``; an ``EndpointConnectionError`` simply
+        ends the stream.
+        """
+        api = self.client(service, region)
+        try:
+            pages = api.get_paginator(operation).paginate(**kwargs)
+            for page in pages:
+                for item in page.get(result_key, []):
+                    yield item
+        except ClientError as exc:
+            _raise_typed(exc, f"{service}:{operation}")
+        except EndpointConnectionError:
+            return
+    def call(self, service: str, region: str | None, operation: str, **kwargs: Any) -> dict[str, Any]:
+        """Invoke a single client operation and return its response.
+        A ``ClientError`` is handed to ``_raise_typed``; an ``EndpointConnectionError`` is
+        re-raised as ``ServiceNotEnabled``.
+        """
+        api = self.client(service, region)
+        try:
+            bound_operation = getattr(api, operation)
+            return bound_operation(**kwargs)
+        except ClientError as exc:
+            _raise_typed(exc, f"{service}:{operation}")
+            raise  # unreachable, keeps type-checkers happy
+        except EndpointConnectionError as exc:
+            # The service has no endpoint in this region — same gap as "not enabled".
+            raise ServiceNotEnabled(f"{service}:{operation}", str(exc)) from exc
+    def paginate_manual(
+        self,
+        service: str,
+        region: str | None,
+        operation: str,
+        result_key: str,
+        *,
+        token_request_key: str = "NextToken",
+        token_response_key: str = "NextToken",
+        max_pages: int = 500,
+        **kwargs: Any,
+    ) -> Iterator[dict[str, Any]]:
+        """Hand-rolled token loop for operations botocore ships no paginator for
+        (e.g. wafv2 ListWebACLs / detective ListInvestigations). Reads at most ``max_pages``
+        pages through ``call``."""
+        cursor: str | None = None
+        for _page_no in range(max_pages):
+            request = {**kwargs}
+            if cursor:
+                request[token_request_key] = cursor
+            reply = self.call(service, region, operation, **request)
+            batch = reply.get(result_key) or []
+            yield from batch
+            next_cursor = reply.get(token_response_key)
+            # Continue only with a fresh, non-empty marker that came with items, so a
+            # quirky implementation can never loop us forever.
+            keep_going = bool(next_cursor) and next_cursor != cursor and bool(batch)
+            if not keep_going:
+                return
+            cursor = next_cursor
+def _raise_typed(exc: ClientError, action: str) -> None:
+    """Re-raise ``exc`` as a typed gap when its error code is a known one.
+    Codes in ``ACCESS_DENIED_CODES`` become ``AccessDenied`` and codes in
+    ``NOT_ENABLED_CODES`` become ``ServiceNotEnabled`` (checked in that order); anything
+    else is re-raised unchanged. This function never returns normally.
+    """
+    details = exc.response.get("Error", {})
+    code = details.get("Code", "")
+    message = details.get("Message", str(exc))
+    translations = (
+        (ACCESS_DENIED_CODES, AccessDenied),
+        (NOT_ENABLED_CODES, ServiceNotEnabled),
+    )
+    for known_codes, gap_type in translations:
+        if code in known_codes:
+            raise gap_type(action, message)
+    raise exc

collector/aws/control_plane/cloudtrail.py ADDED Viewed

@@ -0,0 +1,331 @@
+"""CloudTrail collector — the control-plane backbone of cloud IR.
+Captures:
+  1. **Trail configuration** — trails, selectors (management / data / network / insights),
+     log validation, and S3 delivery settings.
+  2. **Management events** — via LookupEvents (portable, ~90-day lookback).
+  3. **Insight events** — via LookupEvents and S3 log files when Insights is enabled.
+  4. **Data events** — from the trail's S3 log files when data events are enabled.
+  5. **Network activity events** — from the trail's S3 log files when enabled.
+"""
+from __future__ import annotations
+from datetime import UTC, datetime, timedelta
+from ...lib.base import Collector
+from ...lib.models import GapReason, SourceResult, SourceStatus
+from ..client_factory import AccessDenied, ServiceNotEnabled
+from .cloudtrail_s3 import (
+    DATA_CATEGORIES,
+    INSIGHT_CATEGORIES,
+    NETWORK_CATEGORIES,
+    collect_s3_trail_records,
+    coverage_summary,
+    data_events_configured,
+    insight_events_configured,
+    lookup_event_category,
+    merge_dedupe,
+    network_activity_configured,
+    trail_is_logging_to_s3,
+)
+# Bound the in-memory LookupEvents pull so a busy account can't exhaust a CloudShell.
+# CloudTrailCollector._collect_lookup_events stops reading once this many events have been
+# gathered (summed across regions) and records a truncation gap.
+MAX_LOOKUP_RECORDS = 200_000
+class CloudTrailCollector(Collector):
+    name = "cloudtrail"
+    tier = 1
+    description = (
+        "CloudTrail trail config, management and insight events (LookupEvents), "
+        "and data / network-activity events from S3 logs."
+    )
+    required_actions = (
+        "cloudtrail:DescribeTrails",
+        "cloudtrail:GetTrailStatus",
+        "cloudtrail:GetEventSelectors",
+        "cloudtrail:GetInsightSelectors",
+        "cloudtrail:LookupEvents",
+        "s3:ListBucket",
+        "s3:GetObject",
+    )
+    def collect(self) -> SourceResult:
+        """Gather trail configuration plus CloudTrail events for the time window and write them out.
+        Management and insight events come from LookupEvents; insight, data and
+        network-activity events are additionally read from trail S3 logs via
+        ``_collect_s3_category``. Each non-empty event set is written to its own
+        ``.jsonl.gz`` file, the trail configuration always goes to ``config.json``, and a
+        metadata summary is written with ``write_meta``.
+        Returns a ``SourceResult`` carrying the written files, the total record count, every
+        gap recorded along the way, and a one-line notes summary.
+        """
+        cf = self.ctx.client_factory
+        # Shared gap list: the helpers below append to it in place.
+        gaps: list[tuple[str, GapReason, str]] = []
+        config = self._trail_config(cf, gaps)
+        config["event_coverage"] = coverage_summary(config.get("trails", []))
+        window = self.ctx.time_window
+        # Unbounded sides of the window default to "90 days ago" and "now" (UTC).
+        start = window.since or (datetime.now(UTC) - timedelta(days=90))
+        end = window.until or datetime.now(UTC)
+        mgmt_records, lookup_insight_records = self._collect_lookup_events(cf, gaps, start, end)
+        # require_s3=False: the helper records no gap of its own for insight events; the
+        # "enabled but nothing found" case is handled after merging both sources below.
+        insight_s3_records, insight_stats = self._collect_s3_category(
+            cf,
+            config,
+            gaps,
+            start,
+            end,
+            INSIGHT_CATEGORIES,
+            insight_events_configured,
+            "insight_events",
+            require_s3=False,
+        )
+        # Insight events can arrive through both LookupEvents and S3; combine the two sets.
+        insight_records = merge_dedupe(lookup_insight_records, insight_s3_records)
+        data_records, data_stats = self._collect_s3_category(
+            cf, config, gaps, start, end, DATA_CATEGORIES, data_events_configured, "data_events"
+        )
+        network_records, network_stats = self._collect_s3_category(
+            cf,
+            config,
+            gaps,
+            start,
+            end,
+            NETWORK_CATEGORIES,
+            network_activity_configured,
+            "network_activity",
+        )
+        if config["event_coverage"]["insight_events_configured"] and not insight_records:
+            gaps.append(
+                (
+                    "insight_events",
+                    GapReason.NOT_PRESENT,
+                    "Insights enabled but no insight events in window.",
+                )
+            )
+        config["s3_collection"] = {
+            "insight_events": insight_stats,
+            "data_events": data_stats,
+            "network_activity": network_stats,
+        }
+        # Write one file per non-empty event set; empty sets produce no file.
+        files = []
+        total = 0
+        if mgmt_records:
+            files.append(self.write_jsonl(mgmt_records, "events.jsonl.gz"))
+            total += len(mgmt_records)
+        if insight_records:
+            files.append(self.write_jsonl(insight_records, "events_insights.jsonl.gz"))
+            total += len(insight_records)
+        if data_records:
+            files.append(self.write_jsonl(data_records, "events_data.jsonl.gz"))
+            total += len(data_records)
+        if network_records:
+            files.append(self.write_jsonl(network_records, "events_network.jsonl.gz"))
+            total += len(network_records)
+        # The configuration is written unconditionally, even when no events were found.
+        files.append(self.write_json(config, "config.json"))
+        # Any gap means PARTIAL; COLLECTED needs records and no gaps; EMPTY means no records
+        # and no gaps (a NOT_PRESENT gap is then added to explain the empty result).
+        if total:
+            status = SourceStatus.PARTIAL if gaps else SourceStatus.COLLECTED
+        elif gaps:
+            status = SourceStatus.PARTIAL
+        else:
+            status = SourceStatus.EMPTY
+            gaps.append(
+                ("cloudtrail", GapReason.NOT_PRESENT, "No CloudTrail events in window.")
+            )
+        self.write_meta(
+            {
+                "source": self.name,
+                "records": total,
+                "management_events": len(mgmt_records),
+                "insight_events": len(insight_records),
+                "data_events": len(data_records),
+                "network_activity_events": len(network_records),
+                "lookup_insight_events": len(lookup_insight_records),
+                "regions": self.ctx.regions,
+                "window": window.to_manifest(),
+                "trails": len(config.get("trails", [])),
+                "log_validation_enabled": config.get("any_log_validation_enabled"),
+                "event_coverage": config["event_coverage"],
+                "s3_collection": config["s3_collection"],
+            }
+        )
+        notes = (
+            f"{len(mgmt_records)} management, {len(insight_records)} insight, "
+            f"{len(data_records)} data, {len(network_records)} network-activity; "
+            f"{len(config.get('trails', []))} trail(s)."
+        )
+        return SourceResult(
+            name=self.name,
+            status=status,
+            files=files,
+            record_count=total,
+            gaps=gaps,
+            notes=notes,
+        )
+    def _collect_lookup_events(
+        self,
+        cf,
+        gaps: list[tuple[str, GapReason, str]],
+        start: datetime,
+        end: datetime,
+    ) -> tuple[list[dict], list[dict]]:
+        """Pull LookupEvents results for every configured region, split by category.
+        Each event is tagged with ``_ventra_region``. Events whose lookup category is
+        ``"Insight"`` go to the second list, everything else to the first. Reading stops
+        once ``MAX_LOOKUP_RECORDS`` events are held in total, which is recorded as a gap;
+        access-denied regions are recorded as gaps, and regions where the service is not
+        enabled are skipped without a gap.
+        Returns ``(management_events, insight_events)``.
+        """
+        management: list[dict] = []
+        insights: list[dict] = []
+        cap_reached = False
+        for region in self.ctx.regions:
+            try:
+                event_stream = cf.paginate(
+                    "cloudtrail",
+                    region,
+                    "lookup_events",
+                    "Events",
+                    StartTime=start,
+                    EndTime=end,
+                )
+                for event in event_stream:
+                    if len(management) + len(insights) >= MAX_LOOKUP_RECORDS:
+                        cap_reached = True
+                        break
+                    event["_ventra_region"] = region
+                    is_insight = lookup_event_category(event) == "Insight"
+                    bucket = insights if is_insight else management
+                    bucket.append(event)
+            except AccessDenied as denied:
+                gaps.append(("cloudtrail", GapReason.ACCESS_DENIED, f"{region}: {denied.message}"))
+            except ServiceNotEnabled:
+                pass
+            # Once the cap is hit there is no point in querying further regions.
+            if cap_reached:
+                break
+        if cap_reached:
+            truncation_note = (
+                f"LookupEvents truncated at {MAX_LOOKUP_RECORDS} records; "
+                "narrow the window (--since/--until) for full coverage."
+            )
+            gaps.append(("cloudtrail", GapReason.COLLECTOR_ERROR, truncation_note))
+        return management, insights
+    def _collect_s3_category(
+        self,
+        cf,
+        config: dict,
+        gaps: list[tuple[str, GapReason, str]],
+        start: datetime,
+        end: datetime,
+        categories: frozenset[str],
+        configured_fn,
+        gap_name: str,
+        *,
+        require_s3: bool = True,
+    ) -> tuple[list[dict], dict]:
+        """Read one family of event categories from the S3 logs of every eligible trail.
+        A trail is eligible when ``configured_fn(trail)`` is truthy and it is logging to S3.
+        Returns ``(records, stats)``. When no trail has the category configured, or none
+        delivers to S3, the records list is empty and ``stats`` says why. With
+        ``require_s3`` true, a gap named ``gap_name`` is appended when the category is
+        configured but no trail delivers to S3, or when nothing was read at all.
+        """
+        trail_list = config.get("trails", [])
+        category_enabled = any(configured_fn(t) for t in trail_list)
+        if not category_enabled:
+            return [], {"configured": False, "records": 0}
+        delivers_to_s3 = any(trail_is_logging_to_s3(t) for t in trail_list)
+        if not delivers_to_s3:
+            if require_s3:
+                no_delivery_gap = (
+                    gap_name,
+                    GapReason.LOGGING_NOT_CONFIGURED,
+                    "Event type enabled but trail does not deliver logs to S3.",
+                )
+                gaps.append(no_delivery_gap)
+            return [], {"configured": True, "records": 0, "s3_logging": False}
+        records: list[dict] = []
+        totals: dict = {"configured": True, "records": 0, "s3_logging": True}
+        account_id = self.ctx.account_id
+        for trail in trail_list:
+            if not (configured_fn(trail) and trail_is_logging_to_s3(trail)):
+                continue
+            trail_records, trail_stats = collect_s3_trail_records(
+                cf,
+                trail,
+                account_id,
+                self.ctx.regions,
+                start,
+                end,
+                categories,
+                gaps,
+                log=lambda msg: self._log(msg),
+            )
+            records.extend(trail_records)
+            # Numeric counters add up across trails; "truncated" is sticky once any trail sets it.
+            for counter in ("objects_scanned", "objects_read", "records"):
+                totals[counter] = totals.get(counter, 0) + trail_stats.get(counter, 0)
+            totals["truncated"] = totals.get("truncated") or trail_stats.get("truncated")
+        # The authoritative record count is the length of what was actually gathered.
+        totals["records"] = len(records)
+        if not records and require_s3 and totals.get("configured"):
+            readable_name = gap_name.replace("_", " ")
+            gaps.append(
+                (
+                    gap_name,
+                    GapReason.NOT_PRESENT,
+                    f"No {readable_name} log records in window (check S3 path/permissions).",
+                )
+            )
+        return records, totals
+    def _trail_config(self, cf, gaps: list[tuple[str, GapReason, str]]) -> dict:
+        """Describe the trails visible from every configured region and enrich each one.
+        Trails are de-duplicated by ARN (a trail can be returned by more than one region).
+        Each kept trail is copied and, best-effort, extended with ``Status``,
+        ``EventSelectors`` and ``InsightSelectors`` fetched from its home region.
+        Access-denied responses are appended to ``gaps`` under ``"cloudtrail_config"``,
+        including a denied ``get_insight_selectors`` call, so that a missing permission is
+        never mistaken for "Insights are off". Regions where CloudTrail is not enabled are
+        skipped without a gap.
+        Returns a dict with the trail list, its length, and summary flags for log-file
+        validation, multi-region trails and organization trails.
+        """
+        trails: list[dict] = []
+        any_validation = False
+        seen: set[str] = set()
+        for region in self.ctx.regions:
+            try:
+                described = cf.call("cloudtrail", region, "describe_trails").get("trailList", [])
+            except AccessDenied as exc:
+                gaps.append(
+                    ("cloudtrail_config", GapReason.ACCESS_DENIED, f"{region}: {exc.message}")
+                )
+                continue
+            except ServiceNotEnabled:
+                continue
+            for trail in described:
+                arn = trail.get("TrailARN", "")
+                if arn in seen:
+                    continue
+                seen.add(arn)
+                # Work on a copy so the enrichment keys never leak into the API response dict.
+                trail = dict(trail)
+                if trail.get("LogFileValidationEnabled"):
+                    any_validation = True
+                # Status and selectors are queried in the trail's home region when it is known.
+                home = trail.get("HomeRegion", region)
+                try:
+                    trail["Status"] = cf.call(
+                        "cloudtrail", home, "get_trail_status", Name=arn
+                    )
+                    trail["EventSelectors"] = cf.call(
+                        "cloudtrail", home, "get_event_selectors", TrailName=arn
+                    )
+                except AccessDenied as exc:
+                    # Without status, S3 log collection for this trail is skipped — that
+                    # is a gap worth surfacing, not hiding.
+                    gaps.append(
+                        ("cloudtrail_config", GapReason.ACCESS_DENIED, f"{arn}: {exc.message}")
+                    )
+                except Exception:  # noqa: BLE001 - keep the trail entry, just less enriched
+                    pass
+                try:
+                    trail["InsightSelectors"] = cf.call(
+                        "cloudtrail", home, "get_insight_selectors", TrailName=arn
+                    )
+                except AccessDenied as exc:
+                    # Not being allowed to read the selectors is different from Insights
+                    # being off: record it, otherwise insight coverage silently reads as
+                    # "not configured".
+                    trail["InsightSelectors"] = None
+                    gaps.append(
+                        ("cloudtrail_config", GapReason.ACCESS_DENIED, f"{arn}: {exc.message}")
+                    )
+                except Exception:  # noqa: BLE001 - InsightNotEnabledException et al. — Insights are off
+                    trail["InsightSelectors"] = None
+                trails.append(trail)
+        return {
+            "trails": trails,
+            "trail_count": len(trails),
+            "any_log_validation_enabled": any_validation,
+            "multi_region_trail_present": any(t.get("IsMultiRegionTrail") for t in trails),
+            "organization_trail_present": any(t.get("IsOrganizationTrail") for t in trails),
+        }