statewatch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
statewatch/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """statewatch — dependency-aware infrastructure drift detector for GCP."""
2
+
3
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,98 @@
1
+ """The ``CloudAdapter`` interface.
2
+
3
+ This protocol is the seam that keeps statewatch cloud-agnostic at the boundary even
4
+ though v0.1 only ships a GCP implementation. An adapter has exactly one job: authenticate
5
+ to a cloud, and return the *live* state of requested resource types as normalized
6
+ :class:`statewatch.normalizer.Resource` objects.
7
+
8
+ An adapter does **not**:
9
+ * read or know anything about Terraform state,
10
+ * compute diffs,
11
+ * classify severity, or
12
+ * traverse the dependency graph.
13
+
14
+ Those are all downstream of the adapter. If you want to add support for another cloud,
15
+ implement this protocol and nothing else changes.
16
+
17
+ Implementing a new adapter (e.g. AWS)
18
+ -------------------------------------
19
+ 1. ``name``: a short provider key, e.g. ``"aws"``.
20
+ 2. ``authenticate()``: establish credentials using the provider's standard mechanism
21
+ (GCP: Application Default Credentials; AWS: the default boto3 credential chain; Azure:
22
+ ``DefaultAzureCredential``). Make one cheap call to confirm the credentials work (e.g.
23
+ STS ``GetCallerIdentity``). Raise :class:`AdapterAuthError` on failure.
24
+ 3. ``supported_resource_types()``: the Terraform-style type names you can fetch live state
25
+ for, e.g. ``{"aws_instance", "aws_security_group"}``. The CLI validates the user's
26
+ requested types against this set.
27
+ 4. ``fetch_resources(types, scope=...)``: query the provider's inventory API
28
+ (GCP: Cloud Asset Inventory; AWS: AWS Config / describe APIs; Azure: Resource Graph),
29
+ and map each native object into a ``Resource`` — deriving a ``resource_id`` that will
30
+ match the one Terraform-side normalization produces, filling ``parent_refs`` from
31
+ obvious attribute references, and stripping provider noise. Raise
32
+ :class:`AdapterError` on API or permission failures.
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ from collections.abc import Iterable
38
+ from typing import Protocol, runtime_checkable
39
+
40
+ from statewatch.normalizer import Resource
41
+
42
+
43
class AdapterError(RuntimeError):
    """Root of the adapter exception hierarchy.

    Raised for adapter failures such as API errors, permission problems, or bad
    responses.
    """
45
+
46
+
47
class AdapterAuthError(AdapterError):
    """Credential failure: the adapter could not establish or validate credentials."""
49
+
50
+
51
@runtime_checkable
class CloudAdapter(Protocol):
    """Structural interface for a provider-specific source of live resource state.

    Any object exposing ``name``, :meth:`authenticate`,
    :meth:`supported_resource_types` and :meth:`fetch_resources` satisfies the
    protocol; inheriting from it is not required.
    """

    #: Short, stable provider key, e.g. ``"gcp"``. Used in CLI options and output.
    name: str

    def authenticate(self) -> None:
        """Establish credentials for this provider and confirm they are usable.

        Invoked once, ahead of any :meth:`fetch_resources` call. An implementation
        should issue a single inexpensive request to prove the credentials work and
        raise :class:`AdapterAuthError` with an actionable message when they do not.
        """
        ...

    def supported_resource_types(self) -> frozenset[str]:
        """Return the Terraform-style type names this adapter can fetch live state for.

        Example: ``frozenset({"google_compute_instance"})``. The CLI checks requested
        types against this set so unsupported ones are rejected up front.
        """
        ...

    def fetch_resources(
        self,
        resource_types: Iterable[str],
        *,
        scope: str,
    ) -> list[Resource]:
        """Return the live state of ``resource_types`` inside ``scope``.

        Args:
            resource_types: Terraform-style type names to fetch; each should be a
                member of :meth:`supported_resource_types`.
            scope: Provider-specific account/project scope to query: a project id
                for GCP, an account id (optionally region-qualified) for AWS, a
                subscription id for Azure.

        Returns:
            One normalized :class:`Resource` per matching live resource. Each
            ``resource_id`` must be derivable identically from the corresponding
            Terraform state entry so the differ can pair the two sides.

        Raises:
            AdapterError: on API errors, permission problems, or malformed responses.
        """
        ...
@@ -0,0 +1,264 @@
1
+ """GCP adapter — live resource state via Cloud Asset Inventory.
2
+
3
+ Authentication uses Application Default Credentials (``google.auth.default()``), and
4
+ :meth:`GCPAdapter.fetch_resources` calls ``AssetServiceClient.list_assets`` by default.
5
+ In stub mode (``GCPAdapter(stub=True)`` or ``STATEWATCH_STUB_GCP=1``) it instead returns
6
+ hand-built data shaped like real CAI responses that deliberately drifts from the Phase 3
7
+ example state (``tests/fixtures/firewall_subnet_drift.tfstate.json``), so
8
+ ``statewatch scan`` can show severity × impact end to end without a GCP project;
9
+ authentication is a no-op in that mode.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import os
16
+ from collections.abc import Iterable
17
+ from typing import Any
18
+
19
+ from statewatch.adapters.base import AdapterAuthError, AdapterError
20
+ from statewatch.normalizer import Resource
21
+ from statewatch.resources import (
22
+ SUPPORTED_RESOURCE_TYPES,
23
+ normalize_cai,
24
+ )
25
+
26
# Terraform resource type -> Cloud Asset Inventory asset type.
_TYPE_TO_CAI_ASSET_TYPE = {
    # Compute Engine
    "google_compute_instance": "compute.googleapis.com/Instance",
    "google_compute_firewall": "compute.googleapis.com/Firewall",
    "google_compute_subnetwork": "compute.googleapis.com/Subnetwork",
    # Kubernetes Engine
    "google_container_cluster": "container.googleapis.com/Cluster",
}

# Types the adapter can both look up in CAI and that appear in
# SUPPORTED_RESOURCE_TYPES. The frozenset stays on the left of ``&`` so the
# result is a frozenset as well.
_SUPPORTED = frozenset(_TYPE_TO_CAI_ASSET_TYPE) & SUPPORTED_RESOURCE_TYPES
33
+
34
+
35
class GCPAdapter:
    """CloudAdapter implementation backed by Google Cloud Asset Inventory.

    By default assets are read with ``AssetServiceClient.list_assets``. In stub mode
    (``stub=True``, or ``STATEWATCH_STUB_GCP`` set to an enabling value) no Google
    library is imported: :meth:`authenticate` is a no-op and assets come from
    ``_stub_assets``.
    """

    name = "gcp"

    # STATEWATCH_STUB_GCP values (compared case-insensitively, whitespace-stripped)
    # that leave stub mode off. Anything else turns it on.
    _STUB_DISABLED_VALUES = frozenset({"", "0", "false", "no", "off"})

    def __init__(self, *, stub: bool | None = None) -> None:
        """Create the adapter.

        Args:
            stub: ``True`` for offline hand-built data, ``False`` for real Cloud
                Asset Inventory. ``None`` (the default) defers to the
                ``STATEWATCH_STUB_GCP`` environment variable, which is off when
                unset.
        """
        self._credentials: Any | None = None
        self._default_project: str | None = None
        if stub is None:
            raw = os.environ.get("STATEWATCH_STUB_GCP", "")
            # Normalize first so "False", "FALSE" or " 0 " do not switch on stub
            # mode by accident.
            stub = raw.strip().lower() not in self._STUB_DISABLED_VALUES
        self._stub = stub

    # -- CloudAdapter protocol ---------------------------------------------------------

    def authenticate(self) -> None:
        """Resolve Application Default Credentials via ``google.auth.default()``.

        Stores the resolved credentials and default project on the instance.
        No-op in stub mode (offline demo / CI).

        Raises:
            AdapterError: if ``google-auth`` cannot be imported.
            AdapterAuthError: if no default credentials are found; the message
                carries remediation guidance.
        """
        if self._stub:
            return
        try:
            import google.auth
            from google.auth.exceptions import DefaultCredentialsError
        except ImportError as exc:  # pragma: no cover - dependency declared in pyproject
            raise AdapterError(
                "google-auth is required for the GCP adapter; install statewatch with its "
                "dependencies (pip install statewatch)."
            ) from exc

        try:
            credentials, project = google.auth.default(
                scopes=["https://www.googleapis.com/auth/cloud-platform"]
            )
        except DefaultCredentialsError as exc:
            raise AdapterAuthError(
                "No Google Cloud credentials found. Run "
                "`gcloud auth application-default login`, or set "
                "GOOGLE_APPLICATION_CREDENTIALS to a service-account key file."
            ) from exc

        self._credentials = credentials
        self._default_project = project

    def supported_resource_types(self) -> frozenset[str]:
        """Return the Terraform-style type names this adapter can fetch."""
        return _SUPPORTED

    def fetch_resources(
        self,
        resource_types: Iterable[str],
        *,
        scope: str,
    ) -> list[Resource]:
        """Fetch and normalize live assets of ``resource_types`` in project ``scope``.

        Args:
            resource_types: Terraform-style type names; each must be supported.
            scope: GCP project id to query.

        Returns:
            The non-``None`` results of ``normalize_cai`` for every listed asset
            whose asset type was requested. An empty request yields ``[]`` without
            listing anything.

        Raises:
            AdapterError: if an unsupported type is requested, or listing fails.
        """
        requested = set(resource_types)
        unsupported = sorted(t for t in requested if t not in _SUPPORTED)
        if unsupported:
            raise AdapterError(
                f"GCP adapter does not support resource types: {unsupported} "
                f"(supported: {sorted(_SUPPORTED)})"
            )
        if not requested:
            # Nothing was asked for. Listing would be wasted work, and an empty
            # asset-type filter means "all types" to Cloud Asset Inventory.
            return []

        wanted_asset_types = {_TYPE_TO_CAI_ASSET_TYPE[t] for t in requested}
        resources: list[Resource] = []
        for asset in self._list_assets(project=scope, asset_types=wanted_asset_types):
            # Stub mode ignores the filter, so unrequested types are dropped here.
            if asset.get("asset_type") not in wanted_asset_types:
                continue
            normalized = normalize_cai(asset, project=scope)
            if normalized is not None:
                resources.append(normalized)
        return resources

    # -- CAI calls ---------------------------------------------------------------------

    def _list_assets(
        self,
        *,
        project: str,
        asset_types: Iterable[str] | None = None,
    ) -> list[dict[str, Any]]:
        """Return CAI ``Asset`` dicts for ``project``.

        Args:
            project: GCP project id.
            asset_types: CAI asset types to list. ``None`` lists every asset type
                the adapter knows about. Ignored in stub mode.

        Returns:
            Asset dicts keyed by proto field name (``asset_type``, ``name``,
            ``resource``), the same top-level keys the stub data uses.

        Raises:
            AdapterError: if ``google-cloud-asset`` is missing or the call fails.
        """
        if self._stub:
            return _stub_assets(project)

        try:
            from google.cloud import asset_v1
        except ImportError as exc:  # pragma: no cover - declared dependency
            raise AdapterError(
                "google-cloud-asset is required; install statewatch with its dependencies."
            ) from exc

        if asset_types is None:
            wanted = sorted(set(_TYPE_TO_CAI_ASSET_TYPE.values()))
        else:
            wanted = sorted(set(asset_types))
        if not wanted:
            # An empty filter would make the API return every asset type.
            return []

        try:
            client = asset_v1.AssetServiceClient(credentials=self._credentials)
            pager = client.list_assets(
                request={
                    "parent": f"projects/{project}",
                    "asset_types": wanted,
                    "content_type": asset_v1.ContentType.RESOURCE,
                }
            )
            # to_json() defaults to lowerCamelCase keys ("assetType"). Keep the proto
            # field names so callers can read "asset_type" exactly as in the stub data.
            return [
                json.loads(asset_v1.Asset.to_json(asset, preserving_proto_field_name=True))
                for asset in pager
            ]
        except Exception as exc:  # google API errors -> uniform adapter failure
            raise AdapterError(
                f"Cloud Asset Inventory list_assets failed for project {project!r}: {exc}"
            ) from exc
146
+
147
+
148
+ # --------------------------------------------------------------------------------------
149
+ # Stubbed CAI data — drifts from tests/fixtures/firewall_subnet_drift.tfstate.json
150
+ # --------------------------------------------------------------------------------------
151
+
152
# Region used in the stubbed subnetwork URLs.
_REGION = "us-central1"


def _instance_asset(
    project: str,
    name: str,
    zone: str,
    machine_type: str,
    *,
    sa_email: str,
    tags: list[str],
) -> dict[str, Any]:
    """Build a CAI-shaped ``compute.googleapis.com/Instance`` asset dict.

    The instance is reported as RUNNING with a single interface (``nic0``) on
    ``prod-vpc`` / ``prod-subnet`` in ``_REGION``, no access configs, and one
    service account (``sa_email``) with the cloud-platform scope. ``tags`` is
    stored as given under ``tags.items``.
    """
    api_root = f"https://www.googleapis.com/compute/v1/projects/{project}"
    zone_url = f"{api_root}/zones/{zone}"

    nic0 = {
        "name": "nic0",
        "network": f"{api_root}/global/networks/prod-vpc",
        "subnetwork": f"{api_root}/regions/{_REGION}/subnetworks/prod-subnet",
        "networkIP": "10.0.0.10",
        "accessConfigs": [],
    }
    service_account = {
        "email": sa_email,
        "scopes": ["https://www.googleapis.com/auth/cloud-platform"],
    }
    data = {
        "name": name,
        "zone": zone_url,
        "machineType": f"{zone_url}/machineTypes/{machine_type}",
        "status": "RUNNING",
        "canIpForward": False,
        "deletionProtection": False,
        "labels": {"env": "prod"},
        "tags": {"items": tags},
        "metadata": {"items": [{"key": "enable-oslogin", "value": "TRUE"}]},
        "networkInterfaces": [nic0],
        "serviceAccounts": [service_account],
        "scheduling": {"preemptible": False, "automaticRestart": True},
    }
    return {
        "name": f"//compute.googleapis.com/projects/{project}/zones/{zone}/instances/{name}",
        "asset_type": "compute.googleapis.com/Instance",
        "resource": {"data": data},
    }
203
+
204
+
205
def _stub_assets(project: str) -> list[dict[str, Any]]:
    """Return the hand-built live state used in stub mode.

    Three instances, then one subnetwork and one firewall that carry the drift:

    - subnet ``prod-subnet``: ``ipCidrRange`` 10.0.0.0/24 -> 10.0.0.0/20 (MEDIUM, but
      every instance in the subnet is DIRECTly impacted -> wide blast radius).
    - firewall ``allow-http``: ``sourceRanges`` 10.0.0.0/8 -> 0.0.0.0/0 (CRITICAL;
      DIRECTly impacts the http-server-tagged instances).
    - instances: unchanged, so the only drift is on the high-blast-radius resources.
    """
    api_root = f"https://www.googleapis.com/compute/v1/projects/{project}"
    asset_root = f"//compute.googleapis.com/projects/{project}"
    vpc_url = f"{api_root}/global/networks/prod-vpc"
    service_account = f"sa-app@{project}.iam.gserviceaccount.com"

    # (name, zone, machine type, network tags) for each stubbed instance.
    instance_specs = [
        ("api-server-prod", "us-central1-a", "n2-standard-4", ["http-server", "ssh"]),
        ("web-2", "us-central1-a", "e2-standard-2", ["http-server"]),
        ("worker-01", "us-central1-b", "e2-medium", ["worker"]),
    ]
    instances = [
        _instance_asset(
            project, vm_name, vm_zone, vm_type, sa_email=service_account, tags=vm_tags
        )
        for vm_name, vm_zone, vm_type, vm_tags in instance_specs
    ]

    subnet = {
        "name": f"{asset_root}/regions/{_REGION}/subnetworks/prod-subnet",
        "asset_type": "compute.googleapis.com/Subnetwork",
        "resource": {
            "data": {
                "name": "prod-subnet",
                "region": f"{api_root}/regions/{_REGION}",
                "network": vpc_url,
                "ipCidrRange": "10.0.0.0/20",  # drift: tfstate says /24
                "privateIpGoogleAccess": True,
                "secondaryIpRanges": [
                    {"rangeName": "pods", "ipCidrRange": "10.4.0.0/14"}
                ],
                "purpose": "PRIVATE",
            }
        },
    }

    firewall = {
        "name": f"{asset_root}/global/firewalls/allow-http",
        "asset_type": "compute.googleapis.com/Firewall",
        "resource": {
            "data": {
                "name": "allow-http",
                "network": vpc_url,
                "direction": "INGRESS",
                "priority": 1000,
                "disabled": False,
                "sourceRanges": ["0.0.0.0/0"],  # drift: tfstate says 10.0.0.0/8
                "targetTags": ["http-server"],
                "allowed": [{"IPProtocol": "tcp", "ports": ["80", "443"]}],
            }
        },
    }

    return [*instances, subnet, firewall]
File without changes
@@ -0,0 +1 @@
1
+ """Audit Logs API wrapper. Implemented in v0.2 (Phase 5), after v0.1 ships."""
@@ -0,0 +1 @@
1
+ """Audit log time-window correlation. Implemented in v0.2 (Phase 5), after v0.1 ships."""
@@ -0,0 +1 @@
1
+ """Parse actor / method / timestamp from audit log entries. Implemented in v0.2 (Phase 5)."""
@@ -0,0 +1,88 @@
1
+ """Severity classifier — how bad is the drift itself?
2
+
3
+ Independent of impact (that's the graph's job). Maps each :class:`~statewatch.differ.Change`
4
+ to CRITICAL / MEDIUM / LOW per the SPEC rules, and a whole
5
+ :class:`~statewatch.differ.ResourceDiff` to the max severity across its changes.
6
+
7
+ Unrecognized changes default to **MEDIUM** — a config change we haven't categorized is
8
+ worth a look but shouldn't cry wolf as CRITICAL. Deliberate, documented default.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from statewatch.differ import ResourceDiff
14
+
15
# Severity labels; each constant's value is its own name.
CRITICAL, MEDIUM, LOW, NONE = "CRITICAL", "MEDIUM", "LOW", "NONE"

# Ascending order of badness: a label's position in this list is its rank, which
# is how the maximum severity across changes is chosen.
_ORDER = [
    NONE,
    LOW,
    MEDIUM,
    CRITICAL,
]
22
+
23
+
24
def severity_rank(sev: str) -> int:
    """Return the position of ``sev`` in ``_ORDER`` (higher is worse).

    A label that is not in ``_ORDER`` gets the rank of ``MEDIUM``.
    """
    try:
        return _ORDER.index(sev)
    except ValueError:
        # Unknown label: rank it as MEDIUM.
        return _ORDER.index(MEDIUM)
26
+
27
+
28
def max_severity(a: str, b: str) -> str:
    """Return whichever of ``a`` and ``b`` ranks worse; ``a`` wins a tie."""
    if severity_rank(a) < severity_rank(b):
        return b
    return a
30
+
31
+
32
def _is_truthy(v: object) -> bool:
    """Return ``False`` when ``v`` equals one of the "empty" markers, else ``True``.

    The markers are ``None``, ``""``, ``[]``, ``{}`` and ``False``. The check is by
    equality, so ``0`` (which equals ``False``) also counts as empty, while ``()``
    does not.
    """
    empty_markers = (None, "", [], {}, False)
    if v in empty_markers:
        return False
    return True
34
+
35
+
36
def _instance_change_severity(path: str, old: object, new: object) -> str:
    """Classify one changed attribute of a ``google_compute_instance``.

    - ``external_ip`` going from empty to set is CRITICAL (public IP exposure on a
      previously-private instance).
    - Anything under ``labels`` or ``tags`` is LOW.
    - Everything else (machine_type, metadata, service_account, scopes, … and
      unknowns) is MEDIUM.
    """
    gained_public_ip = path == "external_ip" and not _is_truthy(old) and _is_truthy(new)
    if gained_public_ip:
        return CRITICAL

    # Top-level attribute name: the text before the first "." and before any "[".
    top_level = path.partition(".")[0].partition("[")[0]
    return LOW if top_level in ("labels", "tags") else MEDIUM
43
+
44
+
45
def _gke_change_severity(path: str, old: object, new: object) -> str:
    """Classify one changed attribute of a ``google_container_cluster``.

    ``private_nodes`` flipping from exactly ``True`` to exactly ``False`` is
    CRITICAL: a private cluster gaining public node IPs is an exposure event, the
    same class as a public IP on a previously-private instance. Every other change
    (node config / version / channel / network) is MEDIUM.
    """
    nodes_made_public = path == "private_nodes" and old is True and new is False
    return CRITICAL if nodes_made_public else MEDIUM
51
+
52
+
53
def _subnetwork_change_severity(path: str) -> str:
    """Classify one changed attribute of a ``google_compute_subnetwork``.

    Changes under ``private_ip_google_access`` are LOW; everything else
    (ip_cidr_range, secondary_ip_range, purpose, … and unknowns) is MEDIUM.
    """
    # Top-level attribute name: the text before the first "." and before any "[".
    top_level = path.partition(".")[0].partition("[")[0]
    if top_level == "private_ip_google_access":
        return LOW
    return MEDIUM
58
+
59
+
60
def classify_resource_diff(diff: ResourceDiff) -> tuple[str, dict[str, str]]:
    """Classify one resource's drift as ``(overall_severity, {change_path: severity})``.

    Status-only drift (``missing_in_live`` or ``unmanaged``) has no per-path entries:
    it is CRITICAL for a firewall and MEDIUM for any other type. Otherwise every
    change is classified by resource type, and the overall severity is the worst
    one seen (``NONE`` when there are no changes).
    """
    kind = diff.resource_type
    is_firewall = kind == "google_compute_firewall"

    if diff.status in ("missing_in_live", "unmanaged"):
        return (CRITICAL if is_firewall else MEDIUM), {}

    severities: dict[str, str] = {}
    worst = NONE
    for change in diff.changes:
        if is_firewall:
            # Firewall added/removed/modified outside Terraform is always CRITICAL.
            level = CRITICAL
        elif kind == "google_compute_subnetwork":
            level = _subnetwork_change_severity(change.path)
        elif kind == "google_container_cluster":
            level = _gke_change_severity(change.path, change.old_value, change.new_value)
        elif kind == "google_compute_instance":
            level = _instance_change_severity(change.path, change.old_value, change.new_value)
        else:
            # Uncategorized resource type: worth a look, but not CRITICAL.
            level = MEDIUM
        severities[change.path] = level
        worst = max_severity(worst, level)
    return worst, severities