stackfix 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. cloudgym/__init__.py +3 -0
  2. cloudgym/benchmark/__init__.py +0 -0
  3. cloudgym/benchmark/dataset.py +188 -0
  4. cloudgym/benchmark/evaluator.py +275 -0
  5. cloudgym/cli.py +61 -0
  6. cloudgym/fixer/__init__.py +1 -0
  7. cloudgym/fixer/cli.py +521 -0
  8. cloudgym/fixer/detector.py +81 -0
  9. cloudgym/fixer/formatter.py +55 -0
  10. cloudgym/fixer/lambda_handler.py +126 -0
  11. cloudgym/fixer/repairer.py +237 -0
  12. cloudgym/generator/__init__.py +0 -0
  13. cloudgym/generator/formatter.py +142 -0
  14. cloudgym/generator/pipeline.py +271 -0
  15. cloudgym/inverter/__init__.py +0 -0
  16. cloudgym/inverter/_cf_injectors.py +705 -0
  17. cloudgym/inverter/_cf_utils.py +202 -0
  18. cloudgym/inverter/_hcl_utils.py +182 -0
  19. cloudgym/inverter/_tf_injectors.py +641 -0
  20. cloudgym/inverter/_yaml_cf.py +84 -0
  21. cloudgym/inverter/agentic.py +90 -0
  22. cloudgym/inverter/engine.py +258 -0
  23. cloudgym/inverter/programmatic.py +95 -0
  24. cloudgym/scraper/__init__.py +0 -0
  25. cloudgym/scraper/aws_samples.py +159 -0
  26. cloudgym/scraper/github.py +238 -0
  27. cloudgym/scraper/registry.py +165 -0
  28. cloudgym/scraper/validator.py +116 -0
  29. cloudgym/taxonomy/__init__.py +10 -0
  30. cloudgym/taxonomy/base.py +102 -0
  31. cloudgym/taxonomy/cloudformation.py +258 -0
  32. cloudgym/taxonomy/terraform.py +274 -0
  33. cloudgym/utils/__init__.py +0 -0
  34. cloudgym/utils/config.py +57 -0
  35. cloudgym/utils/ollama.py +66 -0
  36. cloudgym/validator/__init__.py +0 -0
  37. cloudgym/validator/cloudformation.py +55 -0
  38. cloudgym/validator/opentofu.py +103 -0
  39. cloudgym/validator/terraform.py +115 -0
  40. stackfix-0.1.0.dist-info/METADATA +182 -0
  41. stackfix-0.1.0.dist-info/RECORD +44 -0
  42. stackfix-0.1.0.dist-info/WHEEL +4 -0
  43. stackfix-0.1.0.dist-info/entry_points.txt +3 -0
  44. stackfix-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,238 @@
1
+ """GitHub scraper for Terraform and CloudFormation configurations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ import os
8
+ import re
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+
12
+ import httpx
13
+
14
+ from cloudgym.utils.config import GOLD_CF_DIR, GOLD_TF_DIR, ScraperConfig
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ GITHUB_API = "https://api.github.com"
19
+ GITHUB_SEARCH_CODE = f"{GITHUB_API}/search/code"
20
+ GITHUB_SEARCH_REPOS = f"{GITHUB_API}/search/repositories"
21
+
22
+ # Patterns that suggest secrets — skip these files
23
+ SECRET_PATTERNS = re.compile(
24
+ r"(AKIA[0-9A-Z]{16}|aws_secret_access_key|password\s*=\s*\".{8,}\")",
25
+ re.IGNORECASE,
26
+ )
27
+
28
+
29
@dataclass
class ScrapedFile:
    """A single scraped IaC configuration file."""

    repo_full_name: str  # "owner/repo", taken from the search item's repository metadata
    file_path: str  # path of the file inside the repository
    content: str  # raw downloaded file contents
    format: str  # "terraform" or "cloudformation"
    sha: str = ""  # Git blob SHA; used by GitHubScraper for de-duplication
38
+
39
+
40
@dataclass
class GitHubScraper:
    """Scrapes GitHub for Terraform and CloudFormation configs.

    Runs the code-search queries from ``config``, downloads each hit's raw
    content, and keeps only files that pass basic quality filters.
    """

    config: ScraperConfig = field(default_factory=ScraperConfig)
    # Blob SHAs of files already collected — de-duplicates across queries.
    _seen_hashes: set[str] = field(default_factory=set, repr=False)

    @property
    def _headers(self) -> dict[str, str]:
        """Headers for GitHub API calls; adds a Bearer token when one is configured."""
        token = self.config.github_token or os.environ.get("GITHUB_TOKEN", "")
        headers = {"Accept": "application/vnd.github.v3+json"}
        if token:
            headers["Authorization"] = f"Bearer {token}"
        return headers

    async def scrape_terraform(self) -> list[ScrapedFile]:
        """Search GitHub for Terraform .tf files."""
        results: list[ScrapedFile] = []
        async with httpx.AsyncClient(timeout=30.0) as client:
            for query in self.config.tf_search_queries:
                files = await self._search_code(
                    client,
                    query=f"{query} extension:tf",
                    fmt="terraform",
                )
                results.extend(files)
                if len(results) >= self.config.max_repos:
                    break
                # Respect rate limits
                await asyncio.sleep(2.0)
        return results

    async def scrape_cloudformation(self) -> list[ScrapedFile]:
        """Search GitHub for CloudFormation templates (YAML, then JSON)."""
        results: list[ScrapedFile] = []
        async with httpx.AsyncClient(timeout=30.0) as client:
            for query in self.config.cf_search_queries:
                files = await self._search_code(
                    client,
                    query=f"{query} extension:yaml",
                    fmt="cloudformation",
                )
                results.extend(files)

                # FIX: check the cap between the YAML and JSON searches too;
                # previously it was only checked after both, overshooting
                # max_repos and spending an extra API call.
                if len(results) >= self.config.max_repos:
                    break

                # Also search JSON CF templates
                files = await self._search_code(
                    client,
                    query=f"{query} extension:json",
                    fmt="cloudformation",
                )
                results.extend(files)

                if len(results) >= self.config.max_repos:
                    break
                await asyncio.sleep(2.0)
        return results

    async def scrape_all(self) -> list[ScrapedFile]:
        """Run all GitHub scraping tasks concurrently and merge the results."""
        tf_files, cf_files = await asyncio.gather(
            self.scrape_terraform(),
            self.scrape_cloudformation(),
        )
        return tf_files + cf_files

    async def _search_code(
        self,
        client: httpx.AsyncClient,
        query: str,
        fmt: str,
        per_page: int = 30,
    ) -> list[ScrapedFile]:
        """Search GitHub code API and download matching files.

        Returns an empty list on search failure (logged); individual file
        download or filter failures are skipped silently.
        """
        results: list[ScrapedFile] = []

        try:
            resp = await client.get(
                GITHUB_SEARCH_CODE,
                params={"q": query, "per_page": per_page},
                headers=self._headers,
            )
            resp.raise_for_status()
        except httpx.HTTPStatusError as exc:
            if exc.response.status_code == 403:
                # 403 is how GitHub signals search rate limiting
                logger.warning("GitHub API rate limit hit, pausing")
                await asyncio.sleep(60)
                return results
            logger.error("GitHub search failed: %s", exc)
            return results
        except httpx.HTTPError as exc:
            # FIX: connection/timeout errors previously propagated and
            # aborted the whole scrape; handle them like RegistryScraper does.
            logger.error("GitHub search failed: %s", exc)
            return results

        data = resp.json()
        items = data.get("items", [])
        logger.info("GitHub search '%s' returned %d items", query, len(items))

        for item in items:
            sha = item.get("sha", "")
            # FIX: only consult the dedup set for non-empty SHAs. Previously
            # an empty sha was added to _seen_hashes, which then skipped
            # every later item that also lacked a sha.
            if sha and sha in self._seen_hashes:
                continue

            # Rewrite the HTML view URL into a raw-content URL.
            # Assumes html_url is a github.com /blob/ link — TODO confirm
            # for results hosted on other domains.
            raw_url = item.get("html_url", "").replace(
                "github.com", "raw.githubusercontent.com"
            ).replace("/blob/", "/")

            if not raw_url:
                continue

            content = await self._download_raw(client, raw_url)
            if content is None:
                continue

            if not self._passes_filters(content, fmt):
                continue

            if sha:
                self._seen_hashes.add(sha)
            results.append(
                ScrapedFile(
                    repo_full_name=item.get("repository", {}).get("full_name", ""),
                    file_path=item.get("path", ""),
                    content=content,
                    format=fmt,
                    sha=sha,
                )
            )

        return results

    async def _download_raw(self, client: httpx.AsyncClient, url: str) -> str | None:
        """Download raw file content; None on HTTP error or oversize file."""
        try:
            resp = await client.get(url, headers=self._headers, follow_redirects=True)
            resp.raise_for_status()
            text = resp.text
            # Size is checked after download; the cap bounds what we keep,
            # not what we transfer.
            if len(text) > self.config.max_file_size_kb * 1024:
                return None
            return text
        except httpx.HTTPError:
            return None

    def _passes_filters(self, content: str, fmt: str) -> bool:
        """Check if file content passes quality filters."""
        # Skip files with obvious secrets
        if SECRET_PATTERNS.search(content):
            logger.debug("Skipping file with potential secrets")
            return False

        # Skip very small files
        if len(content.strip()) < 50:
            return False

        if fmt == "terraform":
            return self._filter_terraform(content)
        elif fmt == "cloudformation":
            return self._filter_cloudformation(content)
        return False

    def _filter_terraform(self, content: str) -> bool:
        """Check Terraform-specific quality criteria.

        Substring counts are a rough heuristic: "resource "/"data "/"module "
        also match inside strings and comments.
        """
        resource_count = content.count("resource ")
        data_count = content.count("data ")
        module_count = content.count("module ")
        total = resource_count + data_count + module_count
        return total >= self.config.min_resources

    def _filter_cloudformation(self, content: str) -> bool:
        """Check CloudFormation-specific quality criteria."""
        has_version = "AWSTemplateFormatVersion" in content
        has_resources = "Resources:" in content or '"Resources"' in content
        if not (has_version or has_resources):
            return False

        # Count resources roughly
        resource_lines = content.count("Type: AWS::") + content.count('"Type": "AWS::')
        return resource_lines >= self.config.min_resources
213
+
214
+
215
async def save_scraped_files(files: list[ScrapedFile]) -> dict[str, int]:
    """Save scraped files to the gold directories. Returns count per format.

    Files whose format is not "terraform" are stored (and counted) as
    CloudFormation, matching the directory they are written to.
    """
    counts = {"terraform": 0, "cloudformation": 0}

    for f in files:
        if f.format == "terraform":
            out_dir = GOLD_TF_DIR
            ext = ".tf"
            bucket = "terraform"
        else:
            out_dir = GOLD_CF_DIR
            ext = ".json" if f.file_path.endswith(".json") else ".yaml"
            # FIX: count under the resolved bucket — counts[f.format] raised
            # KeyError for any format string other than the two dict keys,
            # even though this branch accepts such files.
            bucket = "cloudformation"

        out_dir.mkdir(parents=True, exist_ok=True)

        # Use repo name + file path as unique filename
        safe_name = f"{f.repo_full_name}__{f.file_path}".replace("/", "_").replace("\\", "_")
        if not safe_name.endswith(ext):
            safe_name += ext

        out_path = out_dir / safe_name
        out_path.write_text(f.content, encoding="utf-8")
        counts[bucket] += 1

    return counts
@@ -0,0 +1,165 @@
1
+ """Terraform Registry module scraper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ import httpx
11
+
12
+ from cloudgym.utils.config import GOLD_TF_DIR
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ REGISTRY_API = "https://registry.terraform.io/v1"
17
+
18
+
19
@dataclass
class RegistryModule:
    """A module scraped from the Terraform Registry."""

    namespace: str  # registry namespace, e.g. an org name
    name: str  # module name within the namespace
    provider: str  # target provider segment of the registry id
    version: str  # version string reported by the registry listing
    source_url: str  # "source" field from the registry (VCS location)
    configs: list[tuple[str, str]] = field(default_factory=list)  # (filename, content)
29
+
30
+
31
@dataclass
class RegistryScraper:
    """Scrapes verified modules from the Terraform Registry."""

    max_modules: int = 100  # hard cap on modules fetched across all providers
    providers: list[str] = field(default_factory=lambda: ["aws", "azurerm", "google"])

    async def scrape(self) -> list[RegistryModule]:
        """Fetch verified modules and download their source configs.

        Returns only modules for which at least one .tf file was retrieved.
        """
        modules: list[RegistryModule] = []

        async with httpx.AsyncClient(timeout=30.0) as client:
            for provider in self.providers:
                batch = await self._list_modules(client, provider)
                modules.extend(batch)
                if len(modules) >= self.max_modules:
                    break

            # FIX: enforce the cap before the download fan-out. The loop
            # checks it only after extending, so it could overshoot by most
            # of a batch and download sources for modules past max_modules.
            modules = modules[: self.max_modules]

            # Download source for each module
            tasks = [self._fetch_module_source(client, m) for m in modules]
            await asyncio.gather(*tasks)

        return [m for m in modules if m.configs]

    async def _list_modules(
        self, client: httpx.AsyncClient, provider: str
    ) -> list[RegistryModule]:
        """List verified modules for a given provider (empty list on failure)."""
        modules = []
        try:
            resp = await client.get(
                f"{REGISTRY_API}/modules",
                params={
                    "provider": provider,
                    "verified": "true",
                    "limit": 20,
                },
            )
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            logger.error("Registry API failed for %s: %s", provider, exc)
            return modules

        data = resp.json()
        for mod in data.get("modules", []):
            modules.append(
                RegistryModule(
                    namespace=mod.get("namespace", ""),
                    name=mod.get("name", ""),
                    provider=mod.get("provider", ""),
                    version=mod.get("version", ""),
                    source_url=mod.get("source", ""),
                )
            )
        return modules

    async def _fetch_module_source(
        self, client: httpx.AsyncClient, module: RegistryModule
    ) -> None:
        """Fetch the download URL and retrieve .tf files from the source.

        Appends (filename, content) pairs to ``module.configs`` in place.
        """
        try:
            # Get the download URL from the registry
            resp = await client.get(
                f"{REGISTRY_API}/modules/"
                f"{module.namespace}/{module.name}/{module.provider}/"
                f"{module.version}/download",
                follow_redirects=True,
            )

            # The download endpoint returns a redirect header with the source archive
            download_url = resp.headers.get("X-Terraform-Get", "")
            if not download_url:
                return

            # If it's a GitHub source, try to get raw .tf files
            if "github.com" in download_url:
                await self._fetch_github_tf_files(client, download_url, module)

        except httpx.HTTPError as exc:
            logger.debug("Failed to fetch source for %s/%s: %s", module.namespace, module.name, exc)

    async def _fetch_github_tf_files(
        self,
        client: httpx.AsyncClient,
        github_url: str,
        module: RegistryModule,
    ) -> None:
        """Fetch .tf files from a GitHub repository URL.

        NOTE(review): this lists the repository's default branch — any
        ``?ref=<tag>`` in the source URL is stripped below, so the fetched
        files may not match ``module.version`` exactly. Confirm whether
        version pinning is required.
        """
        # Convert GitHub URL to API URL for contents
        # e.g., https://github.com/org/repo -> api.github.com/repos/org/repo/contents
        parts = github_url.rstrip("/").split("github.com/")
        if len(parts) < 2:
            return

        # Strip the ?query and //subdir suffixes from the source URL.
        repo_path = parts[1].split("?")[0].split("//")[0]
        api_url = f"https://api.github.com/repos/{repo_path}/contents"

        try:
            resp = await client.get(api_url)
            resp.raise_for_status()
            contents = resp.json()
        except (httpx.HTTPError, ValueError):
            return

        # The contents API returns a list for directories; anything else
        # (e.g. an error object) is unusable.
        if not isinstance(contents, list):
            return

        for item in contents:
            if not isinstance(item, dict):
                continue
            name = item.get("name", "")
            if name.endswith(".tf") and item.get("type") == "file":
                download_url = item.get("download_url", "")
                if download_url:
                    try:
                        file_resp = await client.get(download_url)
                        file_resp.raise_for_status()
                        module.configs.append((name, file_resp.text))
                    except httpx.HTTPError:
                        continue
151
+
152
+
153
async def save_registry_modules(modules: list[RegistryModule]) -> int:
    """Save registry module configs to gold directory. Returns file count."""
    count = 0
    GOLD_TF_DIR.mkdir(parents=True, exist_ok=True)

    for module in modules:
        for filename, content in module.configs:
            # FIX: include the per-file name in the output path. It was
            # omitted before, so every .tf file of a module overwrote the
            # same destination and only the last one survived. Path
            # separators are sanitized like in save_scraped_files.
            safe_name = (
                f"registry__{module.namespace}__{module.name}__{filename}"
                .replace("/", "_")
                .replace("\\", "_")
            )
            out_path = GOLD_TF_DIR / safe_name
            out_path.write_text(content, encoding="utf-8")
            count += 1

    return count
@@ -0,0 +1,116 @@
1
+ """Gold instance validator — filters scraped configs to only keep valid ones."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ from cloudgym.utils.config import GOLD_CF_DIR, GOLD_TF_DIR
11
+ from cloudgym.validator import cloudformation as cf_validator
12
+ from cloudgym.validator import terraform as tf_validator
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@dataclass
class ValidationStats:
    """Statistics from a gold validation run."""

    total: int = 0  # number of configs examined
    valid: int = 0  # configs that passed validation
    invalid: int = 0  # configs that failed (and were removed)
    errors: dict[str, list[str]] = field(default_factory=dict)  # filename -> errors

    @property
    def pass_rate(self) -> float:
        """Fraction of configs that passed; 0.0 when nothing was validated."""
        if self.total <= 0:
            return 0.0
        return self.valid / self.total
29
+
30
+
31
async def validate_gold_terraform(
    directory: Path | None = None,
    concurrency: int = 5,
) -> ValidationStats:
    """Validate all Terraform configs in the gold directory.

    Configs that fail validation are deleted, leaving only gold instances.
    """
    target = directory or GOLD_TF_DIR
    if not target.exists():
        logger.warning("Terraform gold directory does not exist: %s", target)
        return ValidationStats()

    candidates = list(target.glob("*.tf"))
    stats = ValidationStats(total=len(candidates))
    limiter = asyncio.Semaphore(concurrency)

    async def _check(path: Path) -> None:
        # Semaphore bounds how many validators run at once.
        async with limiter:
            result = await tf_validator.validate(path)
            if not result.valid:
                stats.invalid += 1
                stats.errors[path.name] = result.errors
                logger.info("FAIL (removing): %s — %s", path.name, result.errors[:2])
                path.unlink(missing_ok=True)
                return
            stats.valid += 1
            logger.debug("PASS: %s", path.name)

    await asyncio.gather(*(_check(p) for p in candidates))
    return stats
63
+
64
+
65
async def validate_gold_cloudformation(
    directory: Path | None = None,
    concurrency: int = 5,
) -> ValidationStats:
    """Validate all CloudFormation templates in the gold directory.

    Templates that fail cfn-lint are deleted, leaving only gold instances.
    """
    template_dir = directory or GOLD_CF_DIR
    if not template_dir.exists():
        logger.warning("CloudFormation gold directory does not exist: %s", template_dir)
        return ValidationStats()

    templates = [*template_dir.glob("*.yaml"), *template_dir.glob("*.json")]
    stats = ValidationStats(total=len(templates))
    limiter = asyncio.Semaphore(concurrency)

    async def _check(path: Path) -> None:
        # Semaphore bounds how many validators run at once.
        async with limiter:
            result = await cf_validator.validate(path)
            if result.valid:
                stats.valid += 1
                logger.debug("PASS: %s", path.name)
                return
            stats.invalid += 1
            stats.errors[path.name] = result.errors
            logger.info("FAIL (removing): %s — %s", path.name, result.errors[:2])
            path.unlink(missing_ok=True)

    await asyncio.gather(*(_check(t) for t in templates))
    return stats
97
+
98
+
99
async def validate_all_gold() -> dict[str, ValidationStats]:
    """Validate all gold configs across formats."""
    # Run the two format validations concurrently.
    results = await asyncio.gather(
        validate_gold_terraform(),
        validate_gold_cloudformation(),
    )
    tf_stats, cf_stats = results

    logger.info(
        "Gold validation complete — TF: %d/%d pass (%.0f%%), CF: %d/%d pass (%.0f%%)",
        tf_stats.valid,
        tf_stats.total,
        tf_stats.pass_rate * 100,
        cf_stats.valid,
        cf_stats.total,
        cf_stats.pass_rate * 100,
    )

    return {"terraform": tf_stats, "cloudformation": cf_stats}
@@ -0,0 +1,10 @@
1
+ """Fault taxonomy for IaC environment inversion.
2
+
3
+ Importing this package auto-registers all fault types in the global REGISTRY.
4
+ """
5
+
6
+ from cloudgym.taxonomy.base import REGISTRY # noqa: F401
7
+
8
+ # Import submodules to trigger fault registration
9
+ import cloudgym.taxonomy.terraform # noqa: F401
10
+ import cloudgym.taxonomy.cloudformation # noqa: F401
@@ -0,0 +1,102 @@
1
+ """Fault taxonomy base definitions for IaC environment inversion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum, auto
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Callable
11
+
12
+
13
class FaultCategory(Enum):
    """High-level categories of IaC faults.

    The concrete fault types in each category are registered by the
    format-specific taxonomy modules (terraform / cloudformation).
    """

    SYNTACTIC = auto()  # malformed syntax
    REFERENCE = auto()  # broken name/id references
    SEMANTIC = auto()  # valid syntax, wrong meaning or values
    DEPENDENCY = auto()  # resource dependency/ordering issues
    PROVIDER = auto()  # provider configuration issues
    SECURITY = auto()  # insecure settings
    CROSS_RESOURCE = auto()  # inconsistencies spanning multiple resources
    INTRINSIC = auto()  # CloudFormation-specific (intrinsic function faults)
24
+
25
+
26
class IaCFormat(Enum):
    """Supported Infrastructure-as-Code formats."""

    # Values are the lowercase format id strings used elsewhere in the
    # codebase (e.g. ScrapedFile.format uses "terraform"/"cloudformation").
    TERRAFORM = "terraform"
    CLOUDFORMATION = "cloudformation"
    OPENTOFU = "opentofu"
32
+
33
+
34
class Severity(Enum):
    """Fault severity / difficulty level.

    Carried by FaultType for taxonomy analysis and benchmark stratification.
    """

    LOW = "low"  # Obvious syntax errors, easy to spot
    MEDIUM = "medium"  # Requires understanding of resource relationships
    HIGH = "high"  # Subtle semantic or cross-resource issues
40
+
41
+
42
@dataclass(frozen=True)
class FaultType:
    """A specific type of fault that can be injected into IaC configs.

    Each FaultType defines a category of breakage (e.g., "missing closing brace")
    and carries metadata for taxonomy analysis and benchmark stratification.
    Frozen so instances are hashable and safe to share via the registry.
    """

    name: str  # short identifier, unique within its category (forms part of `id`)
    category: FaultCategory  # high-level taxonomy bucket
    description: str  # human-readable explanation of the fault
    severity: Severity  # difficulty level for stratification
    applicable_formats: frozenset[IaCFormat]  # formats this fault can be injected into
    example_error: str = ""  # presumably a sample validator error message — confirm
    tags: frozenset[str] = field(default_factory=frozenset)  # free-form labels

    @property
    def id(self) -> str:
        """Short identifier: category.name (e.g., SYNTACTIC.missing_brace)."""
        return f"{self.category.name}.{self.name}"
62
+
63
+
64
@dataclass
class FaultInjection:
    """Record of a single fault injection applied to a config."""

    fault_type: FaultType  # which taxonomy entry was injected
    original_snippet: str  # config text before the change
    modified_snippet: str  # config text after the change
    location: str  # file path or line range description
    description: str  # human-readable explanation of what was changed
73
+
74
+
75
@dataclass
class FaultRegistry:
    """Central registry of all known fault types."""

    # Keyed by FaultType.id ("CATEGORY.name"); insertion order preserved.
    _faults: dict[str, FaultType] = field(default_factory=dict)

    def register(self, fault: FaultType) -> FaultType:
        """Store *fault* under its id and return it unchanged."""
        self._faults[fault.id] = fault
        return fault

    def get(self, fault_id: str) -> FaultType | None:
        """Look up a fault by its "CATEGORY.name" id; None when unknown."""
        return self._faults.get(fault_id)

    def list_by_category(self, category: FaultCategory) -> list[FaultType]:
        """All registered faults belonging to *category*."""
        matches: list[FaultType] = []
        for fault in self._faults.values():
            if fault.category == category:
                matches.append(fault)
        return matches

    def list_by_format(self, fmt: IaCFormat) -> list[FaultType]:
        """All registered faults applicable to the given IaC format."""
        return [fault for fault in self._faults.values() if fmt in fault.applicable_formats]

    def all(self) -> list[FaultType]:
        """Every registered fault, in registration order."""
        return list(self._faults.values())

    def __len__(self) -> int:
        return len(self._faults)


# Global registry instance
REGISTRY = FaultRegistry()