stackfix 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. cloudgym/__init__.py +3 -0
  2. cloudgym/benchmark/__init__.py +0 -0
  3. cloudgym/benchmark/dataset.py +188 -0
  4. cloudgym/benchmark/evaluator.py +275 -0
  5. cloudgym/cli.py +61 -0
  6. cloudgym/fixer/__init__.py +1 -0
  7. cloudgym/fixer/cli.py +521 -0
  8. cloudgym/fixer/detector.py +81 -0
  9. cloudgym/fixer/formatter.py +55 -0
  10. cloudgym/fixer/lambda_handler.py +126 -0
  11. cloudgym/fixer/repairer.py +237 -0
  12. cloudgym/generator/__init__.py +0 -0
  13. cloudgym/generator/formatter.py +142 -0
  14. cloudgym/generator/pipeline.py +271 -0
  15. cloudgym/inverter/__init__.py +0 -0
  16. cloudgym/inverter/_cf_injectors.py +705 -0
  17. cloudgym/inverter/_cf_utils.py +202 -0
  18. cloudgym/inverter/_hcl_utils.py +182 -0
  19. cloudgym/inverter/_tf_injectors.py +641 -0
  20. cloudgym/inverter/_yaml_cf.py +84 -0
  21. cloudgym/inverter/agentic.py +90 -0
  22. cloudgym/inverter/engine.py +258 -0
  23. cloudgym/inverter/programmatic.py +95 -0
  24. cloudgym/scraper/__init__.py +0 -0
  25. cloudgym/scraper/aws_samples.py +159 -0
  26. cloudgym/scraper/github.py +238 -0
  27. cloudgym/scraper/registry.py +165 -0
  28. cloudgym/scraper/validator.py +116 -0
  29. cloudgym/taxonomy/__init__.py +10 -0
  30. cloudgym/taxonomy/base.py +102 -0
  31. cloudgym/taxonomy/cloudformation.py +258 -0
  32. cloudgym/taxonomy/terraform.py +274 -0
  33. cloudgym/utils/__init__.py +0 -0
  34. cloudgym/utils/config.py +57 -0
  35. cloudgym/utils/ollama.py +66 -0
  36. cloudgym/validator/__init__.py +0 -0
  37. cloudgym/validator/cloudformation.py +55 -0
  38. cloudgym/validator/opentofu.py +103 -0
  39. cloudgym/validator/terraform.py +115 -0
  40. stackfix-0.1.0.dist-info/METADATA +182 -0
  41. stackfix-0.1.0.dist-info/RECORD +44 -0
  42. stackfix-0.1.0.dist-info/WHEEL +4 -0
  43. stackfix-0.1.0.dist-info/entry_points.txt +3 -0
  44. stackfix-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,84 @@
1
+ """Custom YAML loader/dumper for CloudFormation templates.
2
+
3
+ CloudFormation uses custom YAML tags like !Ref, !GetAtt, !Sub, etc.
4
+ that yaml.safe_load doesn't handle. This module provides a loader
5
+ that preserves these tags as dicts.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import yaml
11
+
12
+
13
class CFLoader(yaml.SafeLoader):
    """SafeLoader subclass used for CloudFormation templates.

    It exists so that constructors for the CloudFormation shorthand tags
    (``!Ref``, ``!GetAtt``, ...) can be attached to a dedicated loader class.
    """
16
+
17
+
18
class CFDumper(yaml.SafeDumper):
    """SafeDumper subclass reserved for CloudFormation-specific output.

    It adds nothing to ``yaml.SafeDumper`` itself.
    """
21
+
22
+
23
+ # CloudFormation intrinsic function tags
24
+ _CF_TAGS = [
25
+ "!Ref", "!GetAtt", "!Sub", "!Join", "!Select", "!Split",
26
+ "!If", "!Equals", "!Not", "!And", "!Or", "!Condition",
27
+ "!FindInMap", "!GetAZs", "!ImportValue", "!Base64",
28
+ "!Cidr", "!Transform",
29
+ ]
30
+
31
+
32
def _cf_constructor(tag: str):
    """Build a PyYAML constructor for one CloudFormation shorthand tag.

    The returned callable converts a node carrying *tag* into a single-key
    dict. The key is the tag with its leading ``!`` removed (``"!Ref"`` becomes
    ``"Ref"``) and the value is the node's constructed content.
    """
    key = tag.lstrip("!")

    def constructor(loader, node):
        # Choose the loader method that matches the node's shape; nested
        # sequences/mappings are built eagerly (deep=True).
        if isinstance(node, yaml.ScalarNode):
            payload = loader.construct_scalar(node)
        elif isinstance(node, yaml.SequenceNode):
            payload = loader.construct_sequence(node, deep=True)
        elif isinstance(node, yaml.MappingNode):
            payload = loader.construct_mapping(node, deep=True)
        else:
            # Any other node type yields no payload.
            payload = None
        return {key: payload}

    return constructor
49
+
50
+
51
+ def _cf_representer(tag: str):
52
+ """Create a representer that converts a dict back to a CF tag."""
53
+ fn_name = tag.lstrip("!")
54
+
55
+ def representer(dumper, data):
56
+ value = data[fn_name]
57
+ if isinstance(value, str):
58
+ return dumper.represent_scalar(tag, value)
59
+ elif isinstance(value, list):
60
+ return dumper.represent_sequence(tag, value)
61
+ elif isinstance(value, dict):
62
+ return dumper.represent_mapping(tag, value)
63
+ return dumper.represent_scalar(tag, str(value))
64
+
65
+ return representer
66
+
67
+
68
# Attach a tag-specific constructor to CFLoader for every known CF tag.
# Only constructors are registered here; no representer is installed.
for _tag in _CF_TAGS:
    _fn_name = _tag.lstrip("!")
    CFLoader.add_constructor(_tag, _cf_constructor(_tag))
72
+
73
+
74
def cf_load(text: str) -> dict:
    """Parse CloudFormation YAML text using :class:`CFLoader`.

    A falsy parse result (for example an empty document, which parses to
    ``None``) is replaced with an empty dict.
    """
    # CFLoader derives from yaml.SafeLoader, so this is a safe load.
    parsed = yaml.load(text, Loader=CFLoader)
    if parsed:
        return parsed
    return {}
77
+
78
+
79
def cf_dump(template: dict) -> str:
    """Serialise a template dict to block-style YAML, keeping key order."""
    # Plain yaml.dump is used on purpose: the ``!Tag`` shorthand is not
    # restored, so values are emitted exactly as they are stored in the dict.
    # NOTE(review): the loader in this module stores intrinsics under the bare
    # function name (e.g. {"GetAtt": ...}), not under an "Fn::" key -- confirm
    # that consumers of the dumped text accept that form.
    dump_options = {"default_flow_style": False, "sort_keys": False}
    return yaml.dump(template, **dump_options)
@@ -0,0 +1,90 @@
1
+ """LLM-based (agentic) fault injection via Ollama.
2
+
3
+ Sends gold config + fault category prompt to local LLM with quality gates:
4
+ - Similarity check (difflib)
5
+ - Diff size check (< 20% lines changed)
6
+ - Validation check (must fail terraform validate / cfn-lint)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import difflib
12
+ import logging
13
+
14
+ from cloudgym.utils.config import InverterConfig
15
+ from cloudgym.utils.ollama import OllamaClient
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
async def inject_fault_agentic(
    config_content: str,
    fault_category: str,
    iac_format: str,
    config: InverterConfig | None = None,
) -> str | None:
    """Ask the local LLM for a faulty variant of an IaC config.

    The reply is accepted only if it passes every gate below; otherwise
    ``None`` is returned.

    Gates:
        - the reply is non-empty,
        - character-level similarity to the original is at least 0.7,
        - at most 20% of the original's line count shows up as added/removed
          lines in a unified diff,
        - the reply differs from the original (ignoring outer whitespace).

    Args:
        config_content: The original (gold) config text.
        fault_category: Fault category name forwarded to the LLM client.
        iac_format: IaC format name forwarded to the LLM client.
        config: Client configuration; a default ``InverterConfig`` is created
            when omitted.

    Returns:
        The broken config text, or ``None`` if the call failed or a gate
        rejected the reply.
    """
    if config is None:
        config = InverterConfig()
    llm = OllamaClient(config=config)

    try:
        reply = await llm.inject_fault(config_content, fault_category, iac_format)
    except Exception:
        logger.exception("Ollama inject_fault failed")
        return None

    if not (reply and reply.strip()):
        logger.debug("LLM returned empty response")
        return None

    # The model may wrap its answer in a markdown code fence.
    candidate = _strip_fences(reply)

    # Gate: reject wholesale rewrites.
    ratio = difflib.SequenceMatcher(None, config_content, candidate).ratio()
    if ratio < 0.7:
        logger.debug("LLM output too dissimilar (%.2f < 0.7)", ratio)
        return None

    # Gate: limit the share of touched lines.
    gold_lines = config_content.splitlines()
    delta = difflib.unified_diff(gold_lines, candidate.splitlines(), lineterm="")
    marked = [entry for entry in delta if entry.startswith(("+", "-"))]
    # The "---" / "+++" file headers are counted above; take them back out.
    changed_lines = max(0, len(marked) - 2)
    total_lines = max(len(gold_lines), 1)

    if changed_lines / total_lines > 0.2:
        logger.debug(
            "LLM changed too many lines (%d/%d = %.0f%%)",
            changed_lines, total_lines, 100 * changed_lines / total_lines,
        )
        return None

    # Gate: an unchanged config is not a fault.
    if candidate.strip() == config_content.strip():
        logger.debug("LLM output identical to input")
        return None

    return candidate
81
+
82
+
83
+ def _strip_fences(text: str) -> str:
84
+ """Remove markdown code fences from LLM output."""
85
+ lines = text.strip().splitlines()
86
+ if lines and lines[0].startswith("```"):
87
+ lines = lines[1:]
88
+ if lines and lines[-1].strip() == "```":
89
+ lines = lines[:-1]
90
+ return "\n".join(lines)
@@ -0,0 +1,258 @@
1
+ """Inversion engine — orchestrates fault injection and validates breaks.
2
+
3
+ Reads gold config, selects fault type(s), injects faults, validates that
4
+ the config is actually broken, and returns structured results.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import logging
11
+ import tempfile
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+
15
+ from cloudgym.inverter.programmatic import inject_fault
16
+ from cloudgym.taxonomy import REGISTRY
17
+ from cloudgym.taxonomy.base import FaultInjection, FaultType
18
+ from cloudgym.validator.terraform import ValidationResult
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class InversionResult:
    """Outcome of one successful fault injection."""

    # Text of the original (valid) config.
    gold_config: str
    # Text of the config after the fault was injected.
    broken_config: str
    # The fault type that was injected.
    fault_type: FaultType
    # Record describing the injected change.
    injection: FaultInjection
    # Validator output for the broken config.
    validation_result: ValidationResult
    # 1-based number of the attempt that produced this result.
    attempts: int = 1
    # Path of the gold config, as a string.
    gold_path: str = ""
    # IaC format name detected for the gold config.
    iac_format: str = ""
35
+
36
+
37
+ def _detect_format(path: Path) -> str:
38
+ """Detect IaC format from file extension."""
39
+ suffix = path.suffix.lower()
40
+ if suffix == ".tf":
41
+ return "terraform"
42
+ if suffix in (".yaml", ".yml", ".json", ".template"):
43
+ return "cloudformation"
44
+ return "terraform" # default
45
+
46
+
47
def _get_applicable_faults(iac_format: str) -> list[FaultType]:
    """Return the registered fault types for *iac_format*.

    Recognised names are ``"terraform"``, ``"opentofu"`` and
    ``"cloudformation"``; any other name yields an empty list.
    """
    from cloudgym.taxonomy.base import IaCFormat

    known_formats = {
        "terraform": IaCFormat.TERRAFORM,
        "opentofu": IaCFormat.OPENTOFU,
        "cloudformation": IaCFormat.CLOUDFORMATION,
    }
    if iac_format not in known_formats:
        return []
    return REGISTRY.list_by_format(known_formats[iac_format])
60
+
61
+
62
async def _validate_broken(
    broken_content: str, iac_format: str
) -> ValidationResult:
    """Write a config to a scratch directory and run the matching validator.

    Terraform and OpenTofu content is written as ``broken.tf`` and checked by
    the Terraform validator; everything else is written as ``broken.yaml`` and
    checked by the CloudFormation validator.

    Args:
        broken_content: Config text to validate.
        iac_format: IaC format name selecting the validator.

    Returns:
        The validator's ``ValidationResult``.
    """
    import shutil

    is_hcl = iac_format in ("terraform", "opentofu")
    tmpdir = Path(tempfile.mkdtemp(prefix="cloudgym_inv_"))

    # Everything that touches the scratch directory lives inside the try so
    # the directory is removed even if writing the file fails.
    try:
        tmp_file = tmpdir / ("broken.tf" if is_hcl else "broken.yaml")
        # Write UTF-8 explicitly rather than relying on the locale encoding.
        tmp_file.write_text(broken_content, encoding="utf-8")

        if is_hcl:
            from cloudgym.validator.terraform import validate
        else:
            from cloudgym.validator.cloudformation import validate
        return await validate(tmp_file)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
81
+
82
+
83
class InversionEngine:
    """Orchestrate fault injection with a validation feedback loop.

    Each :meth:`invert` call injects one fault into a gold config and, unless
    validation is skipped, only reports success when the validator output
    shows that the config was actually broken.

    Args:
        max_retries: Number of injection attempts made per config.
        concurrency: Size of the ``asyncio.Semaphore`` that bounds how many
            :meth:`invert` calls run at once on this engine.
        skip_validation: When true, the injector's output is accepted without
            running a validator.
    """

    def __init__(
        self,
        max_retries: int = 3,
        concurrency: int = 5,
        skip_validation: bool = False,
    ):
        self.max_retries = max_retries
        self._semaphore = asyncio.Semaphore(concurrency)
        self.skip_validation = skip_validation

    async def invert(
        self,
        gold_config_path: str | Path,
        fault_types: list[FaultType] | None = None,
        mode: str = "programmatic",
    ) -> InversionResult | None:
        """Inject a fault into a gold config and validate the break.

        Attempt ``i`` uses ``fault_types[i % len(fault_types)]``, so the fault
        types are tried in order and wrap around if there are fewer of them
        than ``max_retries``.

        Args:
            gold_config_path: Path to the gold (valid) config file.
            fault_types: Specific fault types to try. If None, all fault types
                registered for the detected format are used.
            mode: "agentic" selects LLM injection; any other value selects
                programmatic injection.

        Returns:
            InversionResult if successful, None if all attempts fail or no
            fault type is applicable.
        """
        async with self._semaphore:
            path = Path(gold_config_path)
            iac_format = _detect_format(path)
            # Gold files are read as UTF-8, matching how the scraper in this
            # package writes them.
            config_content = path.read_text(encoding="utf-8")

            if fault_types is None:
                fault_types = _get_applicable_faults(iac_format)

            if not fault_types:
                logger.warning("No applicable faults for %s", iac_format)
                return None

            try_inject = (
                self._try_agentic if mode == "agentic" else self._try_programmatic
            )
            # Validation of the untouched gold config does not depend on the
            # attempt, so it is computed at most once and reused.
            gold_val = None

            for attempt in range(self.max_retries):
                fault_type = fault_types[attempt % len(fault_types)]

                result = await try_inject(config_content, fault_type, iac_format)
                if result is None:
                    continue

                broken_content, injection = result

                if self.skip_validation:
                    # Trust the injector and synthesise a failing result.
                    val_result = ValidationResult(
                        valid=False,
                        errors=[fault_type.example_error or f"Injected {fault_type.id}"],
                    )
                    accepted = True
                else:
                    val_result = await _validate_broken(broken_content, iac_format)
                    accepted = not val_result.valid or bool(val_result.errors)

                    # A security fault may leave the config valid; accept it
                    # when it raises more warnings than the gold config does.
                    if not accepted and fault_type.category.name == "SECURITY":
                        if gold_val is None:
                            gold_val = await _validate_broken(
                                config_content, iac_format
                            )
                        accepted = len(val_result.warnings) > len(gold_val.warnings)

                if accepted:
                    return InversionResult(
                        gold_config=config_content,
                        broken_config=broken_content,
                        fault_type=fault_type,
                        injection=injection,
                        validation_result=val_result,
                        attempts=attempt + 1,
                        gold_path=str(path),
                        iac_format=iac_format,
                    )

                logger.debug(
                    "Attempt %d: fault %s did not break validation for %s",
                    attempt + 1, fault_type.id, path.name,
                )

            return None

    async def _try_programmatic(
        self,
        config_content: str,
        fault_type: FaultType,
        iac_format: str,
    ) -> tuple[str, FaultInjection] | None:
        """Delegate to the rule-based injector."""
        return await inject_fault(config_content, fault_type, iac_format)

    async def _try_agentic(
        self,
        config_content: str,
        fault_type: FaultType,
        iac_format: str,
    ) -> tuple[str, FaultInjection] | None:
        """Ask the LLM injector for a fault in the fault type's category.

        Returns the broken text plus a ``FaultInjection`` record built from
        the first 80 characters of each version, or ``None`` if the LLM
        injector produced nothing usable.
        """
        from cloudgym.inverter.agentic import inject_fault_agentic

        broken = await inject_fault_agentic(
            config_content, fault_type.category.name, iac_format
        )
        if broken is None:
            return None

        injection = FaultInjection(
            fault_type=fault_type,
            original_snippet=config_content[:80],
            modified_snippet=broken[:80],
            location="agentic (full config)",
            description=f"LLM-injected {fault_type.category.name} fault",
        )
        return broken, injection
227
+
228
+
229
# Convenience function matching original stub signature
async def invert(
    gold_config_path: str,
    fault_types: list[str] | None = None,
    mode: str = "programmatic",
) -> dict | None:
    """Run one fault injection on a gold config with a default engine.

    Args:
        gold_config_path: Path to the gold (valid) config file.
        fault_types: Fault type ids to try. Ids for which the registry lookup
            yields ``None`` are skipped with a warning. If omitted or empty,
            the engine selects fault types itself.
        mode: Injection mode passed through to ``InversionEngine.invert``.

    Returns:
        A dict with keys ``gold_config``, ``broken_config``, ``fault_type``
        (the id of the injected fault), ``errors``, ``warnings`` and
        ``attempts``; or ``None`` if the engine produced no result.
    """
    engine = InversionEngine()

    ft_objects = None
    if fault_types:
        ft_objects = []
        for fid in fault_types:
            ft = REGISTRY.get(fid)
            if ft is None:
                # Report ids that are dropped instead of ignoring them silently.
                logger.warning("Unknown fault type id %r; skipping", fid)
                continue
            ft_objects.append(ft)

    result = await engine.invert(gold_config_path, ft_objects, mode)
    if result is None:
        return None

    validation = result.validation_result
    return {
        "gold_config": result.gold_config,
        "broken_config": result.broken_config,
        "fault_type": result.fault_type.id,
        "errors": validation.errors,
        "warnings": validation.warnings,
        "attempts": result.attempts,
    }
@@ -0,0 +1,95 @@
1
+ """Programmatic (rule-based) fault injection for IaC configs.
2
+
3
+ Routes fault injection requests to the appropriate TF or CF injector function
4
+ based on the fault type and IaC format.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+
12
+ from cloudgym.inverter._cf_injectors import CF_INJECTOR_REGISTRY
13
+ from cloudgym.inverter._tf_injectors import TF_INJECTOR_REGISTRY
14
+ from cloudgym.taxonomy.base import FaultInjection, FaultType
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
# Per-format injector tables, keyed by IaC format name. OpenTofu reuses the
# Terraform table.
INJECTOR_REGISTRY: dict[str, dict] = dict(
    terraform=TF_INJECTOR_REGISTRY,
    opentofu=TF_INJECTOR_REGISTRY,
    cloudformation=CF_INJECTOR_REGISTRY,
)
24
+
25
+
26
+ def _parse_config(content: str, iac_format: str) -> dict | None:
27
+ """Parse config content into a dict for structural analysis."""
28
+ if iac_format in ("terraform", "opentofu"):
29
+ try:
30
+ import hcl2
31
+ import io
32
+ return hcl2.load(io.StringIO(content))
33
+ except Exception:
34
+ return {}
35
+ else:
36
+ # CloudFormation — try YAML then JSON
37
+ from cloudgym.inverter._yaml_cf import cf_load
38
+ try:
39
+ return cf_load(content)
40
+ except Exception:
41
+ try:
42
+ return json.loads(content)
43
+ except json.JSONDecodeError:
44
+ return {}
45
+
46
+
47
async def inject_fault(
    config_content: str,
    fault_type: FaultType,
    iac_format: str,
) -> tuple[str, FaultInjection] | None:
    """Apply the rule-based injector registered for a fault type.

    The injector is looked up by format and fault id, then called with the
    raw text and its parsed form. An injector that raises is logged and
    treated as "not applicable".

    Args:
        config_content: The original (gold) config content.
        fault_type: The type of fault to inject.
        iac_format: One of "terraform", "cloudformation", "opentofu".

    Returns:
        Tuple of (broken_config_content, injection_record), or None when no
        injector exists, the injector declined or failed, or the injector left
        the text unchanged.
    """
    injectors = INJECTOR_REGISTRY.get(iac_format)
    if injectors is None:
        logger.warning("No injector registry for format: %s", iac_format)
        return None

    inject = injectors.get(fault_type.id)
    if inject is None:
        logger.debug("No injector for fault %s in format %s", fault_type.id, iac_format)
        return None

    tree = _parse_config(config_content, iac_format)
    if tree is None:
        logger.warning("Failed to parse config for format %s", iac_format)
        return None

    try:
        outcome = inject(config_content, tree)
    except Exception:
        logger.exception("Injector %s raised an exception", fault_type.id)
        return None

    if outcome is None:
        return None

    mutated, record = outcome
    # Stamp the record with the fault type that was requested.
    record.fault_type = fault_type

    # An injector that returns the text untouched has not injected anything.
    if mutated == config_content:
        logger.debug("Injector %s produced no change", fault_type.id)
        return None

    return mutated, record
File without changes
@@ -0,0 +1,159 @@
1
+ """AWS CloudFormation sample template scraper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ import httpx
11
+
12
+ from cloudgym.utils.config import GOLD_CF_DIR
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
# GitHub repositories ("owner/name") that are crawled for sample
# CloudFormation templates, in crawl order.
AWS_CF_REPOS = [
    "aws-cloudformation/aws-cloudformation-templates",
    "awslabs/aws-cloudformation-templates",
    "aws-samples/aws-cloudformation-samples",
]
22
+
23
+
24
@dataclass
class AWSTemplateFile:
    """One CloudFormation template downloaded from a sample repository."""

    # Repository the file came from, as "owner/name".
    repo: str
    # Path of the file inside the repository.
    path: str
    # Downloaded text of the template.
    content: str
31
+
32
+
33
@dataclass
class AWSSamplesScraper:
    """Scrapes AWS CloudFormation sample templates from official repos.

    ``max_files`` is the total number of templates wanted across all
    repositories in ``AWS_CF_REPOS``.
    """

    max_files: int = 200

    async def scrape(self) -> list[AWSTemplateFile]:
        """Fetch CF templates from the AWS sample repositories.

        Repositories are crawled in order. Each crawl is given only the
        budget that is still open, so later repositories are not walked for
        templates that would be discarded anyway.

        Returns:
            At most ``max_files`` templates.
        """
        results: list[AWSTemplateFile] = []

        async with httpx.AsyncClient(timeout=30.0) as client:
            for repo in AWS_CF_REPOS:
                remaining = self.max_files - len(results)
                files = await self._scrape_repo(client, repo, limit=remaining)
                results.extend(files)
                if len(results) >= self.max_files:
                    break
                await asyncio.sleep(1.0)  # pause between repositories

        # A crawl may overshoot its budget slightly; trim to the exact cap.
        return results[: self.max_files]

    async def _scrape_repo(
        self,
        client: httpx.AsyncClient,
        repo: str,
        limit: int | None = None,
    ) -> list[AWSTemplateFile]:
        """Recursively scrape CF templates from one GitHub repo.

        Args:
            client: HTTP client used for all requests.
            repo: Repository as "owner/name".
            limit: Stop walking once this many templates were collected;
                defaults to ``max_files``.
        """
        results: list[AWSTemplateFile] = []
        await self._walk_contents(client, repo, "", results, limit=limit)
        return results

    async def _walk_contents(
        self,
        client: httpx.AsyncClient,
        repo: str,
        path: str,
        results: list[AWSTemplateFile],
        depth: int = 0,
        limit: int | None = None,
    ) -> None:
        """Walk repo contents recursively, appending CF templates to *results*.

        The walk stops below depth 3 and as soon as *results* holds ``limit``
        entries (``max_files`` when *limit* is None). Listing failures are
        logged at debug level and end that branch of the walk.
        """
        cap = self.max_files if limit is None else limit
        if depth > 3 or len(results) >= cap:
            return

        api_url = f"https://api.github.com/repos/{repo}/contents/{path}"
        try:
            resp = await client.get(api_url)
            resp.raise_for_status()
            items = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            logger.debug("Failed to list %s/%s: %s", repo, path, exc)
            return

        # Only a list payload is treated as a directory listing.
        if not isinstance(items, list):
            return

        dirs = []
        download_tasks = []

        for item in items:
            if not isinstance(item, dict):
                continue
            name = item.get("name", "")
            item_type = item.get("type", "")
            item_path = item.get("path", "")

            if item_type == "dir":
                dirs.append(item_path)
            elif item_type == "file" and self._is_cf_template(name):
                download_url = item.get("download_url", "")
                if download_url:
                    download_tasks.append(
                        self._download_template(client, repo, item_path, download_url)
                    )

        # Download this directory's candidate files in parallel.
        templates = await asyncio.gather(*download_tasks)
        results.extend(t for t in templates if t is not None)

        # Recurse into directories, but stop (and stop sleeping) as soon as
        # the budget is met.
        for d in dirs:
            if len(results) >= cap:
                break
            await self._walk_contents(client, repo, d, results, depth + 1, limit)
            await asyncio.sleep(0.5)  # Rate limiting

    def _is_cf_template(self, filename: str) -> bool:
        """Check if a filename looks like a CloudFormation template.

        The name (case-insensitive) must end in .yaml/.yml/.json and contain
        one of the hints "template", "cfn", "cloudformation" or "stack".
        """
        lower = filename.lower()
        cf_hints = ("template", "cfn", "cloudformation", "stack")
        is_yaml_json = lower.endswith((".yaml", ".yml", ".json"))
        has_hint = any(h in lower for h in cf_hints)
        return is_yaml_json and has_hint

    async def _download_template(
        self,
        client: httpx.AsyncClient,
        repo: str,
        path: str,
        url: str,
    ) -> AWSTemplateFile | None:
        """Download one file and keep it if it looks like a CF template.

        Returns None when the download fails or the text contains neither
        "AWSTemplateFormatVersion" nor "Resources".
        """
        try:
            resp = await client.get(url, follow_redirects=True)
            resp.raise_for_status()
            content = resp.text
        except httpx.HTTPError:
            return None

        # Quick check: does it look like a CF template?
        if "AWSTemplateFormatVersion" not in content and "Resources" not in content:
            return None

        return AWSTemplateFile(repo=repo, path=path, content=content)
143
+
144
+
145
async def save_aws_samples(files: list[AWSTemplateFile]) -> int:
    """Write scraped templates into the gold CloudFormation directory.

    Each file is stored under a flat name built from ``<repo>__<path>`` with
    path separators replaced by underscores. Files whose path ends in
    ``.json`` keep that extension; every other name gets ``.yaml`` appended
    unless it already ends with it.

    Returns:
        Number of files written.
    """
    GOLD_CF_DIR.mkdir(parents=True, exist_ok=True)

    written = 0
    for sample in files:
        flat_name = f"{sample.repo}__{sample.path}".replace("/", "_").replace("\\", "_")
        suffix = ".json" if sample.path.endswith(".json") else ".yaml"
        if not flat_name.endswith(suffix):
            flat_name += suffix
        (GOLD_CF_DIR / flat_name).write_text(sample.content, encoding="utf-8")
        written += 1

    return written