PyPI - stackfix - Versions diffs - 0.1.0__py3-none-any.whl - Mend

stackfix 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

cloudgym/__init__.py +3 -0
cloudgym/benchmark/__init__.py +0 -0
cloudgym/benchmark/dataset.py +188 -0
cloudgym/benchmark/evaluator.py +275 -0
cloudgym/cli.py +61 -0
cloudgym/fixer/__init__.py +1 -0
cloudgym/fixer/cli.py +521 -0
cloudgym/fixer/detector.py +81 -0
cloudgym/fixer/formatter.py +55 -0
cloudgym/fixer/lambda_handler.py +126 -0
cloudgym/fixer/repairer.py +237 -0
cloudgym/generator/__init__.py +0 -0
cloudgym/generator/formatter.py +142 -0
cloudgym/generator/pipeline.py +271 -0
cloudgym/inverter/__init__.py +0 -0
cloudgym/inverter/_cf_injectors.py +705 -0
cloudgym/inverter/_cf_utils.py +202 -0
cloudgym/inverter/_hcl_utils.py +182 -0
cloudgym/inverter/_tf_injectors.py +641 -0
cloudgym/inverter/_yaml_cf.py +84 -0
cloudgym/inverter/agentic.py +90 -0
cloudgym/inverter/engine.py +258 -0
cloudgym/inverter/programmatic.py +95 -0
cloudgym/scraper/__init__.py +0 -0
cloudgym/scraper/aws_samples.py +159 -0
cloudgym/scraper/github.py +238 -0
cloudgym/scraper/registry.py +165 -0
cloudgym/scraper/validator.py +116 -0
cloudgym/taxonomy/__init__.py +10 -0
cloudgym/taxonomy/base.py +102 -0
cloudgym/taxonomy/cloudformation.py +258 -0
cloudgym/taxonomy/terraform.py +274 -0
cloudgym/utils/__init__.py +0 -0
cloudgym/utils/config.py +57 -0
cloudgym/utils/ollama.py +66 -0
cloudgym/validator/__init__.py +0 -0
cloudgym/validator/cloudformation.py +55 -0
cloudgym/validator/opentofu.py +103 -0
cloudgym/validator/terraform.py +115 -0
stackfix-0.1.0.dist-info/METADATA +182 -0
stackfix-0.1.0.dist-info/RECORD +44 -0
stackfix-0.1.0.dist-info/WHEEL +4 -0
stackfix-0.1.0.dist-info/entry_points.txt +3 -0
stackfix-0.1.0.dist-info/licenses/LICENSE +21 -0

cloudgym/inverter/_yaml_cf.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""Custom YAML loader/dumper for CloudFormation templates.
+CloudFormation uses custom YAML tags like !Ref, !GetAtt, !Sub, etc.
+that yaml.safe_load doesn't handle. This module provides a loader
+that preserves these tags as dicts.
+"""
+from __future__ import annotations
+import yaml
+class CFLoader(yaml.SafeLoader):
+    """SafeLoader subclass that carries the CloudFormation tag constructors.
+    The constructors for the short-form intrinsic tags are attached to this
+    class at module import time; it adds no behaviour of its own.
+    """
+class CFDumper(yaml.SafeDumper):
+    """SafeDumper subclass intended for CloudFormation intrinsic functions.
+    It adds no behaviour of its own.
+    """
+# Short-form (``!Tag``) CloudFormation intrinsic function tags handled by the loader
+_CF_TAGS = [
+    "!Ref",
+    "!GetAtt",
+    "!Sub",
+    "!Join",
+    "!Select",
+    "!Split",
+    "!If",
+    "!Equals",
+    "!Not",
+    "!And",
+    "!Or",
+    "!Condition",
+    "!FindInMap",
+    "!GetAZs",
+    "!ImportValue",
+    "!Base64",
+    "!Cidr",
+    "!Transform",
+]
+def _cf_constructor(tag: str):
+    """Build a YAML constructor that turns a ``tag`` node into a one-key dict.
+    The key is ``tag`` with its leading "!" removed (e.g. "!GetAtt" -> "GetAtt").
+    The value is the node's content: a scalar, a deeply constructed list, or a
+    deeply constructed mapping. Any other node kind yields ``None`` as the value.
+    """
+    key = tag.lstrip("!")
+    def constructor(loader, node):
+        if isinstance(node, yaml.MappingNode):
+            payload = loader.construct_mapping(node, deep=True)
+        elif isinstance(node, yaml.SequenceNode):
+            payload = loader.construct_sequence(node, deep=True)
+        elif isinstance(node, yaml.ScalarNode):
+            payload = loader.construct_scalar(node)
+        else:
+            payload = None
+        return {key: payload}
+    return constructor
+def _cf_representer(tag: str):
+    """Build a representer that writes a one-key dict back as a ``tag`` node.
+    The returned callable reads ``data[<tag without "!">]`` and emits it as a
+    tagged sequence or mapping when it is a list or dict, and as a tagged
+    scalar otherwise (non-string values are stringified first).
+    """
+    key = tag.lstrip("!")
+    def representer(dumper, data):
+        payload = data[key]
+        if isinstance(payload, list):
+            return dumper.represent_sequence(tag, payload)
+        if isinstance(payload, dict):
+            return dumper.represent_mapping(tag, payload)
+        text = payload if isinstance(payload, str) else str(payload)
+        return dumper.represent_scalar(tag, text)
+    return representer
+# Register a constructor for every CF tag on CFLoader. Only constructors are
+# registered here; no representer is attached to CFDumper by this loop.
+for _tag in _CF_TAGS:
+    CFLoader.add_constructor(_tag, _cf_constructor(_tag))
+def cf_load(text: str) -> dict:
+    """Parse CloudFormation YAML text with ``CFLoader``.
+    Short-form intrinsic tags are converted by the constructors registered on
+    ``CFLoader``. A falsy parse result (for example an empty document) is
+    replaced by an empty dict.
+    """
+    parsed = yaml.load(text, Loader=CFLoader)
+    return parsed if parsed else {}
+def cf_dump(template: dict) -> str:
+    """Serialize a template dict to block-style YAML, preserving key order.
+    This uses the stock ``yaml.dump`` rather than ``CFDumper``, so no ``!Tag``
+    shorthand is emitted: intrinsic functions loaded by ``cf_load`` are written
+    as the plain one-key mappings they were converted to.
+    NOTE(review): those keys are the bare tag names (e.g. ``GetAtt``) with no
+    ``Fn::`` prefix -- confirm that consumers of the output accept that form.
+    """
+    rendered = yaml.dump(template, default_flow_style=False, sort_keys=False)
+    return rendered

cloudgym/inverter/agentic.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""LLM-based (agentic) fault injection via Ollama.
+Sends gold config + fault category prompt to local LLM with quality gates:
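+- Similarity check (difflib ratio must be at least 0.7)
+- Diff size check (at most 20% of lines changed)
+- Non-identity check (output must differ from the input)
+The validation check (must fail terraform validate / cfn-lint) is not done
+here; the inversion engine applies it unless validation is skipped.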
+"""
+from __future__ import annotations
+import difflib
+import logging
+from cloudgym.utils.config import InverterConfig
+from cloudgym.utils.ollama import OllamaClient
+logger = logging.getLogger(__name__)
+async def inject_fault_agentic(
+    config_content: str,
+    fault_category: str,
+    iac_format: str,
+    config: InverterConfig | None = None,
+) -> str | None:
+    """Ask a local LLM (via Ollama) to break an IaC config, then vet the result.
+    The response is rejected (``None`` is returned) when any of these holds:
+    - the client call raises, or the response is empty / whitespace only
+    - similarity to the original is below 0.7 (the LLM rewrote too much)
+    - more than 20% of the original line count shows up as changed diff lines
+    - the response equals the original once surrounding whitespace is ignored
+    Args:
+        config_content: The gold config text.
+        fault_category: Fault category name passed to the LLM client.
+        iac_format: IaC format name passed to the LLM client.
+        config: Client configuration; a default ``InverterConfig`` if omitted.
+    Returns:
+        The broken config with markdown fences removed, or ``None``.
+    """
+    llm = OllamaClient(config=InverterConfig() if config is None else config)
+    try:
+        raw = await llm.inject_fault(config_content, fault_category, iac_format)
+    except Exception:
+        logger.exception("Ollama inject_fault failed")
+        return None
+    if not (raw and raw.strip()):
+        logger.debug("LLM returned empty response")
+        return None
+    # LLMs often wrap the answer in a markdown code fence
+    broken = _strip_fences(raw)
+    # Gate 1: overall character-level similarity
+    ratio = difflib.SequenceMatcher(None, config_content, broken).ratio()
+    if ratio < 0.7:
+        logger.debug("LLM output too dissimilar (%.2f < 0.7)", ratio)
+        return None
+    # Gate 2: share of changed lines relative to the original length
+    gold_lines = config_content.splitlines()
+    delta = difflib.unified_diff(gold_lines, broken.splitlines(), lineterm="")
+    marked = [entry for entry in delta if entry.startswith(("+", "-"))]
+    # The two file-header lines (--- and +++) are not real changes
+    changed = max(0, len(marked) - 2)
+    baseline = max(len(gold_lines), 1)
+    if changed / baseline > 0.2:
+        logger.debug(
+            "LLM changed too many lines (%d/%d = %.0f%%)",
+            changed, baseline, 100 * changed / baseline,
+        )
+        return None
+    # Gate 3: the output has to differ from the input
+    if broken.strip() == config_content.strip():
+        logger.debug("LLM output identical to input")
+        return None
+    return broken
+def _strip_fences(text: str) -> str:
+    """Drop a leading and/or trailing markdown code-fence line from ``text``.
+    Surrounding whitespace is trimmed first. The first line is removed when it
+    starts with three backticks (so a language tag is allowed); the last line
+    is removed when it is exactly three backticks once stripped. The remaining
+    lines are joined with "\\n".
+    """
+    rows = text.strip().splitlines()
+    if rows and rows[0].startswith("```"):
+        del rows[0]
+    if rows and rows[-1].strip() == "```":
+        rows.pop()
+    return "\n".join(rows)

cloudgym/inverter/engine.py ADDED Viewed

@@ -0,0 +1,258 @@
+"""Inversion engine — orchestrates fault injection and validates breaks.
+Reads gold config, selects fault type(s), injects faults, validates that
+the config is actually broken, and returns structured results.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from cloudgym.inverter.programmatic import inject_fault
+from cloudgym.taxonomy import REGISTRY
+from cloudgym.taxonomy.base import FaultInjection, FaultType
+from cloudgym.validator.terraform import ValidationResult
+logger = logging.getLogger(__name__)
+@dataclass
+class InversionResult:
+    """Result of a single fault injection attempt.
+    Bundles the original and broken config text with the fault that was
+    injected and the validation outcome recorded for the broken config.
+    """
+    # Text of the gold (original) config the fault was injected into.
+    gold_config: str
+    # Text of the config after the fault was injected.
+    broken_config: str
+    # The fault type that was injected.
+    fault_type: FaultType
+    # Record describing the concrete injection that was made.
+    injection: FaultInjection
+    # Validation outcome recorded for the broken config.
+    validation_result: ValidationResult
+    # Number of attempts made, counting the successful one (defaults to 1).
+    attempts: int = 1
+    # Path of the gold config as a string (empty by default).
+    gold_path: str = ""
+    # IaC format name, e.g. "terraform" or "cloudformation" (empty by default).
+    iac_format: str = ""
+def _detect_format(path: Path) -> str:
+    """Map a file path to an IaC format name using its (case-folded) suffix.
+    ``.yaml``, ``.yml``, ``.json`` and ``.template`` mean "cloudformation";
+    everything else, including ``.tf`` and suffix-less paths, is "terraform".
+    """
+    cloudformation_suffixes = {".yaml", ".yml", ".json", ".template"}
+    if path.suffix.lower() in cloudformation_suffixes:
+        return "cloudformation"
+    return "terraform"
+def _get_applicable_faults(iac_format: str) -> list[FaultType]:
+    """Return the registry's fault types for ``iac_format``.
+    Recognised names are "terraform", "opentofu" and "cloudformation"; any
+    other name yields an empty list.
+    """
+    from cloudgym.taxonomy.base import IaCFormat
+    members = dict(
+        terraform=IaCFormat.TERRAFORM,
+        opentofu=IaCFormat.OPENTOFU,
+        cloudformation=IaCFormat.CLOUDFORMATION,
+    )
+    if iac_format not in members:
+        return []
+    return REGISTRY.list_by_format(members[iac_format])
+async def _validate_broken(
+    broken_content: str, iac_format: str
+) -> ValidationResult:
+    """Write a config to a throwaway directory and run the matching validator.
+    Args:
+        broken_content: Config text to validate.
+        iac_format: "terraform" or "opentofu" select the Terraform validator
+            and a ``.tf`` file; any other value selects the CloudFormation
+            validator and a ``.yaml`` file.
+    Returns:
+        The ValidationResult produced by the selected validator.
+    """
+    import shutil
+    is_hcl = iac_format in ("terraform", "opentofu")
+    suffix = ".tf" if is_hcl else ".yaml"
+    tmpdir = Path(tempfile.mkdtemp(prefix="cloudgym_inv_"))
+    # The write sits inside the try so the directory is removed even if it fails
+    try:
+        tmp_file = tmpdir / f"broken{suffix}"
+        tmp_file.write_text(broken_content)
+        if is_hcl:
+            from cloudgym.validator.terraform import validate
+        else:
+            from cloudgym.validator.cloudformation import validate
+        return await validate(tmp_file)
+    finally:
+        shutil.rmtree(tmpdir, ignore_errors=True)
+class InversionEngine:
+    """Orchestrates fault injection with validation feedback loop.
+    Each ``invert`` call makes up to ``max_retries`` injection attempts, cycling
+    through the candidate fault types, and returns the first attempt that is
+    accepted as a real break.
+    Args:
+        max_retries: Maximum number of injection attempts per ``invert`` call.
+        concurrency: Size of the semaphore bounding concurrent ``invert`` calls.
+        skip_validation: When True, the first successful injection is accepted
+            without running the validator.
+    """
+    def __init__(
+        self,
+        max_retries: int = 3,
+        concurrency: int = 5,
+        skip_validation: bool = False,
+    ):
+        self.max_retries = max_retries
+        self._semaphore = asyncio.Semaphore(concurrency)
+        self.skip_validation = skip_validation
+    async def invert(
+        self,
+        gold_config_path: str | Path,
+        fault_types: list[FaultType] | None = None,
+        mode: str = "programmatic",
+    ) -> InversionResult | None:
+        """Inject a fault into a gold config and validate the break.
+        An attempt is accepted when validation is skipped, when the validator
+        reports the broken config invalid or returns errors, or -- for faults
+        whose category is SECURITY -- when the broken config produces more
+        warnings than the gold config.
+        Args:
+            gold_config_path: Path to the gold (valid) config file.
+            fault_types: Specific fault types to try. If None, auto-selects.
+            mode: "agentic" uses the LLM injector; any other value uses the
+                programmatic injector.
+        Returns:
+            InversionResult if successful, None if all attempts fail.
+        """
+        async with self._semaphore:
+            path = Path(gold_config_path)
+            iac_format = _detect_format(path)
+            config_content = path.read_text()
+            if fault_types is None:
+                fault_types = _get_applicable_faults(iac_format)
+            if not fault_types:
+                logger.warning("No applicable faults for %s", iac_format)
+                return None
+            try_inject = (
+                self._try_agentic if mode == "agentic" else self._try_programmatic
+            )
+            # Validation of the untouched gold config does not depend on the
+            # attempt, so it is computed lazily and at most once per call
+            gold_val = None
+            for attempt in range(self.max_retries):
+                fault_type = fault_types[attempt % len(fault_types)]
+                result = await try_inject(config_content, fault_type, iac_format)
+                if result is None:
+                    continue
+                broken_content, injection = result
+                if self.skip_validation:
+                    # In skip_validation mode, trust the injector
+                    val_result = ValidationResult(
+                        valid=False,
+                        errors=[fault_type.example_error or f"Injected {fault_type.id}"],
+                    )
+                    accepted = True
+                else:
+                    # Validate that the broken config actually fails
+                    val_result = await _validate_broken(broken_content, iac_format)
+                    accepted = not val_result.valid or bool(val_result.errors)
+                    # For security faults, check if warnings increased
+                    if not accepted and fault_type.category.name == "SECURITY":
+                        if gold_val is None:
+                            gold_val = await _validate_broken(
+                                config_content, iac_format
+                            )
+                        accepted = len(val_result.warnings) > len(gold_val.warnings)
+                if accepted:
+                    return InversionResult(
+                        gold_config=config_content,
+                        broken_config=broken_content,
+                        fault_type=fault_type,
+                        injection=injection,
+                        validation_result=val_result,
+                        attempts=attempt + 1,
+                        gold_path=str(path),
+                        iac_format=iac_format,
+                    )
+                logger.debug(
+                    "Attempt %d: fault %s did not break validation for %s",
+                    attempt + 1, fault_type.id, path.name,
+                )
+            return None
+    async def _try_programmatic(
+        self,
+        config_content: str,
+        fault_type: FaultType,
+        iac_format: str,
+    ) -> tuple[str, FaultInjection] | None:
+        """Try programmatic fault injection by delegating to ``inject_fault``."""
+        return await inject_fault(config_content, fault_type, iac_format)
+    async def _try_agentic(
+        self,
+        config_content: str,
+        fault_type: FaultType,
+        iac_format: str,
+    ) -> tuple[str, FaultInjection] | None:
+        """Try agentic (LLM) fault injection.
+        The LLM is given the fault's category name. On success the broken config
+        is returned with a FaultInjection whose snippets are the first 80
+        characters of the original and of the broken config.
+        """
+        from cloudgym.inverter.agentic import inject_fault_agentic
+        broken = await inject_fault_agentic(
+            config_content, fault_type.category.name, iac_format
+        )
+        if broken is None:
+            return None
+        injection = FaultInjection(
+            fault_type=fault_type,
+            original_snippet=config_content[:80],
+            modified_snippet=broken[:80],
+            location="agentic (full config)",
+            description=f"LLM-injected {fault_type.category.name} fault",
+        )
+        return broken, injection
+# Module-level convenience wrapper matching the original stub signature
+async def invert(
+    gold_config_path: str,
+    fault_types: list[str] | None = None,
+    mode: str = "programmatic",
+) -> dict | None:
+    """Run one inversion with a default ``InversionEngine`` and flatten the result.
+    Fault ids are resolved through ``REGISTRY.get``; ids that resolve to
+    ``None`` are dropped. An empty or missing ``fault_types`` lets the engine
+    pick the fault types itself.
+    Returns:
+        ``None`` when the engine produced no result, otherwise a dict with the
+        keys "gold_config", "broken_config", "fault_type" (the fault id),
+        "errors", "warnings" and "attempts".
+    """
+    resolved = None
+    if fault_types:
+        candidates = map(REGISTRY.get, fault_types)
+        resolved = [candidate for candidate in candidates if candidate is not None]
+    outcome = await InversionEngine().invert(gold_config_path, resolved, mode)
+    if outcome is None:
+        return None
+    validation = outcome.validation_result
+    return {
+        "gold_config": outcome.gold_config,
+        "broken_config": outcome.broken_config,
+        "fault_type": outcome.fault_type.id,
+        "errors": validation.errors,
+        "warnings": validation.warnings,
+        "attempts": outcome.attempts,
+    }

cloudgym/inverter/programmatic.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""Programmatic (rule-based) fault injection for IaC configs.
+Routes fault injection requests to the appropriate TF or CF injector function
+based on the fault type and IaC format.
+"""
+from __future__ import annotations
+import json
+import logging
+from cloudgym.inverter._cf_injectors import CF_INJECTOR_REGISTRY
+from cloudgym.inverter._tf_injectors import TF_INJECTOR_REGISTRY
+from cloudgym.taxonomy.base import FaultInjection, FaultType
+logger = logging.getLogger(__name__)
+# Combined registry: maps an IaC format name to that format's injector table.
+# inject_fault() picks the table by format name, then the injector by fault id.
+INJECTOR_REGISTRY: dict[str, dict] = {
+    "terraform": TF_INJECTOR_REGISTRY,
+    "opentofu": TF_INJECTOR_REGISTRY,  # Same injectors as TF
+    "cloudformation": CF_INJECTOR_REGISTRY,
+}
+def _parse_config(content: str, iac_format: str) -> dict | None:
+    """Parse config content into a dict for structural analysis.
+    "terraform" and "opentofu" content is parsed with ``hcl2``; any other
+    format is treated as CloudFormation and parsed as YAML, falling back to
+    JSON when the YAML loader raises.
+    Returns:
+        The parsed mapping. An empty dict is returned when parsing fails or
+        when the document's top level is not a mapping (for example a bare
+        scalar or a list), so callers always get a dict; ``None`` is never
+        returned by this implementation.
+    """
+    if iac_format in ("terraform", "opentofu"):
+        try:
+            import hcl2
+            import io
+            parsed = hcl2.load(io.StringIO(content))
+        except Exception:
+            return {}
+    else:
+        # CloudFormation — try YAML then JSON
+        from cloudgym.inverter._yaml_cf import cf_load
+        try:
+            parsed = cf_load(content)
+        except Exception:
+            try:
+                parsed = json.loads(content)
+            except json.JSONDecodeError:
+                return {}
+    # Injectors receive this as a dict; never hand them a str/list/number
+    return parsed if isinstance(parsed, dict) else {}
+async def inject_fault(
+    config_content: str,
+    fault_type: FaultType,
+    iac_format: str,
+) -> tuple[str, FaultInjection] | None:
+    """Run the registered injector for ``fault_type`` against a config.
+    Args:
+        config_content: The original (gold) config content.
+        fault_type: The type of fault to inject; its ``id`` selects the injector.
+        iac_format: One of "terraform", "cloudformation", "opentofu".
+    Returns:
+        ``(broken_config_content, injection_record)``, or ``None`` when the
+        format or fault has no injector, the injector raises or declines, or
+        the injector leaves the content unchanged.
+    """
+    format_table = INJECTOR_REGISTRY.get(iac_format)
+    if format_table is None:
+        logger.warning("No injector registry for format: %s", iac_format)
+        return None
+    fault_id = fault_type.id
+    injector = format_table.get(fault_id)
+    if injector is None:
+        logger.debug("No injector for fault %s in format %s", fault_id, iac_format)
+        return None
+    structure = _parse_config(config_content, iac_format)
+    if structure is None:
+        logger.warning("Failed to parse config for format %s", iac_format)
+        return None
+    # Injectors get both the raw text and its parsed structure
+    try:
+        outcome = injector(config_content, structure)
+    except Exception:
+        logger.exception("Injector %s raised an exception", fault_id)
+        return None
+    if outcome is None:
+        return None
+    mutated, record = outcome
+    record.fault_type = fault_type
+    # An injection that changes nothing is treated as not applicable
+    if mutated == config_content:
+        logger.debug("Injector %s produced no change", fault_id)
+        return None
+    return mutated, record

cloudgym/scraper/__init__.py ADDED Viewed

File without changes

cloudgym/scraper/aws_samples.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""AWS CloudFormation sample template scraper."""
+from __future__ import annotations
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from pathlib import Path
+import httpx
+from cloudgym.utils.config import GOLD_CF_DIR
+logger = logging.getLogger(__name__)
+# Official AWS CF sample repos on GitHub, as "owner/name" slugs; the scraper
+# walks them in this order.
+# NOTE(review): confirm each slug still exists and that none is a renamed or
+# transferred alias of another entry -- TODO verify.
+AWS_CF_REPOS = [
+    "aws-cloudformation/aws-cloudformation-templates",
+    "awslabs/aws-cloudformation-templates",
+    "aws-samples/aws-cloudformation-samples",
+]
+@dataclass
+class AWSTemplateFile:
+    """A scraped AWS CloudFormation sample template."""
+    # GitHub repository slug ("owner/name") the file was taken from.
+    repo: str
+    # Path of the file inside that repository.
+    path: str
+    # Downloaded text of the template.
+    content: str
+@dataclass
+class AWSSamplesScraper:
+    """Scrapes AWS CloudFormation sample templates from official repos.
+    Attributes:
+        max_files: Upper bound on the number of templates ``scrape`` returns.
+        max_depth: Deepest directory level whose contents are listed; the
+            repository root is level 0.
+    """
+    max_files: int = 200
+    max_depth: int = 3
+    async def scrape(self) -> list[AWSTemplateFile]:
+        """Fetch CF templates from AWS sample repositories.
+        Repositories are walked one after another, pausing one second between
+        them, until ``max_files`` templates have been collected.
+        """
+        results: list[AWSTemplateFile] = []
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            for index, repo in enumerate(AWS_CF_REPOS):
+                # Pause between repositories only, not after the last one
+                if index:
+                    await asyncio.sleep(1.0)
+                results.extend(await self._scrape_repo(client, repo))
+                if len(results) >= self.max_files:
+                    break
+        return results[: self.max_files]
+    async def _scrape_repo(
+        self,
+        client: httpx.AsyncClient,
+        repo: str,
+    ) -> list[AWSTemplateFile]:
+        """Recursively scrape CF templates from a GitHub repo."""
+        results: list[AWSTemplateFile] = []
+        await self._walk_contents(client, repo, "", results)
+        return results
+    async def _walk_contents(
+        self,
+        client: httpx.AsyncClient,
+        repo: str,
+        path: str,
+        results: list[AWSTemplateFile],
+        depth: int = 0,
+    ) -> None:
+        """Walk repo contents recursively, collecting CF templates.
+        Lists ``path`` through the GitHub contents API, downloads the matching
+        files of that directory concurrently, appends them to ``results`` and
+        then descends into sub-directories. Listing failures are logged at
+        debug level and end the walk of that directory.
+        """
+        if depth > self.max_depth or len(results) >= self.max_files:
+            return
+        api_url = f"https://api.github.com/repos/{repo}/contents/{path}"
+        try:
+            resp = await client.get(api_url)
+            resp.raise_for_status()
+            items = resp.json()
+        except (httpx.HTTPError, ValueError) as exc:
+            logger.debug("Failed to list %s/%s: %s", repo, path, exc)
+            return
+        if not isinstance(items, list):
+            return
+        dirs = []
+        download_tasks = []
+        for item in items:
+            if not isinstance(item, dict):
+                continue
+            name = item.get("name", "")
+            item_type = item.get("type", "")
+            item_path = item.get("path", "")
+            if item_type == "dir":
+                dirs.append(item_path)
+            elif item_type == "file" and self._is_cf_template(name):
+                download_url = item.get("download_url", "")
+                if download_url:
+                    download_tasks.append(
+                        self._download_template(client, repo, item_path, download_url)
+                    )
+        # Download files in parallel
+        templates = await asyncio.gather(*download_tasks)
+        results.extend(t for t in templates if t is not None)
+        # Children of the deepest level would be rejected on entry, so do not
+        # recurse (and sleep) for them at all
+        if depth >= self.max_depth:
+            return
+        # Recurse into directories
+        for d in dirs:
+            # Stop as soon as the quota is met instead of sleeping per directory
+            if len(results) >= self.max_files:
+                break
+            await self._walk_contents(client, repo, d, results, depth + 1)
+            await asyncio.sleep(0.5)  # Rate limiting
+    def _is_cf_template(self, filename: str) -> bool:
+        """Check if a filename looks like a CloudFormation template.
+        The name must end in .yaml, .yml or .json and contain one of the hint
+        words "template", "cfn", "cloudformation" or "stack" (case-insensitive).
+        """
+        lower = filename.lower()
+        cf_hints = ("template", "cfn", "cloudformation", "stack")
+        is_yaml_json = lower.endswith((".yaml", ".yml", ".json"))
+        has_hint = any(h in lower for h in cf_hints)
+        return is_yaml_json and has_hint
+    async def _download_template(
+        self,
+        client: httpx.AsyncClient,
+        repo: str,
+        path: str,
+        url: str,
+    ) -> AWSTemplateFile | None:
+        """Download and verify a single CF template.
+        Returns ``None`` when the download fails or the text contains neither
+        "AWSTemplateFormatVersion" nor "Resources".
+        """
+        try:
+            resp = await client.get(url, follow_redirects=True)
+            resp.raise_for_status()
+            content = resp.text
+        except httpx.HTTPError:
+            return None
+        # Quick check: does it look like a CF template?
+        if "AWSTemplateFormatVersion" not in content and "Resources" not in content:
+            return None
+        return AWSTemplateFile(repo=repo, path=path, content=content)
+async def save_aws_samples(files: list[AWSTemplateFile]) -> int:
+    """Write each template into ``GOLD_CF_DIR`` and return how many were written.
+    The file name is "<repo>__<path>" with "/" and "\\" replaced by "_". If
+    that name does not already end with the chosen extension (".json" when the
+    source path ends in ".json", otherwise ".yaml"), the extension is appended.
+    Files are written as UTF-8.
+    """
+    GOLD_CF_DIR.mkdir(parents=True, exist_ok=True)
+    written = 0
+    for template in files:
+        flat_name = f"{template.repo}__{template.path}"
+        for separator in ("/", "\\"):
+            flat_name = flat_name.replace(separator, "_")
+        extension = ".json" if template.path.endswith(".json") else ".yaml"
+        if not flat_name.endswith(extension):
+            flat_name = flat_name + extension
+        (GOLD_CF_DIR / flat_name).write_text(template.content, encoding="utf-8")
+        written += 1
+    return written