stackfix 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloudgym/__init__.py +3 -0
- cloudgym/benchmark/__init__.py +0 -0
- cloudgym/benchmark/dataset.py +188 -0
- cloudgym/benchmark/evaluator.py +275 -0
- cloudgym/cli.py +61 -0
- cloudgym/fixer/__init__.py +1 -0
- cloudgym/fixer/cli.py +521 -0
- cloudgym/fixer/detector.py +81 -0
- cloudgym/fixer/formatter.py +55 -0
- cloudgym/fixer/lambda_handler.py +126 -0
- cloudgym/fixer/repairer.py +237 -0
- cloudgym/generator/__init__.py +0 -0
- cloudgym/generator/formatter.py +142 -0
- cloudgym/generator/pipeline.py +271 -0
- cloudgym/inverter/__init__.py +0 -0
- cloudgym/inverter/_cf_injectors.py +705 -0
- cloudgym/inverter/_cf_utils.py +202 -0
- cloudgym/inverter/_hcl_utils.py +182 -0
- cloudgym/inverter/_tf_injectors.py +641 -0
- cloudgym/inverter/_yaml_cf.py +84 -0
- cloudgym/inverter/agentic.py +90 -0
- cloudgym/inverter/engine.py +258 -0
- cloudgym/inverter/programmatic.py +95 -0
- cloudgym/scraper/__init__.py +0 -0
- cloudgym/scraper/aws_samples.py +159 -0
- cloudgym/scraper/github.py +238 -0
- cloudgym/scraper/registry.py +165 -0
- cloudgym/scraper/validator.py +116 -0
- cloudgym/taxonomy/__init__.py +10 -0
- cloudgym/taxonomy/base.py +102 -0
- cloudgym/taxonomy/cloudformation.py +258 -0
- cloudgym/taxonomy/terraform.py +274 -0
- cloudgym/utils/__init__.py +0 -0
- cloudgym/utils/config.py +57 -0
- cloudgym/utils/ollama.py +66 -0
- cloudgym/validator/__init__.py +0 -0
- cloudgym/validator/cloudformation.py +55 -0
- cloudgym/validator/opentofu.py +103 -0
- cloudgym/validator/terraform.py +115 -0
- stackfix-0.1.0.dist-info/METADATA +182 -0
- stackfix-0.1.0.dist-info/RECORD +44 -0
- stackfix-0.1.0.dist-info/WHEEL +4 -0
- stackfix-0.1.0.dist-info/entry_points.txt +3 -0
- stackfix-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""CloudFormation validation via cfn-lint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .terraform import ValidationResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def validate(config_path: Path) -> ValidationResult:
    """Validate a CloudFormation template with the cfn-lint Python API.

    The linter is invoked in-process rather than by shelling out. Matches
    whose rule ID starts with ``E`` are reported as errors; every other
    match (warnings, informational rules, or matches without a ``rule``
    attribute) is reported as a warning.

    Args:
        config_path: Path to the template file handed to cfn-lint.

    Returns:
        A ``ValidationResult`` that is valid only when no error-level match
        was produced. If cfn-lint cannot be imported, or raises while
        linting, an invalid result describing the failure is returned
        instead of propagating the exception.
    """
    try:
        # Imported lazily so a missing cfn-lint install surfaces as a
        # validation result rather than an import-time crash.
        from cfnlint import runner
        from cfnlint.config import ConfigMixIn
    except ImportError:
        return ValidationResult(
            valid=False,
            errors=["cfn-lint is not installed — run `pip install cfn-lint`"],
        )

    try:
        args = ConfigMixIn(["--template", str(config_path), "--format", "json"])
        matches = list(runner.Runner(args).run())
    except Exception as exc:  # boundary: any linter failure becomes a result
        return ValidationResult(
            valid=False,
            errors=[f"cfn-lint failed: {exc}"],
            raw_output=str(exc),
        )

    errors: list[str] = []
    warnings: list[str] = []
    messages: list[str] = []  # every match, in order, for raw_output

    for match in matches:
        rule_id = match.rule.id if hasattr(match, "rule") else ""
        message = str(match)
        messages.append(message)

        # E = error, W = warning, I = info; only E-rules fail validation.
        if rule_id.startswith("E"):
            errors.append(message)
        else:
            warnings.append(message)

    return ValidationResult(
        valid=not errors,
        errors=errors,
        warnings=warnings,
        raw_output="\n".join(messages),
    )
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""OpenTofu validation via tofu CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import shutil
|
|
8
|
+
import tempfile
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .terraform import ValidationResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def validate(config_path: Path) -> ValidationResult:
    """Validate an OpenTofu configuration directory or file.

    Runs `tofu init -backend=false` followed by `tofu validate -json`.
    Mirrors the Terraform validator but uses the `tofu` binary.

    Args:
        config_path: A configuration directory, or a single file. A single
            file is copied into a fresh temporary directory and validated
            there; a directory is validated in place.

    Returns:
        The ``ValidationResult`` produced by ``_run_tofu``, or an invalid
        result when the ``tofu`` executable is not on ``PATH``.
    """
    if shutil.which("tofu") is None:
        return ValidationResult(
            valid=False,
            errors=["tofu CLI not found in PATH"],
        )

    if not config_path.is_file():
        # Not a regular file: hand the path to tofu as the working directory.
        return await _run_tofu(config_path)

    tmpdir = Path(tempfile.mkdtemp(prefix="cloudgym_tofu_"))
    try:
        # The copy lives inside the try so a failed copy cannot leak the
        # freshly created temporary directory.
        shutil.copy2(config_path, tmpdir / config_path.name)
        return await _run_tofu(tmpdir)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def _run_tofu(work_dir: Path) -> ValidationResult:
    """Run ``tofu init`` and then ``tofu validate -json`` in *work_dir*.

    Args:
        work_dir: Directory used as the working directory for both commands.

    Returns:
        An invalid ``ValidationResult`` carrying stderr when ``tofu init``
        exits non-zero; an invalid result when the validate output is not a
        JSON object; otherwise a result built from the ``valid`` flag and
        the ``diagnostics`` list of the JSON report, with ``error``-severity
        diagnostics in ``errors`` and all others in ``warnings``.
    """
    init_proc = await asyncio.create_subprocess_exec(
        "tofu",
        "init",
        "-backend=false",
        "-no-color",
        cwd=work_dir,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    # stdout is captured only to keep it off the parent's terminal.
    _, init_stderr = await init_proc.communicate()

    if init_proc.returncode != 0:
        stderr_text = init_stderr.decode(errors="replace")
        return ValidationResult(
            valid=False,
            errors=[f"tofu init failed: {stderr_text}"],
            raw_output=stderr_text,
        )

    val_proc = await asyncio.create_subprocess_exec(
        "tofu",
        "validate",
        "-json",
        "-no-color",
        cwd=work_dir,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    val_stdout, val_stderr = await val_proc.communicate()

    raw = val_stdout.decode(errors="replace")

    try:
        result = json.loads(raw)
    except json.JSONDecodeError:
        result = None

    if not isinstance(result, dict):
        # When stdout is empty the only useful information is on stderr, so
        # report that instead of an empty message.
        output = raw if raw.strip() else val_stderr.decode(errors="replace")
        return ValidationResult(
            valid=False,
            errors=[f"Failed to parse tofu validate output: {output}"],
            raw_output=output,
        )

    errors: list[str] = []
    warnings: list[str] = []
    # `or []` also covers an explicit null diagnostics value.
    for diag in result.get("diagnostics") or []:
        summary = diag.get("summary", "")
        detail = diag.get("detail", "")
        full_msg = f"{summary}: {detail}" if detail else summary

        if diag.get("severity") == "error":
            errors.append(full_msg)
        else:
            warnings.append(full_msg)

    return ValidationResult(
        valid=result.get("valid", False),
        errors=errors,
        warnings=warnings,
        raw_output=raw,
    )
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Terraform validation via terraform CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import tempfile
|
|
8
|
+
import shutil
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ValidationResult:
    """Outcome of running a validator over an IaC configuration.

    Only ``valid`` is required; the message lists default to fresh empty
    lists per instance and ``raw_output`` defaults to the empty string.
    """

    # Overall verdict reported by the validator.
    valid: bool
    # Error-level messages.
    errors: list[str] = field(default_factory=list)
    # Non-error messages.
    warnings: list[str] = field(default_factory=list)
    # Unprocessed tool output kept alongside the parsed messages.
    raw_output: str = ""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def validate(config_path: Path) -> ValidationResult:
    """Validate a Terraform configuration directory or file.

    Runs `terraform init -backend=false` followed by `terraform validate -json`.
    If config_path is a single .tf file, copies it to a temp directory first.

    Args:
        config_path: A configuration directory, or a single file. A
            directory is validated in place.

    Returns:
        The ``ValidationResult`` produced by ``_run_terraform``, or an
        invalid result when the ``terraform`` executable is not on ``PATH``.
    """
    if shutil.which("terraform") is None:
        return ValidationResult(
            valid=False,
            errors=["terraform CLI not found in PATH"],
        )

    if not config_path.is_file():
        # Not a regular file: use the path itself as the working directory.
        return await _run_terraform(config_path)

    # Single file: validate an isolated copy so sibling files are ignored.
    tmpdir = Path(tempfile.mkdtemp(prefix="cloudgym_tf_"))
    try:
        # The copy lives inside the try so a failed copy cannot leak the
        # freshly created temporary directory.
        shutil.copy2(config_path, tmpdir / config_path.name)
        return await _run_terraform(tmpdir)
    finally:
        shutil.rmtree(tmpdir, ignore_errors=True)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def _run_terraform(work_dir: Path) -> ValidationResult:
    """Run ``terraform init`` and then ``terraform validate -json`` in *work_dir*.

    Args:
        work_dir: Directory used as the working directory for both commands.

    Returns:
        An invalid ``ValidationResult`` carrying stderr when
        ``terraform init`` exits non-zero; an invalid result when the
        validate output is not a JSON object; otherwise a result built from
        the ``valid`` flag and the ``diagnostics`` list of the JSON report,
        with ``error``-severity diagnostics in ``errors`` and all others in
        ``warnings``.
    """
    # terraform init -backend=false
    init_proc = await asyncio.create_subprocess_exec(
        "terraform",
        "init",
        "-backend=false",
        "-no-color",
        cwd=work_dir,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    # stdout is captured only to keep it off the parent's terminal.
    _, init_stderr = await init_proc.communicate()

    if init_proc.returncode != 0:
        stderr_text = init_stderr.decode(errors="replace")
        return ValidationResult(
            valid=False,
            errors=[f"terraform init failed: {stderr_text}"],
            raw_output=stderr_text,
        )

    # terraform validate -json
    val_proc = await asyncio.create_subprocess_exec(
        "terraform",
        "validate",
        "-json",
        "-no-color",
        cwd=work_dir,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    val_stdout, val_stderr = await val_proc.communicate()

    raw = val_stdout.decode(errors="replace")

    try:
        result = json.loads(raw)
    except json.JSONDecodeError:
        result = None

    if not isinstance(result, dict):
        # When stdout is empty the only useful information is on stderr, so
        # report that instead of an empty message.
        output = raw if raw.strip() else val_stderr.decode(errors="replace")
        return ValidationResult(
            valid=False,
            errors=[f"Failed to parse terraform validate output: {output}"],
            raw_output=output,
        )

    errors: list[str] = []
    warnings: list[str] = []
    # `or []` also covers an explicit null diagnostics value.
    for diag in result.get("diagnostics") or []:
        summary = diag.get("summary", "")
        detail = diag.get("detail", "")
        full_msg = f"{summary}: {detail}" if detail else summary

        if diag.get("severity") == "error":
            errors.append(full_msg)
        else:
            warnings.append(full_msg)

    return ValidationResult(
        valid=result.get("valid", False),
        errors=errors,
        warnings=warnings,
        raw_output=raw,
    )
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stackfix
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-powered Infrastructure-as-Code repair — fix broken Terraform and CloudFormation on CPU
|
|
5
|
+
Author: Jon
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Requires-Dist: cfn-lint>=1.0
|
|
10
|
+
Requires-Dist: click>=8.0
|
|
11
|
+
Requires-Dist: datasets>=3.0
|
|
12
|
+
Requires-Dist: httpx>=0.27
|
|
13
|
+
Requires-Dist: ollama>=0.4
|
|
14
|
+
Requires-Dist: python-hcl2>=5.0
|
|
15
|
+
Requires-Dist: pyyaml>=6.0
|
|
16
|
+
Requires-Dist: rich>=13.0
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
21
|
+
Provides-Extra: gguf
|
|
22
|
+
Requires-Dist: llama-cpp-python>=0.3; extra == 'gguf'
|
|
23
|
+
Provides-Extra: mlx
|
|
24
|
+
Requires-Dist: mlx-lm>=0.21; extra == 'mlx'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# Cloud-Gym
|
|
28
|
+
|
|
29
|
+
Scalable Training Data Generation for Infrastructure-as-Code Repair via Environment Inversion.
|
|
30
|
+
|
|
31
|
+
Cloud-Gym generates (broken_config, error_message, fix) training pairs for IaC repair by applying **environment inversion** — taking working Terraform, CloudFormation, and OpenTofu configs and systematically breaking them using a defined fault taxonomy. It includes a benchmark (188 entries across 8 error categories) and fine-tuned models that run entirely on CPU.
|
|
32
|
+
|
|
33
|
+
## stackfix: AI-Powered IaC Repair
|
|
34
|
+
|
|
35
|
+
The `stackfix` CLI tool validates and repairs broken IaC files using fine-tuned local models. No API keys, no cloud costs, no data leaves your machine.
|
|
36
|
+
|
|
37
|
+
### Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install stackfix[gguf]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Download a Model
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Recommended: 3B Q4 (1.8 GB, 87% pass@1)
|
|
47
|
+
python -c "
|
|
48
|
+
from huggingface_hub import hf_hub_download
|
|
49
|
+
hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
|
|
50
|
+
"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Usage
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Check files for errors
|
|
57
|
+
stackfix check main.tf template.yaml
|
|
58
|
+
|
|
59
|
+
# Repair a broken file (show diff)
|
|
60
|
+
stackfix repair main.tf --backend gguf --model iac-repair-3b-q4.gguf
|
|
61
|
+
|
|
62
|
+
# Repair and apply fix in place
|
|
63
|
+
stackfix repair main.tf --apply --backend gguf --model iac-repair-3b-q4.gguf
|
|
64
|
+
|
|
65
|
+
# Explain errors in plain language
|
|
66
|
+
stackfix discuss main.tf --backend gguf --model iac-repair-3b-q4.gguf
|
|
67
|
+
|
|
68
|
+
# Pipe mode (stdin/stdout)
|
|
69
|
+
cat broken.tf | stackfix repair - --backend gguf --model iac-repair-3b-q4.gguf > fixed.tf
|
|
70
|
+
|
|
71
|
+
# Check all changed IaC files in git
|
|
72
|
+
stackfix git-diff --backend gguf --model iac-repair-3b-q4.gguf
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Models
|
|
76
|
+
|
|
77
|
+
| Model | Size | RAM | Speed (CPU) | pass@1 | HuggingFace |
|
|
78
|
+
|---|---|---|---|---|---|
|
|
79
|
+
| **7B Q4** | 4.5 GB | ~8 GB | ~20 tok/s | **0.926** | [Tetsuto/iac-repair-7b-gguf](https://huggingface.co/Tetsuto/iac-repair-7b-gguf) |
|
|
80
|
+
| **3B Q4** | 1.8 GB | ~4 GB | 49 tok/s | 0.867 | [Tetsuto/iac-repair-3b-gguf](https://huggingface.co/Tetsuto/iac-repair-3b-gguf) |
|
|
81
|
+
| **0.5B Q4** | 379 MB | ~800 MB | 127 tok/s | 0.723 | [Tetsuto/iac-repair-0.5b-gguf](https://huggingface.co/Tetsuto/iac-repair-0.5b-gguf) |
|
|
82
|
+
|
|
83
|
+
All models are fine-tuned Qwen2.5-Coder with LoRA, exported to GGUF. They run on any CPU (Linux, macOS, Windows).
|
|
84
|
+
|
|
85
|
+
### Backends
|
|
86
|
+
|
|
87
|
+
| Backend | Install | Platform | Use Case |
|
|
88
|
+
|---|---|---|---|
|
|
89
|
+
| `gguf` | `pip install stackfix[gguf]` | Any (CPU) | CI/CD, Lambda, servers |
|
|
90
|
+
| `mlx` | `pip install stackfix[mlx]` | Apple Silicon | Local dev on Mac |
|
|
91
|
+
| `ollama` | `pip install stackfix` + Ollama | Any | When Ollama is already running |
|
|
92
|
+
|
|
93
|
+
### CI/CD Integration
|
|
94
|
+
|
|
95
|
+
Add to your GitHub Actions workflow to catch IaC errors on every PR:
|
|
96
|
+
|
|
97
|
+
```yaml
|
|
98
|
+
- name: Check IaC
|
|
99
|
+
run: |
|
|
100
|
+
pip install stackfix[gguf]
|
|
101
|
+
python -c "
|
|
102
|
+
from huggingface_hub import hf_hub_download
|
|
103
|
+
hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
|
|
104
|
+
"
|
|
105
|
+
stackfix check **/*.tf **/*.yaml
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
See [examples/USE_CASES.md](examples/USE_CASES.md) for more deployment scenarios (pre-commit hooks, Lambda, pipeline integration).
|
|
109
|
+
|
|
110
|
+
### Pre-Commit Hook
|
|
111
|
+
|
|
112
|
+
```yaml
|
|
113
|
+
# .pre-commit-config.yaml
|
|
114
|
+
repos:
|
|
115
|
+
- repo: local
|
|
116
|
+
hooks:
|
|
117
|
+
- id: stackfix
|
|
118
|
+
name: stackfix
|
|
119
|
+
entry: stackfix pre-commit --backend gguf --model iac-repair-3b-q4.gguf
|
|
120
|
+
language: python
|
|
121
|
+
types_or: [terraform, yaml]
|
|
122
|
+
additional_dependencies: ['stackfix[gguf]']
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Benchmark
|
|
126
|
+
|
|
127
|
+
188 entries across 8 error categories, 3 difficulty levels, and 2 formats (Terraform + CloudFormation).
|
|
128
|
+
|
|
129
|
+
### Results Summary
|
|
130
|
+
|
|
131
|
+
| Model | pass@1 | Terraform | CloudFormation | High | Medium | Low |
|
|
132
|
+
|---|---|---|---|---|---|---|
|
|
133
|
+
| **7B v2 fine-tuned** | **0.926** | 0.993 | 0.750 | 0.960 | 0.897 | 0.923 |
|
|
134
|
+
| 3B rank4 fine-tuned | 0.867 | 0.912 | 0.750 | 0.964 | 0.797 | 0.821 |
|
|
135
|
+
| qwen2.5-coder:7b (base) | 0.856 | 0.905 | 0.707 | 0.840 | 0.859 | 0.893 |
|
|
136
|
+
| 0.5B distilled | 0.723 | 0.775 | 0.590 | 0.809 | 0.648 | 0.731 |
|
|
137
|
+
| llama3.2:3b (base) | 0.641 | 0.734 | 0.361 | 0.684 | 0.636 | 0.533 |
|
|
138
|
+
| gemma-4-26b (base) | 0.009 | 0.000 | 0.032 | 0.000 | 0.004 | 0.051 |
|
|
139
|
+
|
|
140
|
+
Even the fine-tuned 0.5B model outperforms the 26B base model by roughly 80x on pass@1 (0.723 vs. 0.009).
|
|
141
|
+
|
|
142
|
+
## Training Data Generation
|
|
143
|
+
|
|
144
|
+
Cloud-Gym generates training data via environment inversion:
|
|
145
|
+
|
|
146
|
+
1. **Collect** working IaC configs from GitHub, Terraform Registry, AWS samples
|
|
147
|
+
2. **Break** them systematically using a fault taxonomy (28+ fault types across 8 categories)
|
|
148
|
+
3. **Validate** broken configs to capture real error messages
|
|
149
|
+
4. **Pair** (broken + errors) with the original working config as the gold fix
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Generate training data
|
|
153
|
+
cloud-gym taxonomy # View fault types
|
|
154
|
+
python scripts/scrape.py # Collect gold configs
|
|
155
|
+
cloud-gym invert # Generate broken variants
|
|
156
|
+
cloud-gym export # Export training pairs
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Project Structure
|
|
160
|
+
|
|
161
|
+
```text
|
|
162
|
+
cloudgym/
|
|
163
|
+
taxonomy/ Fault type definitions (28+ types, 8 categories)
|
|
164
|
+
scraper/ Gold config collection
|
|
165
|
+
validator/ IaC validation wrappers (terraform, tofu, cfn-lint)
|
|
166
|
+
inverter/ Fault injection engines
|
|
167
|
+
generator/ Training data pipeline
|
|
168
|
+
benchmark/ Evaluation harness
|
|
169
|
+
fixer/ stackfix CLI tool + model backends
|
|
170
|
+
scripts/ Training, evaluation, and export scripts
|
|
171
|
+
examples/ Broken IaC examples + use case docs
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Supported Formats
|
|
175
|
+
|
|
176
|
+
- **Terraform** (`.tf`) — validated with `terraform validate`
|
|
177
|
+
- **CloudFormation** (`.yaml`, `.yml`, `.json`) — validated with `cfn-lint`
|
|
178
|
+
- **OpenTofu** (`.tf`) — validated with `tofu validate` (mirrors the Terraform validator)
|
|
179
|
+
|
|
180
|
+
## License
|
|
181
|
+
|
|
182
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
cloudgym/__init__.py,sha256=nolAPNtBAMikNChS5WneVqEhcB2ovIhJpuKxoofJnh8,88
|
|
2
|
+
cloudgym/cli.py,sha256=vrdVezoxt44rTB335dE5koUWgTovIfQpXm40XSiBMcU,2052
|
|
3
|
+
cloudgym/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
cloudgym/benchmark/dataset.py,sha256=2f_Oi8x_ivCic62vqdIfBdsif_nJJxhA-YOyhtLn-mM,6113
|
|
5
|
+
cloudgym/benchmark/evaluator.py,sha256=4hU8BAKu7_Kc-evOhj2H3Hp9WedrskOqnr7hdNYd348,9795
|
|
6
|
+
cloudgym/fixer/__init__.py,sha256=LwrTdAF6LCXZsLs3iM_Lauo0j7x6ykwINym-jL68aNM,63
|
|
7
|
+
cloudgym/fixer/cli.py,sha256=xsPKU91_9VE_ADGW0d2K9aMTNvcCxsicJURXoZpUa6w,18152
|
|
8
|
+
cloudgym/fixer/detector.py,sha256=EJ8YKtCNshX2DVVo7S4oyUD3SWNDGVts0lCkO31Vdl8,2720
|
|
9
|
+
cloudgym/fixer/formatter.py,sha256=-df8-LL9JRMXYRB_Ks0PSie70F5lRXI9WdK9ah6IgGU,1692
|
|
10
|
+
cloudgym/fixer/lambda_handler.py,sha256=rEtxWd4d-GchxrCQXKA3HWl4KD8c66zvglCsYdY7W2k,3513
|
|
11
|
+
cloudgym/fixer/repairer.py,sha256=3vGX7iPYRCJ-bQmxOHlZFPOo_8hmKvkbIc9IWIWsTEo,8072
|
|
12
|
+
cloudgym/generator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
cloudgym/generator/formatter.py,sha256=WnBgGGXfQBEXaQ_D0ozzAfLrNEWxq7RQ-q4wwWatiYA,4523
|
|
14
|
+
cloudgym/generator/pipeline.py,sha256=E5hIivKX-uXI167u346N0puiCUzKaSQDx6QWS67Kh5A,9831
|
|
15
|
+
cloudgym/inverter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
cloudgym/inverter/_cf_injectors.py,sha256=xV40Xr3XYNOSRJ7CS0Ke21OVi-dcRf2U41oRZa9-OJ0,25986
|
|
17
|
+
cloudgym/inverter/_cf_utils.py,sha256=dI_l3Jfn9P2GdmcjTwLzWOPqjO-sWFva2Oy3YV63ofg,6745
|
|
18
|
+
cloudgym/inverter/_hcl_utils.py,sha256=3rsCKmZ93gLclUxER6g_y5YqUvvxWEs8vyyH_e7qKKs,5771
|
|
19
|
+
cloudgym/inverter/_tf_injectors.py,sha256=7oRkzfbRmNBhS_JYUrQgV7MCSL3QpvKvYL5UV8p7u-0,23676
|
|
20
|
+
cloudgym/inverter/_yaml_cf.py,sha256=XmCWSICQRLxkQrUALEL0ycJ3aVsbPLJbTDk2j0tCr2k,2688
|
|
21
|
+
cloudgym/inverter/agentic.py,sha256=UJMKeAx7MY1KRqJJMy2yrLQ4bNeyrqWsG1SSH0xB99I,2808
|
|
22
|
+
cloudgym/inverter/engine.py,sha256=IbtTgpbgQkx6-yYxrfVSl1yS9izJxFngGPlzFk9Ra0M,8830
|
|
23
|
+
cloudgym/inverter/programmatic.py,sha256=tMpnEvnFIZbCV9es2SZLecvB7JPjlIzcx_yhl32IOew,2914
|
|
24
|
+
cloudgym/scraper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
cloudgym/scraper/aws_samples.py,sha256=R_d6DuJpCdXA3_j05vLBOmWhUaFC-d0MQxzRXn3p8ds,4996
|
|
26
|
+
cloudgym/scraper/github.py,sha256=JalRPr3eeARfIaZ7afHqnNob5UCITjQ_xhiDU3fMIBQ,8024
|
|
27
|
+
cloudgym/scraper/registry.py,sha256=uLz6vY-vVVEHoTYGpKLaONamG1xlcS9XUUpxEoBGXWY,5584
|
|
28
|
+
cloudgym/scraper/validator.py,sha256=wHEzkulBmz4YLNc28OcwXa3AKPhTxzM3YQaOCKNFS74,3691
|
|
29
|
+
cloudgym/taxonomy/__init__.py,sha256=TUDMRHoSi49EfWhBBt9AG8Ue9mK7imzsvCqFME6YKT0,345
|
|
30
|
+
cloudgym/taxonomy/base.py,sha256=lDb-gszabWOCk3zE1_sceQkkSpySh5hPUO6WIItvtt4,2865
|
|
31
|
+
cloudgym/taxonomy/cloudformation.py,sha256=psCPloS7Vrrq4yqiJAzRN59pTnPt45aUv5g73ZluWTk,8849
|
|
32
|
+
cloudgym/taxonomy/terraform.py,sha256=mZxidh5gNkGDXnPoWRHqBMHr05yoJVEr8kkhrdmBUaY,9496
|
|
33
|
+
cloudgym/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
cloudgym/utils/config.py,sha256=nTC9RteEf4_g8msRMJ8Ytg0OI2t1BHmuKqgXTW2tS50,1490
|
|
35
|
+
cloudgym/utils/ollama.py,sha256=EnMY-9BZ09fCmqavMYE1SLB0uFo-Ogx144FornghSj0,2275
|
|
36
|
+
cloudgym/validator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
cloudgym/validator/cloudformation.py,sha256=wBHgs3MLR2Z1_pYe9zWjHzcqyKXwbawwKbC1kHPWzNs,1494
|
|
38
|
+
cloudgym/validator/opentofu.py,sha256=yJILyZsZ4F2JkzVSiZYFEnFHdlOtidtmNYQjMBQE_vI,2816
|
|
39
|
+
cloudgym/validator/terraform.py,sha256=OOeSysosKFnw8Rxzn37jhI5LZUh8yjNtDClbPqyEwR0,3264
|
|
40
|
+
stackfix-0.1.0.dist-info/METADATA,sha256=WBBvdVwS2ikfEU0i75FSPE9sKnnypMJ8Ab_Ug7nUpvU,6225
|
|
41
|
+
stackfix-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
42
|
+
stackfix-0.1.0.dist-info/entry_points.txt,sha256=lrBTXrD-vxU26gDfg3kSUJkbCN_IPADtkckj0rFDK4Q,83
|
|
43
|
+
stackfix-0.1.0.dist-info/licenses/LICENSE,sha256=HXSLpM_F4i_eePLsGEnb4DP0qJK23NDq04QbhH9jius,1068
|
|
44
|
+
stackfix-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jon Hammant
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|