stackfix 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. stackfix-0.1.0/.github/workflows/stackfix.yml +66 -0
  2. stackfix-0.1.0/.gitignore +52 -0
  3. stackfix-0.1.0/LICENSE +21 -0
  4. stackfix-0.1.0/PKG-INFO +182 -0
  5. stackfix-0.1.0/README.md +156 -0
  6. stackfix-0.1.0/cloudgym/__init__.py +3 -0
  7. stackfix-0.1.0/cloudgym/benchmark/__init__.py +0 -0
  8. stackfix-0.1.0/cloudgym/benchmark/dataset.py +188 -0
  9. stackfix-0.1.0/cloudgym/benchmark/evaluator.py +275 -0
  10. stackfix-0.1.0/cloudgym/cli.py +61 -0
  11. stackfix-0.1.0/cloudgym/fixer/__init__.py +1 -0
  12. stackfix-0.1.0/cloudgym/fixer/cli.py +521 -0
  13. stackfix-0.1.0/cloudgym/fixer/detector.py +81 -0
  14. stackfix-0.1.0/cloudgym/fixer/formatter.py +55 -0
  15. stackfix-0.1.0/cloudgym/fixer/lambda_handler.py +126 -0
  16. stackfix-0.1.0/cloudgym/fixer/repairer.py +237 -0
  17. stackfix-0.1.0/cloudgym/generator/__init__.py +0 -0
  18. stackfix-0.1.0/cloudgym/generator/formatter.py +142 -0
  19. stackfix-0.1.0/cloudgym/generator/pipeline.py +271 -0
  20. stackfix-0.1.0/cloudgym/inverter/__init__.py +0 -0
  21. stackfix-0.1.0/cloudgym/inverter/_cf_injectors.py +705 -0
  22. stackfix-0.1.0/cloudgym/inverter/_cf_utils.py +202 -0
  23. stackfix-0.1.0/cloudgym/inverter/_hcl_utils.py +182 -0
  24. stackfix-0.1.0/cloudgym/inverter/_tf_injectors.py +641 -0
  25. stackfix-0.1.0/cloudgym/inverter/_yaml_cf.py +84 -0
  26. stackfix-0.1.0/cloudgym/inverter/agentic.py +90 -0
  27. stackfix-0.1.0/cloudgym/inverter/engine.py +258 -0
  28. stackfix-0.1.0/cloudgym/inverter/programmatic.py +95 -0
  29. stackfix-0.1.0/cloudgym/scraper/__init__.py +0 -0
  30. stackfix-0.1.0/cloudgym/scraper/aws_samples.py +159 -0
  31. stackfix-0.1.0/cloudgym/scraper/github.py +238 -0
  32. stackfix-0.1.0/cloudgym/scraper/registry.py +165 -0
  33. stackfix-0.1.0/cloudgym/scraper/validator.py +116 -0
  34. stackfix-0.1.0/cloudgym/taxonomy/__init__.py +10 -0
  35. stackfix-0.1.0/cloudgym/taxonomy/base.py +102 -0
  36. stackfix-0.1.0/cloudgym/taxonomy/cloudformation.py +258 -0
  37. stackfix-0.1.0/cloudgym/taxonomy/terraform.py +274 -0
  38. stackfix-0.1.0/cloudgym/utils/__init__.py +0 -0
  39. stackfix-0.1.0/cloudgym/utils/config.py +57 -0
  40. stackfix-0.1.0/cloudgym/utils/ollama.py +66 -0
  41. stackfix-0.1.0/cloudgym/validator/__init__.py +0 -0
  42. stackfix-0.1.0/cloudgym/validator/cloudformation.py +55 -0
  43. stackfix-0.1.0/cloudgym/validator/opentofu.py +103 -0
  44. stackfix-0.1.0/cloudgym/validator/terraform.py +115 -0
  45. stackfix-0.1.0/examples/.pre-commit-config.yaml +13 -0
  46. stackfix-0.1.0/examples/LINKEDIN_POST.md +76 -0
  47. stackfix-0.1.0/examples/USE_CASES.md +205 -0
  48. stackfix-0.1.0/examples/broken_s3.tf +45 -0
  49. stackfix-0.1.0/examples/broken_security_group.tf +44 -0
  50. stackfix-0.1.0/examples/broken_vpc.yaml +57 -0
  51. stackfix-0.1.0/examples/chart_category_breakdown.png +0 -0
  52. stackfix-0.1.0/examples/chart_deployment_tradeoffs.png +0 -0
  53. stackfix-0.1.0/examples/chart_hyperparameter_fix.png +0 -0
  54. stackfix-0.1.0/examples/chart_overall_comparison.png +0 -0
  55. stackfix-0.1.0/examples/chart_size_vs_accuracy.png +0 -0
  56. stackfix-0.1.0/paper/main.pdf +0 -0
  57. stackfix-0.1.0/paper/main.tex +63 -0
  58. stackfix-0.1.0/paper/references.bib +65 -0
  59. stackfix-0.1.0/paper/sections/conclusion.tex +16 -0
  60. stackfix-0.1.0/paper/sections/dataset.tex +38 -0
  61. stackfix-0.1.0/paper/sections/experiments.tex +41 -0
  62. stackfix-0.1.0/paper/sections/introduction.tex +21 -0
  63. stackfix-0.1.0/paper/sections/method.tex +46 -0
  64. stackfix-0.1.0/paper/sections/related_work.tex +13 -0
  65. stackfix-0.1.0/pyproject.toml +52 -0
  66. stackfix-0.1.0/scripts/build_benchmark.py +47 -0
  67. stackfix-0.1.0/scripts/evaluate.py +289 -0
  68. stackfix-0.1.0/scripts/export_gguf.py +154 -0
  69. stackfix-0.1.0/scripts/export_model.sh +53 -0
  70. stackfix-0.1.0/scripts/finetune.sh +35 -0
  71. stackfix-0.1.0/scripts/format_for_finetuning.py +96 -0
  72. stackfix-0.1.0/scripts/generate.py +80 -0
  73. stackfix-0.1.0/scripts/generate_gold.py +794 -0
  74. stackfix-0.1.0/scripts/generate_gold_cf.py +855 -0
  75. stackfix-0.1.0/scripts/run_7b_16L_train_and_eval.sh +92 -0
  76. stackfix-0.1.0/scripts/run_7b_reeval.sh +83 -0
  77. stackfix-0.1.0/scripts/run_7b_train.sh +62 -0
  78. stackfix-0.1.0/scripts/run_7b_train_then_eval.sh +71 -0
  79. stackfix-0.1.0/scripts/run_7b_v2_train_and_eval.sh +34 -0
  80. stackfix-0.1.0/scripts/run_all.sh +78 -0
  81. stackfix-0.1.0/scripts/run_baselines.py +147 -0
  82. stackfix-0.1.0/scripts/run_compression_experiments.sh +235 -0
  83. stackfix-0.1.0/scripts/run_finetuned_eval.sh +19 -0
  84. stackfix-0.1.0/scripts/run_gemma4_eval.sh +32 -0
  85. stackfix-0.1.0/scripts/run_overnight.sh +67 -0
  86. stackfix-0.1.0/scripts/run_remaining.sh +38 -0
  87. stackfix-0.1.0/scripts/run_steps_5_6.sh +38 -0
  88. stackfix-0.1.0/scripts/run_v2_pipeline.sh +83 -0
  89. stackfix-0.1.0/scripts/scrape.py +93 -0
  90. stackfix-0.1.0/scripts/train.py +101 -0
  91. stackfix-0.1.0/tests/__init__.py +0 -0
  92. stackfix-0.1.0/tests/fixtures/gold_main.tf +102 -0
  93. stackfix-0.1.0/tests/fixtures/gold_template.yaml +98 -0
  94. stackfix-0.1.0/tests/test_inverter.py +223 -0
  95. stackfix-0.1.0/tests/test_pipeline.py +190 -0
  96. stackfix-0.1.0/tests/test_taxonomy.py +114 -0
  97. stackfix-0.1.0/tests/test_validator.py +89 -0
@@ -0,0 +1,66 @@
1
# Validates changed IaC files on pull requests and, when validation fails,
# posts model-suggested fixes as a PR comment.
name: stackfix

on:
  pull_request:
    paths:
      - '**/*.tf'
      - '**/*.yaml'
      - '**/*.yml'

# The "Suggest fixes" step posts a PR comment, which requires write access
# to pull requests for the workflow token.
permissions:
  contents: read
  pull-requests: write

jobs:
  stackfix:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed so origin/<base> exists for the
          # three-dot diffs below; the default shallow clone lacks it.
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install stackfix with GGUF backend
        run: |
          pip install stackfix[gguf]
          pip install cfn-lint

      - name: Download GGUF model
        run: |
          mkdir -p data/models/exports
          python -c "
          from huggingface_hub import hf_hub_download
          hf_hub_download(
              repo_id='Tetsuto/iac-repair-3b-gguf',
              filename='iac-repair-3b-q4.gguf',
              local_dir='data/models/exports',
          )
          "

      - name: Check changed IaC files
        run: |
          # --diff-filter=d skips files deleted by the PR; they no longer
          # exist on disk and cannot be validated.
          git diff --name-only --diff-filter=d origin/${{ github.base_ref }}...HEAD \
            | grep -E '\.(tf|yaml|yml)$' \
            | xargs -r stackfix check

      - name: Suggest fixes (comment on PR)
        if: failure()
        run: |
          # "|| true": a non-zero exit from the repair pipeline (or from
          # head closing the pipe early) must not prevent the comment below.
          git diff --name-only --diff-filter=d origin/${{ github.base_ref }}...HEAD \
            | grep -E '\.(tf|yaml|yml)$' \
            | xargs -r stackfix repair \
                --backend gguf \
                --model data/models/exports/iac-repair-3b-q4.gguf \
                --no-color 2>&1 \
            | head -200 > /tmp/fix-output.txt || true

          # Build the comment in a file. A quoted heredoc would leave
          # $(cat ...) unexpanded, and an unquoted one would execute the
          # markdown backticks as command substitutions.
          {
            echo '### stackfix found issues'
            echo
            echo '```'
            cat /tmp/fix-output.txt
            echo '```'
            echo
            echo 'Run `stackfix repair --apply` locally to auto-fix.'
          } > /tmp/fix-comment.md

          gh pr comment ${{ github.event.pull_request.number }} \
            --body-file /tmp/fix-comment.md
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,52 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+ .eggs/
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+ *.swo
21
+
22
+ # Data (large files, generated)
23
+ data/gold/
24
+ data/broken/
25
+ data/training/
26
+ data/benchmark/
27
+ data/finetune/
28
+ data/models/
29
+
30
+ # OS
31
+ .DS_Store
32
+ Thumbs.db
33
+
34
+ # Claude Code
35
+ .claude/
36
+ .planning/
37
+
38
+ # Environment
39
+ .env
40
+ .env.local
41
+
42
+ # Testing
43
+ .pytest_cache/
44
+ .coverage
45
+ htmlcov/
46
+
47
+ # Terraform
48
+ .terraform/
49
+ *.tfstate
50
+ *.tfstate.backup
51
+ .terraform.lock.hcl
52
+ data/distill/
stackfix-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jon Hammant
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,182 @@
1
+ Metadata-Version: 2.4
2
+ Name: stackfix
3
+ Version: 0.1.0
4
+ Summary: AI-powered Infrastructure-as-Code repair — fix broken Terraform and CloudFormation on CPU
5
+ Author: Jon
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: cfn-lint>=1.0
10
+ Requires-Dist: click>=8.0
11
+ Requires-Dist: datasets>=3.0
12
+ Requires-Dist: httpx>=0.27
13
+ Requires-Dist: ollama>=0.4
14
+ Requires-Dist: python-hcl2>=5.0
15
+ Requires-Dist: pyyaml>=6.0
16
+ Requires-Dist: rich>=13.0
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
19
+ Requires-Dist: pytest>=8.0; extra == 'dev'
20
+ Requires-Dist: ruff>=0.8; extra == 'dev'
21
+ Provides-Extra: gguf
22
+ Requires-Dist: llama-cpp-python>=0.3; extra == 'gguf'
23
+ Provides-Extra: mlx
24
+ Requires-Dist: mlx-lm>=0.21; extra == 'mlx'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # Cloud-Gym
28
+
29
+ Scalable Training Data Generation for Infrastructure-as-Code Repair via Environment Inversion.
30
+
31
+ Cloud-Gym generates (broken_config, error_message, fix) training pairs for IaC repair by applying **environment inversion** — taking working Terraform, CloudFormation, and OpenTofu configs and systematically breaking them using a defined fault taxonomy. It includes a benchmark (188 entries across 8 error categories) and fine-tuned models that run entirely on CPU.
32
+
33
+ ## stackfix: AI-Powered IaC Repair
34
+
35
+ The `stackfix` CLI tool validates and repairs broken IaC files using fine-tuned local models. No API keys, no cloud costs, no data leaves your machine.
36
+
37
+ ### Install
38
+
39
+ ```bash
40
+ pip install stackfix[gguf]
41
+ ```
42
+
43
+ ### Download a Model
44
+
45
+ ```bash
46
+ # Recommended: 3B Q4 (1.8 GB, 87% pass@1)
47
+ python -c "
48
+ from huggingface_hub import hf_hub_download
49
+ hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
50
+ "
51
+ ```
52
+
53
+ ### Usage
54
+
55
+ ```bash
56
+ # Check files for errors
57
+ stackfix check main.tf template.yaml
58
+
59
+ # Repair a broken file (show diff)
60
+ stackfix repair main.tf --backend gguf --model iac-repair-3b-q4.gguf
61
+
62
+ # Repair and apply fix in place
63
+ stackfix repair main.tf --apply --backend gguf --model iac-repair-3b-q4.gguf
64
+
65
+ # Explain errors in plain language
66
+ stackfix discuss main.tf --backend gguf --model iac-repair-3b-q4.gguf
67
+
68
+ # Pipe mode (stdin/stdout)
69
+ cat broken.tf | stackfix repair - --backend gguf --model iac-repair-3b-q4.gguf > fixed.tf
70
+
71
+ # Check all changed IaC files in git
72
+ stackfix git-diff --backend gguf --model iac-repair-3b-q4.gguf
73
+ ```
74
+
75
+ ### Models
76
+
77
+ | Model | Size | RAM | Speed (CPU) | pass@1 | HuggingFace |
78
+ |---|---|---|---|---|---|
79
+ | **7B Q4** | 4.5 GB | ~8 GB | ~20 tok/s | **0.926** | [Tetsuto/iac-repair-7b-gguf](https://huggingface.co/Tetsuto/iac-repair-7b-gguf) |
80
+ | **3B Q4** | 1.8 GB | ~4 GB | 49 tok/s | 0.867 | [Tetsuto/iac-repair-3b-gguf](https://huggingface.co/Tetsuto/iac-repair-3b-gguf) |
81
+ | **0.5B Q4** | 379 MB | ~800 MB | 127 tok/s | 0.723 | [Tetsuto/iac-repair-0.5b-gguf](https://huggingface.co/Tetsuto/iac-repair-0.5b-gguf) |
82
+
83
+ All models are fine-tuned Qwen2.5-Coder with LoRA, exported to GGUF. They run on any CPU (Linux, macOS, Windows).
84
+
85
+ ### Backends
86
+
87
+ | Backend | Install | Platform | Use Case |
88
+ |---|---|---|---|
89
+ | `gguf` | `pip install stackfix[gguf]` | Any (CPU) | CI/CD, Lambda, servers |
90
+ | `mlx` | `pip install stackfix[mlx]` | Apple Silicon | Local dev on Mac |
91
+ | `ollama` | `pip install stackfix` + Ollama | Any | When Ollama is already running |
92
+
93
+ ### CI/CD Integration
94
+
95
+ Add to your GitHub Actions workflow to catch IaC errors on every PR:
96
+
97
+ ```yaml
98
+ - name: Check IaC
99
+ run: |
100
+ pip install stackfix[gguf]
101
+ python -c "
102
+ from huggingface_hub import hf_hub_download
103
+ hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
104
+ "
105
+ stackfix check **/*.tf **/*.yaml
106
+ ```
107
+
108
+ See [examples/USE_CASES.md](examples/USE_CASES.md) for more deployment scenarios (pre-commit hooks, Lambda, pipeline integration).
109
+
110
+ ### Pre-Commit Hook
111
+
112
+ ```yaml
113
+ # .pre-commit-config.yaml
114
+ repos:
115
+ - repo: local
116
+ hooks:
117
+ - id: stackfix
118
+ name: stackfix
119
+ entry: stackfix pre-commit --backend gguf --model iac-repair-3b-q4.gguf
120
+ language: python
121
+ types_or: [terraform, yaml]
122
+ additional_dependencies: ['stackfix[gguf]']
123
+ ```
124
+
125
+ ## Benchmark
126
+
127
+ 188 entries across 8 error categories, 3 difficulty levels, and 2 formats (Terraform + CloudFormation).
128
+
129
+ ### Results Summary
130
+
131
+ | Model | pass@1 | Terraform | CloudFormation | High | Medium | Low |
132
+ |---|---|---|---|---|---|---|
133
+ | **7B v2 fine-tuned** | **0.926** | 0.993 | 0.750 | 0.960 | 0.897 | 0.923 |
134
+ | 3B rank4 fine-tuned | 0.867 | 0.912 | 0.750 | 0.964 | 0.797 | 0.821 |
135
+ | qwen2.5-coder:7b (base) | 0.856 | 0.905 | 0.707 | 0.840 | 0.859 | 0.893 |
136
+ | 0.5B distilled | 0.723 | 0.775 | 0.590 | 0.809 | 0.648 | 0.731 |
137
+ | llama3.2:3b (base) | 0.641 | 0.734 | 0.361 | 0.684 | 0.636 | 0.533 |
138
+ | gemma-4-26b (base) | 0.009 | 0.000 | 0.032 | 0.000 | 0.004 | 0.051 |
139
+
140
+ The fine-tuned 0.5B model outperforms the 26B base model by roughly 80x on pass@1 (0.723 vs. 0.009).
141
+
142
+ ## Training Data Generation
143
+
144
+ Cloud-Gym generates training data via environment inversion:
145
+
146
+ 1. **Collect** working IaC configs from GitHub, Terraform Registry, AWS samples
147
+ 2. **Break** them systematically using a fault taxonomy (28+ fault types across 8 categories)
148
+ 3. **Validate** broken configs to capture real error messages
149
+ 4. **Pair** (broken + errors) with the original working config as the gold fix
150
+
151
+ ```bash
152
+ # Generate training data
153
+ cloud-gym taxonomy # View fault types
154
+ python scripts/scrape.py # Collect gold configs
155
+ cloud-gym invert # Generate broken variants
156
+ cloud-gym export # Export training pairs
157
+ ```
158
+
159
+ ## Project Structure
160
+
161
+ ```text
162
+ cloudgym/
163
+ taxonomy/ Fault type definitions (28+ types, 8 categories)
164
+ scraper/ Gold config collection
165
+ validator/ IaC validation wrappers (terraform, cfn-lint)
166
+ inverter/ Fault injection engines
167
+ generator/ Training data pipeline
168
+ benchmark/ Evaluation harness
169
+ fixer/ stackfix CLI tool + model backends
170
+ scripts/ Training, evaluation, and export scripts
171
+ examples/ Broken IaC examples + use case docs
172
+ ```
173
+
174
+ ## Supported Formats
175
+
176
+ - **Terraform** (`.tf`) — validated with `terraform validate`
177
+ - **CloudFormation** (`.yaml`, `.yml`, `.json`) — validated with `cfn-lint`
178
+ - **OpenTofu** (`.tf`) — same as Terraform
179
+
180
+ ## License
181
+
182
+ MIT
@@ -0,0 +1,156 @@
1
+ # Cloud-Gym
2
+
3
+ Scalable Training Data Generation for Infrastructure-as-Code Repair via Environment Inversion.
4
+
5
+ Cloud-Gym generates (broken_config, error_message, fix) training pairs for IaC repair by applying **environment inversion** — taking working Terraform, CloudFormation, and OpenTofu configs and systematically breaking them using a defined fault taxonomy. It includes a benchmark (188 entries across 8 error categories) and fine-tuned models that run entirely on CPU.
6
+
7
+ ## stackfix: AI-Powered IaC Repair
8
+
9
+ The `stackfix` CLI tool validates and repairs broken IaC files using fine-tuned local models. No API keys, no cloud costs, no data leaves your machine.
10
+
11
+ ### Install
12
+
13
+ ```bash
14
+ pip install stackfix[gguf]
15
+ ```
16
+
17
+ ### Download a Model
18
+
19
+ ```bash
20
+ # Recommended: 3B Q4 (1.8 GB, 87% pass@1)
21
+ python -c "
22
+ from huggingface_hub import hf_hub_download
23
+ hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
24
+ "
25
+ ```
26
+
27
+ ### Usage
28
+
29
+ ```bash
30
+ # Check files for errors
31
+ stackfix check main.tf template.yaml
32
+
33
+ # Repair a broken file (show diff)
34
+ stackfix repair main.tf --backend gguf --model iac-repair-3b-q4.gguf
35
+
36
+ # Repair and apply fix in place
37
+ stackfix repair main.tf --apply --backend gguf --model iac-repair-3b-q4.gguf
38
+
39
+ # Explain errors in plain language
40
+ stackfix discuss main.tf --backend gguf --model iac-repair-3b-q4.gguf
41
+
42
+ # Pipe mode (stdin/stdout)
43
+ cat broken.tf | stackfix repair - --backend gguf --model iac-repair-3b-q4.gguf > fixed.tf
44
+
45
+ # Check all changed IaC files in git
46
+ stackfix git-diff --backend gguf --model iac-repair-3b-q4.gguf
47
+ ```
48
+
49
+ ### Models
50
+
51
+ | Model | Size | RAM | Speed (CPU) | pass@1 | HuggingFace |
52
+ |---|---|---|---|---|---|
53
+ | **7B Q4** | 4.5 GB | ~8 GB | ~20 tok/s | **0.926** | [Tetsuto/iac-repair-7b-gguf](https://huggingface.co/Tetsuto/iac-repair-7b-gguf) |
54
+ | **3B Q4** | 1.8 GB | ~4 GB | 49 tok/s | 0.867 | [Tetsuto/iac-repair-3b-gguf](https://huggingface.co/Tetsuto/iac-repair-3b-gguf) |
55
+ | **0.5B Q4** | 379 MB | ~800 MB | 127 tok/s | 0.723 | [Tetsuto/iac-repair-0.5b-gguf](https://huggingface.co/Tetsuto/iac-repair-0.5b-gguf) |
56
+
57
+ All models are fine-tuned Qwen2.5-Coder with LoRA, exported to GGUF. They run on any CPU (Linux, macOS, Windows).
58
+
59
+ ### Backends
60
+
61
+ | Backend | Install | Platform | Use Case |
62
+ |---|---|---|---|
63
+ | `gguf` | `pip install stackfix[gguf]` | Any (CPU) | CI/CD, Lambda, servers |
64
+ | `mlx` | `pip install stackfix[mlx]` | Apple Silicon | Local dev on Mac |
65
+ | `ollama` | `pip install stackfix` + Ollama | Any | When Ollama is already running |
66
+
67
+ ### CI/CD Integration
68
+
69
+ Add to your GitHub Actions workflow to catch IaC errors on every PR:
70
+
71
+ ```yaml
72
+ - name: Check IaC
73
+ run: |
74
+ pip install stackfix[gguf]
75
+ python -c "
76
+ from huggingface_hub import hf_hub_download
77
+ hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
78
+ "
79
+ stackfix check **/*.tf **/*.yaml
80
+ ```
81
+
82
+ See [examples/USE_CASES.md](examples/USE_CASES.md) for more deployment scenarios (pre-commit hooks, Lambda, pipeline integration).
83
+
84
+ ### Pre-Commit Hook
85
+
86
+ ```yaml
87
+ # .pre-commit-config.yaml
88
+ repos:
89
+ - repo: local
90
+ hooks:
91
+ - id: stackfix
92
+ name: stackfix
93
+ entry: stackfix pre-commit --backend gguf --model iac-repair-3b-q4.gguf
94
+ language: python
95
+ types_or: [terraform, yaml]
96
+ additional_dependencies: ['stackfix[gguf]']
97
+ ```
98
+
99
+ ## Benchmark
100
+
101
+ 188 entries across 8 error categories, 3 difficulty levels, and 2 formats (Terraform + CloudFormation).
102
+
103
+ ### Results Summary
104
+
105
+ | Model | pass@1 | Terraform | CloudFormation | High | Medium | Low |
106
+ |---|---|---|---|---|---|---|
107
+ | **7B v2 fine-tuned** | **0.926** | 0.993 | 0.750 | 0.960 | 0.897 | 0.923 |
108
+ | 3B rank4 fine-tuned | 0.867 | 0.912 | 0.750 | 0.964 | 0.797 | 0.821 |
109
+ | qwen2.5-coder:7b (base) | 0.856 | 0.905 | 0.707 | 0.840 | 0.859 | 0.893 |
110
+ | 0.5B distilled | 0.723 | 0.775 | 0.590 | 0.809 | 0.648 | 0.731 |
111
+ | llama3.2:3b (base) | 0.641 | 0.734 | 0.361 | 0.684 | 0.636 | 0.533 |
112
+ | gemma-4-26b (base) | 0.009 | 0.000 | 0.032 | 0.000 | 0.004 | 0.051 |
113
+
114
+ The fine-tuned 0.5B model outperforms the 26B base model by roughly 80x on pass@1 (0.723 vs. 0.009).
115
+
116
+ ## Training Data Generation
117
+
118
+ Cloud-Gym generates training data via environment inversion:
119
+
120
+ 1. **Collect** working IaC configs from GitHub, Terraform Registry, AWS samples
121
+ 2. **Break** them systematically using a fault taxonomy (28+ fault types across 8 categories)
122
+ 3. **Validate** broken configs to capture real error messages
123
+ 4. **Pair** (broken + errors) with the original working config as the gold fix
124
+
125
+ ```bash
126
+ # Generate training data
127
+ cloud-gym taxonomy # View fault types
128
+ python scripts/scrape.py # Collect gold configs
129
+ cloud-gym invert # Generate broken variants
130
+ cloud-gym export # Export training pairs
131
+ ```
132
+
133
+ ## Project Structure
134
+
135
+ ```text
136
+ cloudgym/
137
+ taxonomy/ Fault type definitions (28+ types, 8 categories)
138
+ scraper/ Gold config collection
139
+ validator/ IaC validation wrappers (terraform, cfn-lint)
140
+ inverter/ Fault injection engines
141
+ generator/ Training data pipeline
142
+ benchmark/ Evaluation harness
143
+ fixer/ stackfix CLI tool + model backends
144
+ scripts/ Training, evaluation, and export scripts
145
+ examples/ Broken IaC examples + use case docs
146
+ ```
147
+
148
+ ## Supported Formats
149
+
150
+ - **Terraform** (`.tf`) — validated with `terraform validate`
151
+ - **CloudFormation** (`.yaml`, `.yml`, `.json`) — validated with `cfn-lint`
152
+ - **OpenTofu** (`.tf`) — same as Terraform
153
+
154
+ ## License
155
+
156
+ MIT
@@ -0,0 +1,3 @@
1
+ """Cloud-Gym: IaC Repair Benchmark via Environment Inversion."""
2
+
3
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,188 @@
1
+ """Benchmark dataset management.
2
+
3
+ Curates a balanced subset from the test split for evaluation.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@dataclass
class BenchmarkEntry:
    """One curated benchmark record.

    Bundles a broken configuration with the diagnostics recorded for it and
    the gold configuration it was derived from.
    """

    # Record identifier, copied from the source test record.
    id: str
    # IaC format label of the configuration.
    # NOTE(review): presumably distinguishes Terraform from CloudFormation;
    # confirm the exact values against the generator.
    format: str
    # Text of the broken configuration.
    broken_config: str
    # Error messages recorded for the broken configuration.
    errors: list[str]
    # Warning messages recorded for the broken configuration.
    warnings: list[str]
    # Fault identifiers; the text before the first "." is used as the
    # fault category elsewhere in this module.
    fault_types: list[str]
    # Difficulty label of the record.
    difficulty: str
    # Text of the gold (reference) configuration.
    gold_config: str
    # Hash identifying the gold configuration; used for deduplication.
    gold_hash: str
29
+
30
+
31
+ class BenchmarkDataset:
32
+ """Manages the curated benchmark dataset."""
33
+
34
+ def __init__(self, path: str | Path):
35
+ self.path = Path(path)
36
+ self.entries: list[BenchmarkEntry] = []
37
+ if self.path.exists():
38
+ self._load()
39
+
40
+ def _load(self):
41
+ """Load benchmark entries from JSONL."""
42
+ with open(self.path) as f:
43
+ for line in f:
44
+ data = json.loads(line)
45
+ self.entries.append(BenchmarkEntry(**{
46
+ k: data[k] for k in BenchmarkEntry.__dataclass_fields__
47
+ if k in data
48
+ }))
49
+ logger.info("Loaded %d benchmark entries from %s", len(self.entries), self.path)
50
+
51
+ def __len__(self) -> int:
52
+ return len(self.entries)
53
+
54
+ def __iter__(self):
55
+ return iter(self.entries)
56
+
57
+ @staticmethod
58
+ def build(
59
+ test_jsonl: str | Path,
60
+ output_path: str | Path,
61
+ target_size: int = 200,
62
+ ) -> BenchmarkDataset:
63
+ """Curate a benchmark dataset from the test split.
64
+
65
+ Curation rules:
66
+ - Single-fault only (one fault type per record)
67
+ - Balanced across categories and difficulties
68
+ - Min 10-line configs (non-trivial)
69
+ - Deduplicated per gold config (max 1 entry per gold hash per fault category)
70
+
71
+ Args:
72
+ test_jsonl: Path to test.jsonl from format_and_split.
73
+ output_path: Path to write benchmark.jsonl.
74
+ target_size: Target number of benchmark entries.
75
+
76
+ Returns:
77
+ BenchmarkDataset with curated entries.
78
+ """
79
+ test_path = Path(test_jsonl)
80
+ out_path = Path(output_path)
81
+ out_path.parent.mkdir(parents=True, exist_ok=True)
82
+
83
+ # Load test records
84
+ records = []
85
+ with open(test_path) as f:
86
+ for line in f:
87
+ records.append(json.loads(line))
88
+
89
+ logger.info("Loaded %d test records for curation", len(records))
90
+
91
+ # Filter: single-fault, min 10 lines
92
+ candidates = [
93
+ r for r in records
94
+ if len(r.get("fault_types", [])) == 1
95
+ and len(r.get("broken_config", "").splitlines()) >= 10
96
+ and r.get("errors") # Must have validation errors
97
+ ]
98
+ logger.info("%d candidates after filtering", len(candidates))
99
+
100
+ # Deduplicate: max 1 entry per (gold_hash, fault_category)
101
+ seen: set[tuple[str, str]] = set()
102
+ deduped = []
103
+ for r in candidates:
104
+ fault_id = r["fault_types"][0]
105
+ category = fault_id.split(".")[0] if "." in fault_id else fault_id
106
+ key = (r.get("gold_hash", ""), category)
107
+ if key not in seen:
108
+ seen.add(key)
109
+ deduped.append(r)
110
+ logger.info("%d after deduplication", len(deduped))
111
+
112
+ # Balance across categories and difficulties
113
+ selected = _balance_select(deduped, target_size)
114
+ logger.info("Selected %d entries for benchmark", len(selected))
115
+
116
+ # Write benchmark JSONL
117
+ with open(out_path, "w") as f:
118
+ for r in selected:
119
+ entry = {
120
+ "id": r["id"],
121
+ "format": r["format"],
122
+ "broken_config": r["broken_config"],
123
+ "errors": r["errors"],
124
+ "warnings": r.get("warnings", []),
125
+ "fault_types": r["fault_types"],
126
+ "difficulty": r["difficulty"],
127
+ "gold_config": r["gold_config"],
128
+ "gold_hash": r.get("gold_hash", ""),
129
+ }
130
+ f.write(json.dumps(entry) + "\n")
131
+
132
+ # Write metadata
133
+ meta = {
134
+ "total_entries": len(selected),
135
+ "source": str(test_path),
136
+ "category_distribution": _count_categories(selected),
137
+ "difficulty_distribution": _count_field(selected, "difficulty"),
138
+ "format_distribution": _count_field(selected, "format"),
139
+ }
140
+ meta_path = out_path.parent / "benchmark_meta.json"
141
+ with open(meta_path, "w") as f:
142
+ json.dump(meta, f, indent=2)
143
+
144
+ return BenchmarkDataset(out_path)
145
+
146
+
147
+ def _balance_select(records: list[dict], target: int) -> list[dict]:
148
+ """Select records with balanced category/difficulty distribution."""
149
+ by_category: dict[str, list[dict]] = {}
150
+ for r in records:
151
+ fault_id = r["fault_types"][0]
152
+ cat = fault_id.split(".")[0] if "." in fault_id else fault_id
153
+ by_category.setdefault(cat, []).append(r)
154
+
155
+ if not by_category:
156
+ return []
157
+
158
+ per_category = max(1, target // len(by_category))
159
+ selected = []
160
+
161
+ for cat, cat_records in by_category.items():
162
+ # Within category, balance by difficulty
163
+ by_diff: dict[str, list[dict]] = {}
164
+ for r in cat_records:
165
+ by_diff.setdefault(r["difficulty"], []).append(r)
166
+
167
+ per_diff = max(1, per_category // max(len(by_diff), 1))
168
+ for diff, diff_records in by_diff.items():
169
+ selected.extend(diff_records[:per_diff])
170
+
171
+ return selected[:target]
172
+
173
+
174
+ def _count_categories(records: list[dict]) -> dict[str, int]:
175
+ counts: dict[str, int] = {}
176
+ for r in records:
177
+ fault_id = r["fault_types"][0]
178
+ cat = fault_id.split(".")[0] if "." in fault_id else fault_id
179
+ counts[cat] = counts.get(cat, 0) + 1
180
+ return counts
181
+
182
+
183
+ def _count_field(records: list[dict], field: str) -> dict[str, int]:
184
+ counts: dict[str, int] = {}
185
+ for r in records:
186
+ val = r.get(field, "unknown")
187
+ counts[val] = counts.get(val, 0) + 1
188
+ return counts