PyPI - stackfix - Versions diffs - 0.1.0__tar.gz - Mend

stackfix 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

stackfix-0.1.0/.github/workflows/stackfix.yml +66 -0
stackfix-0.1.0/.gitignore +52 -0
stackfix-0.1.0/LICENSE +21 -0
stackfix-0.1.0/PKG-INFO +182 -0
stackfix-0.1.0/README.md +156 -0
stackfix-0.1.0/cloudgym/__init__.py +3 -0
stackfix-0.1.0/cloudgym/benchmark/__init__.py +0 -0
stackfix-0.1.0/cloudgym/benchmark/dataset.py +188 -0
stackfix-0.1.0/cloudgym/benchmark/evaluator.py +275 -0
stackfix-0.1.0/cloudgym/cli.py +61 -0
stackfix-0.1.0/cloudgym/fixer/__init__.py +1 -0
stackfix-0.1.0/cloudgym/fixer/cli.py +521 -0
stackfix-0.1.0/cloudgym/fixer/detector.py +81 -0
stackfix-0.1.0/cloudgym/fixer/formatter.py +55 -0
stackfix-0.1.0/cloudgym/fixer/lambda_handler.py +126 -0
stackfix-0.1.0/cloudgym/fixer/repairer.py +237 -0
stackfix-0.1.0/cloudgym/generator/__init__.py +0 -0
stackfix-0.1.0/cloudgym/generator/formatter.py +142 -0
stackfix-0.1.0/cloudgym/generator/pipeline.py +271 -0
stackfix-0.1.0/cloudgym/inverter/__init__.py +0 -0
stackfix-0.1.0/cloudgym/inverter/_cf_injectors.py +705 -0
stackfix-0.1.0/cloudgym/inverter/_cf_utils.py +202 -0
stackfix-0.1.0/cloudgym/inverter/_hcl_utils.py +182 -0
stackfix-0.1.0/cloudgym/inverter/_tf_injectors.py +641 -0
stackfix-0.1.0/cloudgym/inverter/_yaml_cf.py +84 -0
stackfix-0.1.0/cloudgym/inverter/agentic.py +90 -0
stackfix-0.1.0/cloudgym/inverter/engine.py +258 -0
stackfix-0.1.0/cloudgym/inverter/programmatic.py +95 -0
stackfix-0.1.0/cloudgym/scraper/__init__.py +0 -0
stackfix-0.1.0/cloudgym/scraper/aws_samples.py +159 -0
stackfix-0.1.0/cloudgym/scraper/github.py +238 -0
stackfix-0.1.0/cloudgym/scraper/registry.py +165 -0
stackfix-0.1.0/cloudgym/scraper/validator.py +116 -0
stackfix-0.1.0/cloudgym/taxonomy/__init__.py +10 -0
stackfix-0.1.0/cloudgym/taxonomy/base.py +102 -0
stackfix-0.1.0/cloudgym/taxonomy/cloudformation.py +258 -0
stackfix-0.1.0/cloudgym/taxonomy/terraform.py +274 -0
stackfix-0.1.0/cloudgym/utils/__init__.py +0 -0
stackfix-0.1.0/cloudgym/utils/config.py +57 -0
stackfix-0.1.0/cloudgym/utils/ollama.py +66 -0
stackfix-0.1.0/cloudgym/validator/__init__.py +0 -0
stackfix-0.1.0/cloudgym/validator/cloudformation.py +55 -0
stackfix-0.1.0/cloudgym/validator/opentofu.py +103 -0
stackfix-0.1.0/cloudgym/validator/terraform.py +115 -0
stackfix-0.1.0/examples/.pre-commit-config.yaml +13 -0
stackfix-0.1.0/examples/LINKEDIN_POST.md +76 -0
stackfix-0.1.0/examples/USE_CASES.md +205 -0
stackfix-0.1.0/examples/broken_s3.tf +45 -0
stackfix-0.1.0/examples/broken_security_group.tf +44 -0
stackfix-0.1.0/examples/broken_vpc.yaml +57 -0
stackfix-0.1.0/examples/chart_category_breakdown.png +0 -0
stackfix-0.1.0/examples/chart_deployment_tradeoffs.png +0 -0
stackfix-0.1.0/examples/chart_hyperparameter_fix.png +0 -0
stackfix-0.1.0/examples/chart_overall_comparison.png +0 -0
stackfix-0.1.0/examples/chart_size_vs_accuracy.png +0 -0
stackfix-0.1.0/paper/main.pdf +0 -0
stackfix-0.1.0/paper/main.tex +63 -0
stackfix-0.1.0/paper/references.bib +65 -0
stackfix-0.1.0/paper/sections/conclusion.tex +16 -0
stackfix-0.1.0/paper/sections/dataset.tex +38 -0
stackfix-0.1.0/paper/sections/experiments.tex +41 -0
stackfix-0.1.0/paper/sections/introduction.tex +21 -0
stackfix-0.1.0/paper/sections/method.tex +46 -0
stackfix-0.1.0/paper/sections/related_work.tex +13 -0
stackfix-0.1.0/pyproject.toml +52 -0
stackfix-0.1.0/scripts/build_benchmark.py +47 -0
stackfix-0.1.0/scripts/evaluate.py +289 -0
stackfix-0.1.0/scripts/export_gguf.py +154 -0
stackfix-0.1.0/scripts/export_model.sh +53 -0
stackfix-0.1.0/scripts/finetune.sh +35 -0
stackfix-0.1.0/scripts/format_for_finetuning.py +96 -0
stackfix-0.1.0/scripts/generate.py +80 -0
stackfix-0.1.0/scripts/generate_gold.py +794 -0
stackfix-0.1.0/scripts/generate_gold_cf.py +855 -0
stackfix-0.1.0/scripts/run_7b_16L_train_and_eval.sh +92 -0
stackfix-0.1.0/scripts/run_7b_reeval.sh +83 -0
stackfix-0.1.0/scripts/run_7b_train.sh +62 -0
stackfix-0.1.0/scripts/run_7b_train_then_eval.sh +71 -0
stackfix-0.1.0/scripts/run_7b_v2_train_and_eval.sh +34 -0
stackfix-0.1.0/scripts/run_all.sh +78 -0
stackfix-0.1.0/scripts/run_baselines.py +147 -0
stackfix-0.1.0/scripts/run_compression_experiments.sh +235 -0
stackfix-0.1.0/scripts/run_finetuned_eval.sh +19 -0
stackfix-0.1.0/scripts/run_gemma4_eval.sh +32 -0
stackfix-0.1.0/scripts/run_overnight.sh +67 -0
stackfix-0.1.0/scripts/run_remaining.sh +38 -0
stackfix-0.1.0/scripts/run_steps_5_6.sh +38 -0
stackfix-0.1.0/scripts/run_v2_pipeline.sh +83 -0
stackfix-0.1.0/scripts/scrape.py +93 -0
stackfix-0.1.0/scripts/train.py +101 -0
stackfix-0.1.0/tests/__init__.py +0 -0
stackfix-0.1.0/tests/fixtures/gold_main.tf +102 -0
stackfix-0.1.0/tests/fixtures/gold_template.yaml +98 -0
stackfix-0.1.0/tests/test_inverter.py +223 -0
stackfix-0.1.0/tests/test_pipeline.py +190 -0
stackfix-0.1.0/tests/test_taxonomy.py +114 -0
stackfix-0.1.0/tests/test_validator.py +89 -0

stackfix-0.1.0/.github/workflows/stackfix.yml ADDED Viewed

@@ -0,0 +1,66 @@
+name: stackfix
+# Validates the Terraform/CloudFormation files changed by a pull request and,
+# when validation fails, posts model-suggested fixes as a PR comment.
+on:
+  pull_request:
+    paths:
+      - '**/*.tf'
+      - '**/*.yaml'
+      - '**/*.yml'
+# GITHUB_TOKEN needs write access to pull requests to post the comment.
+permissions:
+  contents: read
+  pull-requests: write
+jobs:
+  stackfix:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history so origin/<base>...HEAD can resolve a merge base;
+          # the default depth-1 checkout does not fetch the base branch.
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Install stackfix with GGUF backend
+        run: |
+          pip install stackfix[gguf]
+          pip install cfn-lint
+      - name: Download GGUF model
+        run: |
+          mkdir -p data/models/exports
+          python -c "
+          from huggingface_hub import hf_hub_download
+          hf_hub_download(
+              repo_id='Tetsuto/iac-repair-3b-gguf',
+              filename='iac-repair-3b-q4.gguf',
+              local_dir='data/models/exports',
+          )
+          "
+      - name: Check changed IaC files
+        id: check
+        env:
+          # Passed through the environment instead of being interpolated into the script.
+          BASE_REF: ${{ github.base_ref }}
+        run: |
+          # --diff-filter=d skips deleted files, which no longer exist to be checked.
+          # xargs -d '\n' keeps paths containing spaces intact.
+          git diff --name-only --diff-filter=d "origin/${BASE_REF}...HEAD" \
+            | grep -E '\.(tf|yaml|yml)$' \
+            | xargs -r -d '\n' stackfix check
+      - name: Suggest fixes (comment on PR)
+        # Only when validation itself failed, not when install/download failed.
+        if: failure() && steps.check.outcome == 'failure'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BASE_REF: ${{ github.base_ref }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          git diff --name-only --diff-filter=d "origin/${BASE_REF}...HEAD" \
+            | grep -E '\.(tf|yaml|yml)$' \
+            | xargs -r -d '\n' stackfix repair \
+              --backend gguf \
+              --model data/models/exports/iac-repair-3b-q4.gguf \
+              --no-color 2>&1 \
+            | head -200 > /tmp/fix-output.txt
+          # Build the comment in a file: a quoted heredoc would post the literal
+          # text "$(cat ...)" and an unquoted one would execute the backticks.
+          {
+            echo '### stackfix found issues'
+            echo '```'
+            cat /tmp/fix-output.txt
+            echo '```'
+            echo 'Run `stackfix repair --apply` locally to auto-fix.'
+          } > /tmp/fix-comment.md
+          gh pr comment "$PR_NUMBER" --body-file /tmp/fix-comment.md

stackfix-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,52 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+*.egg
+.eggs/
+# Virtual environments
+.venv/
+venv/
+env/
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+# Data (large files, generated)
+data/gold/
+data/broken/
+data/training/
+data/benchmark/
+data/finetune/
+data/models/
+# OS
+.DS_Store
+Thumbs.db
+# Claude Code
+.claude/
+.planning/
+# Environment
+.env
+.env.local
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# Terraform
+.terraform/
+*.tfstate
+*.tfstate.backup
+.terraform.lock.hcl
+data/distill/

stackfix-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Jon Hammant
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

stackfix-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,182 @@
+Metadata-Version: 2.4
+Name: stackfix
+Version: 0.1.0
+Summary: AI-powered Infrastructure-as-Code repair — fix broken Terraform and CloudFormation on CPU
+Author: Jon
+License-Expression: MIT
+License-File: LICENSE
+Requires-Python: >=3.11
+Requires-Dist: cfn-lint>=1.0
+Requires-Dist: click>=8.0
+Requires-Dist: datasets>=3.0
+Requires-Dist: httpx>=0.27
+Requires-Dist: ollama>=0.4
+Requires-Dist: python-hcl2>=5.0
+Requires-Dist: pyyaml>=6.0
+Requires-Dist: rich>=13.0
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff>=0.8; extra == 'dev'
+Provides-Extra: gguf
+Requires-Dist: llama-cpp-python>=0.3; extra == 'gguf'
+Provides-Extra: mlx
+Requires-Dist: mlx-lm>=0.21; extra == 'mlx'
+Description-Content-Type: text/markdown
+# Cloud-Gym
+Scalable Training Data Generation for Infrastructure-as-Code Repair via Environment Inversion.
+Cloud-Gym generates (broken_config, error_message, fix) training pairs for IaC repair by applying **environment inversion** — taking working Terraform, CloudFormation, and OpenTofu configs and systematically breaking them using a defined fault taxonomy. It includes a benchmark (188 entries across 8 error categories) and fine-tuned models that run entirely on CPU.
+## stackfix: AI-Powered IaC Repair
+The `stackfix` CLI tool validates and repairs broken IaC files using fine-tuned local models. No API keys, no cloud costs, no data leaves your machine.
+### Install
+```bash
+pip install stackfix[gguf]
+```
+### Download a Model
+```bash
+# Recommended: 3B Q4 (1.8 GB, 87% pass@1)
+python -c "
+from huggingface_hub import hf_hub_download
+hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
+"
+```
+### Usage
+```bash
+# Check files for errors
+stackfix check main.tf template.yaml
+# Repair a broken file (show diff)
+stackfix repair main.tf --backend gguf --model iac-repair-3b-q4.gguf
+# Repair and apply fix in place
+stackfix repair main.tf --apply --backend gguf --model iac-repair-3b-q4.gguf
+# Explain errors in plain language
+stackfix discuss main.tf --backend gguf --model iac-repair-3b-q4.gguf
+# Pipe mode (stdin/stdout)
+cat broken.tf | stackfix repair - --backend gguf --model iac-repair-3b-q4.gguf > fixed.tf
+# Check all changed IaC files in git
+stackfix git-diff --backend gguf --model iac-repair-3b-q4.gguf
+```
+### Models
+| Model | Size | RAM | Speed (CPU) | pass@1 | HuggingFace |
+|---|---|---|---|---|---|
+| **7B Q4** | 4.5 GB | ~8 GB | ~20 tok/s | **0.926** | [Tetsuto/iac-repair-7b-gguf](https://huggingface.co/Tetsuto/iac-repair-7b-gguf) |
+| **3B Q4** | 1.8 GB | ~4 GB | 49 tok/s | 0.867 | [Tetsuto/iac-repair-3b-gguf](https://huggingface.co/Tetsuto/iac-repair-3b-gguf) |
+| **0.5B Q4** | 379 MB | ~800 MB | 127 tok/s | 0.723 | [Tetsuto/iac-repair-0.5b-gguf](https://huggingface.co/Tetsuto/iac-repair-0.5b-gguf) |
+All models are fine-tuned Qwen2.5-Coder with LoRA, exported to GGUF. They run on any CPU (Linux, macOS, Windows).
+### Backends
+| Backend | Install | Platform | Use Case |
+|---|---|---|---|
+| `gguf` | `pip install stackfix[gguf]` | Any (CPU) | CI/CD, Lambda, servers |
+| `mlx` | `pip install stackfix[mlx]` | Apple Silicon | Local dev on Mac |
+| `ollama` | `pip install stackfix` + Ollama | Any | When Ollama is already running |
+### CI/CD Integration
+Add to your GitHub Actions workflow to catch IaC errors on every PR:
+```yaml
+- name: Check IaC
+  run: |
+    pip install stackfix[gguf]
+    python -c "
+    from huggingface_hub import hf_hub_download
+    hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
+    "
+    shopt -s globstar  # without this, bash treats ** like * and misses root-level and nested files
+    stackfix check **/*.tf **/*.yaml
+```
+See [examples/USE_CASES.md](examples/USE_CASES.md) for more deployment scenarios (pre-commit hooks, Lambda, pipeline integration).
+### Pre-Commit Hook
+```yaml
+# .pre-commit-config.yaml
+repos:
+  - repo: local
+    hooks:
+      - id: stackfix
+        name: stackfix
+        entry: stackfix pre-commit --backend gguf --model iac-repair-3b-q4.gguf
+        language: python
+        types_or: [terraform, yaml]
+        additional_dependencies: ['stackfix[gguf]']
+```
+## Benchmark
+188 entries across 8 error categories, 3 difficulty levels, and 2 formats (Terraform + CloudFormation).
+### Results Summary
+| Model | pass@1 | Terraform | CloudFormation | High | Medium | Low |
+|---|---|---|---|---|---|---|
+| **7B v2 fine-tuned** | **0.926** | 0.993 | 0.750 | 0.960 | 0.897 | 0.923 |
+| 3B rank4 fine-tuned | 0.867 | 0.912 | 0.750 | 0.964 | 0.797 | 0.821 |
+| qwen2.5-coder:7b (base) | 0.856 | 0.905 | 0.707 | 0.840 | 0.859 | 0.893 |
+| 0.5B distilled | 0.723 | 0.775 | 0.590 | 0.809 | 0.648 | 0.731 |
+| llama3.2:3b (base) | 0.641 | 0.734 | 0.361 | 0.684 | 0.636 | 0.533 |
+| gemma-4-26b (base) | 0.009 | 0.000 | 0.032 | 0.000 | 0.004 | 0.051 |
+The distilled 0.5B model outperforms the 26B base model by roughly 80x on pass@1 (0.723 vs. 0.009).
+## Training Data Generation
+Cloud-Gym generates training data via environment inversion:
+1. **Collect** working IaC configs from GitHub, Terraform Registry, AWS samples
+2. **Break** them systematically using a fault taxonomy (28+ fault types across 8 categories)
+3. **Validate** broken configs to capture real error messages
+4. **Pair** (broken + errors) with the original working config as the gold fix
+```bash
+# Generate training data
+cloud-gym taxonomy          # View fault types
+python scripts/scrape.py    # Collect gold configs
+cloud-gym invert            # Generate broken variants
+cloud-gym export            # Export training pairs
+```
+## Project Structure
+```text
+cloudgym/
+  taxonomy/     Fault type definitions (28+ types, 8 categories)
+  scraper/      Gold config collection
+  validator/    IaC validation wrappers (terraform, cfn-lint)
+  inverter/     Fault injection engines
+  generator/    Training data pipeline
+  benchmark/    Evaluation harness
+  fixer/        stackfix CLI tool + model backends
+scripts/        Training, evaluation, and export scripts
+examples/       Broken IaC examples + use case docs
+```
+## Supported Formats
+- **Terraform** (`.tf`) — validated with `terraform validate`
+- **CloudFormation** (`.yaml`, `.yml`, `.json`) — validated with `cfn-lint`
+- **OpenTofu** (`.tf`) — same as Terraform
+## License
+MIT

stackfix-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,156 @@
+# Cloud-Gym
+Scalable Training Data Generation for Infrastructure-as-Code Repair via Environment Inversion.
+Cloud-Gym generates (broken_config, error_message, fix) training pairs for IaC repair by applying **environment inversion** — taking working Terraform, CloudFormation, and OpenTofu configs and systematically breaking them using a defined fault taxonomy. It includes a benchmark (188 entries across 8 error categories) and fine-tuned models that run entirely on CPU.
+## stackfix: AI-Powered IaC Repair
+The `stackfix` CLI tool validates and repairs broken IaC files using fine-tuned local models. No API keys, no cloud costs, no data leaves your machine.
+### Install
+```bash
+pip install stackfix[gguf]
+```
+### Download a Model
+```bash
+# Recommended: 3B Q4 (1.8 GB, 87% pass@1)
+python -c "
+from huggingface_hub import hf_hub_download
+hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
+"
+```
+### Usage
+```bash
+# Check files for errors
+stackfix check main.tf template.yaml
+# Repair a broken file (show diff)
+stackfix repair main.tf --backend gguf --model iac-repair-3b-q4.gguf
+# Repair and apply fix in place
+stackfix repair main.tf --apply --backend gguf --model iac-repair-3b-q4.gguf
+# Explain errors in plain language
+stackfix discuss main.tf --backend gguf --model iac-repair-3b-q4.gguf
+# Pipe mode (stdin/stdout)
+cat broken.tf | stackfix repair - --backend gguf --model iac-repair-3b-q4.gguf > fixed.tf
+# Check all changed IaC files in git
+stackfix git-diff --backend gguf --model iac-repair-3b-q4.gguf
+```
+### Models
+| Model | Size | RAM | Speed (CPU) | pass@1 | HuggingFace |
+|---|---|---|---|---|---|
+| **7B Q4** | 4.5 GB | ~8 GB | ~20 tok/s | **0.926** | [Tetsuto/iac-repair-7b-gguf](https://huggingface.co/Tetsuto/iac-repair-7b-gguf) |
+| **3B Q4** | 1.8 GB | ~4 GB | 49 tok/s | 0.867 | [Tetsuto/iac-repair-3b-gguf](https://huggingface.co/Tetsuto/iac-repair-3b-gguf) |
+| **0.5B Q4** | 379 MB | ~800 MB | 127 tok/s | 0.723 | [Tetsuto/iac-repair-0.5b-gguf](https://huggingface.co/Tetsuto/iac-repair-0.5b-gguf) |
+All models are fine-tuned Qwen2.5-Coder with LoRA, exported to GGUF. They run on any CPU (Linux, macOS, Windows).
+### Backends
+| Backend | Install | Platform | Use Case |
+|---|---|---|---|
+| `gguf` | `pip install stackfix[gguf]` | Any (CPU) | CI/CD, Lambda, servers |
+| `mlx` | `pip install stackfix[mlx]` | Apple Silicon | Local dev on Mac |
+| `ollama` | `pip install stackfix` + Ollama | Any | When Ollama is already running |
+### CI/CD Integration
+Add to your GitHub Actions workflow to catch IaC errors on every PR:
+```yaml
+- name: Check IaC
+  run: |
+    pip install stackfix[gguf]
+    python -c "
+    from huggingface_hub import hf_hub_download
+    hf_hub_download('Tetsuto/iac-repair-3b-gguf', 'iac-repair-3b-q4.gguf', local_dir='.')
+    "
+    shopt -s globstar  # without this, bash treats ** like * and misses root-level and nested files
+    stackfix check **/*.tf **/*.yaml
+```
+See [examples/USE_CASES.md](examples/USE_CASES.md) for more deployment scenarios (pre-commit hooks, Lambda, pipeline integration).
+### Pre-Commit Hook
+```yaml
+# .pre-commit-config.yaml
+repos:
+  - repo: local
+    hooks:
+      - id: stackfix
+        name: stackfix
+        entry: stackfix pre-commit --backend gguf --model iac-repair-3b-q4.gguf
+        language: python
+        types_or: [terraform, yaml]
+        additional_dependencies: ['stackfix[gguf]']
+```
+## Benchmark
+188 entries across 8 error categories, 3 difficulty levels, and 2 formats (Terraform + CloudFormation).
+### Results Summary
+| Model | pass@1 | Terraform | CloudFormation | High | Medium | Low |
+|---|---|---|---|---|---|---|
+| **7B v2 fine-tuned** | **0.926** | 0.993 | 0.750 | 0.960 | 0.897 | 0.923 |
+| 3B rank4 fine-tuned | 0.867 | 0.912 | 0.750 | 0.964 | 0.797 | 0.821 |
+| qwen2.5-coder:7b (base) | 0.856 | 0.905 | 0.707 | 0.840 | 0.859 | 0.893 |
+| 0.5B distilled | 0.723 | 0.775 | 0.590 | 0.809 | 0.648 | 0.731 |
+| llama3.2:3b (base) | 0.641 | 0.734 | 0.361 | 0.684 | 0.636 | 0.533 |
+| gemma-4-26b (base) | 0.009 | 0.000 | 0.032 | 0.000 | 0.004 | 0.051 |
+The distilled 0.5B model outperforms the 26B base model by roughly 80x on pass@1 (0.723 vs. 0.009).
+## Training Data Generation
+Cloud-Gym generates training data via environment inversion:
+1. **Collect** working IaC configs from GitHub, Terraform Registry, AWS samples
+2. **Break** them systematically using a fault taxonomy (28+ fault types across 8 categories)
+3. **Validate** broken configs to capture real error messages
+4. **Pair** (broken + errors) with the original working config as the gold fix
+```bash
+# Generate training data
+cloud-gym taxonomy          # View fault types
+python scripts/scrape.py    # Collect gold configs
+cloud-gym invert            # Generate broken variants
+cloud-gym export            # Export training pairs
+```
+## Project Structure
+```text
+cloudgym/
+  taxonomy/     Fault type definitions (28+ types, 8 categories)
+  scraper/      Gold config collection
+  validator/    IaC validation wrappers (terraform, cfn-lint)
+  inverter/     Fault injection engines
+  generator/    Training data pipeline
+  benchmark/    Evaluation harness
+  fixer/        stackfix CLI tool + model backends
+scripts/        Training, evaluation, and export scripts
+examples/       Broken IaC examples + use case docs
+```
+## Supported Formats
+- **Terraform** (`.tf`) — validated with `terraform validate`
+- **CloudFormation** (`.yaml`, `.yml`, `.json`) — validated with `cfn-lint`
+- **OpenTofu** (`.tf`) — same as Terraform
+## License
+MIT

stackfix-0.1.0/cloudgym/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Cloud-Gym: IaC Repair Benchmark via Environment Inversion."""
+__version__ = "0.1.0"

stackfix-0.1.0/cloudgym/benchmark/__init__.py ADDED Viewed

File without changes

stackfix-0.1.0/cloudgym/benchmark/dataset.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""Benchmark dataset management.
+Curates a balanced subset from the test split for evaluation.
+"""
+from __future__ import annotations
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+logger = logging.getLogger(__name__)
+@dataclass
+class BenchmarkEntry:
+    """A single benchmark entry.
+    One entry corresponds to one JSON object (one line) of the benchmark JSONL
+    file. All fields are required: none has a default value.
+    """
+    # Identifier copied from the source record's "id" key.
+    id: str
+    # IaC format label of the config. NOTE(review): the exact values are not
+    # visible in this module; confirm against the generator.
+    format: str
+    # Text of the broken configuration (curation keeps configs of >= 10 lines).
+    broken_config: str
+    # Validation error messages; curation only keeps records where this is non-empty.
+    errors: list[str]
+    # Validation warnings; written as an empty list when the source record has none.
+    warnings: list[str]
+    # Fault identifiers. Curation keeps single-fault records only and treats the
+    # part of the first identifier before the first "." as the fault category.
+    fault_types: list[str]
+    # Difficulty label, used to balance the selection within each category.
+    difficulty: str
+    # Reference configuration for the entry; presumably the original working
+    # config that the broken one was derived from (confirm against the generator).
+    gold_config: str
+    # Hash identifying the gold config; used with the fault category as the
+    # deduplication key. Written as "" when the source record lacks it.
+    gold_hash: str
+class BenchmarkDataset:
+    """Manages the curated benchmark dataset."""
+    def __init__(self, path: str | Path):
+        self.path = Path(path)
+        self.entries: list[BenchmarkEntry] = []
+        if self.path.exists():
+            self._load()
+    def _load(self):
+        """Load benchmark entries from JSONL."""
+        with open(self.path) as f:
+            for line in f:
+                data = json.loads(line)
+                self.entries.append(BenchmarkEntry(**{
+                    k: data[k] for k in BenchmarkEntry.__dataclass_fields__
+                    if k in data
+                }))
+        logger.info("Loaded %d benchmark entries from %s", len(self.entries), self.path)
+    def __len__(self) -> int:
+        return len(self.entries)
+    def __iter__(self):
+        return iter(self.entries)
+    @staticmethod
+    def build(
+        test_jsonl: str | Path,
+        output_path: str | Path,
+        target_size: int = 200,
+    ) -> BenchmarkDataset:
+        """Curate a benchmark dataset from the test split.
+        Curation rules:
+        - Single-fault only (one fault type per record)
+        - Balanced across categories and difficulties
+        - Min 10-line configs (non-trivial)
+        - Deduplicated per gold config (max 1 entry per gold hash per fault category)
+        Args:
+            test_jsonl: Path to test.jsonl from format_and_split.
+            output_path: Path to write benchmark.jsonl.
+            target_size: Target number of benchmark entries.
+        Returns:
+            BenchmarkDataset with curated entries.
+        """
+        test_path = Path(test_jsonl)
+        out_path = Path(output_path)
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        # Load test records
+        records = []
+        with open(test_path) as f:
+            for line in f:
+                records.append(json.loads(line))
+        logger.info("Loaded %d test records for curation", len(records))
+        # Filter: single-fault, min 10 lines
+        candidates = [
+            r for r in records
+            if len(r.get("fault_types", [])) == 1
+            and len(r.get("broken_config", "").splitlines()) >= 10
+            and r.get("errors")  # Must have validation errors
+        ]
+        logger.info("%d candidates after filtering", len(candidates))
+        # Deduplicate: max 1 entry per (gold_hash, fault_category)
+        seen: set[tuple[str, str]] = set()
+        deduped = []
+        for r in candidates:
+            fault_id = r["fault_types"][0]
+            category = fault_id.split(".")[0] if "." in fault_id else fault_id
+            key = (r.get("gold_hash", ""), category)
+            if key not in seen:
+                seen.add(key)
+                deduped.append(r)
+        logger.info("%d after deduplication", len(deduped))
+        # Balance across categories and difficulties
+        selected = _balance_select(deduped, target_size)
+        logger.info("Selected %d entries for benchmark", len(selected))
+        # Write benchmark JSONL
+        with open(out_path, "w") as f:
+            for r in selected:
+                entry = {
+                    "id": r["id"],
+                    "format": r["format"],
+                    "broken_config": r["broken_config"],
+                    "errors": r["errors"],
+                    "warnings": r.get("warnings", []),
+                    "fault_types": r["fault_types"],
+                    "difficulty": r["difficulty"],
+                    "gold_config": r["gold_config"],
+                    "gold_hash": r.get("gold_hash", ""),
+                }
+                f.write(json.dumps(entry) + "\n")
+        # Write metadata
+        meta = {
+            "total_entries": len(selected),
+            "source": str(test_path),
+            "category_distribution": _count_categories(selected),
+            "difficulty_distribution": _count_field(selected, "difficulty"),
+            "format_distribution": _count_field(selected, "format"),
+        }
+        meta_path = out_path.parent / "benchmark_meta.json"
+        with open(meta_path, "w") as f:
+            json.dump(meta, f, indent=2)
+        return BenchmarkDataset(out_path)
+def _balance_select(records: list[dict], target: int) -> list[dict]:
+    """Select records with balanced category/difficulty distribution."""
+    by_category: dict[str, list[dict]] = {}
+    for r in records:
+        fault_id = r["fault_types"][0]
+        cat = fault_id.split(".")[0] if "." in fault_id else fault_id
+        by_category.setdefault(cat, []).append(r)
+    if not by_category:
+        return []
+    per_category = max(1, target // len(by_category))
+    selected = []
+    for cat, cat_records in by_category.items():
+        # Within category, balance by difficulty
+        by_diff: dict[str, list[dict]] = {}
+        for r in cat_records:
+            by_diff.setdefault(r["difficulty"], []).append(r)
+        per_diff = max(1, per_category // max(len(by_diff), 1))
+        for diff, diff_records in by_diff.items():
+            selected.extend(diff_records[:per_diff])
+    return selected[:target]
+def _count_categories(records: list[dict]) -> dict[str, int]:
+    counts: dict[str, int] = {}
+    for r in records:
+        fault_id = r["fault_types"][0]
+        cat = fault_id.split(".")[0] if "." in fault_id else fault_id
+        counts[cat] = counts.get(cat, 0) + 1
+    return counts
+def _count_field(records: list[dict], field: str) -> dict[str, int]:
+    counts: dict[str, int] = {}
+    for r in records:
+        val = r.get(field, "unknown")
+        counts[val] = counts.get(val, 0) + 1
+    return counts