stackfix 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. cloudgym/__init__.py +3 -0
  2. cloudgym/benchmark/__init__.py +0 -0
  3. cloudgym/benchmark/dataset.py +188 -0
  4. cloudgym/benchmark/evaluator.py +275 -0
  5. cloudgym/cli.py +61 -0
  6. cloudgym/fixer/__init__.py +1 -0
  7. cloudgym/fixer/cli.py +521 -0
  8. cloudgym/fixer/detector.py +81 -0
  9. cloudgym/fixer/formatter.py +55 -0
  10. cloudgym/fixer/lambda_handler.py +126 -0
  11. cloudgym/fixer/repairer.py +237 -0
  12. cloudgym/generator/__init__.py +0 -0
  13. cloudgym/generator/formatter.py +142 -0
  14. cloudgym/generator/pipeline.py +271 -0
  15. cloudgym/inverter/__init__.py +0 -0
  16. cloudgym/inverter/_cf_injectors.py +705 -0
  17. cloudgym/inverter/_cf_utils.py +202 -0
  18. cloudgym/inverter/_hcl_utils.py +182 -0
  19. cloudgym/inverter/_tf_injectors.py +641 -0
  20. cloudgym/inverter/_yaml_cf.py +84 -0
  21. cloudgym/inverter/agentic.py +90 -0
  22. cloudgym/inverter/engine.py +258 -0
  23. cloudgym/inverter/programmatic.py +95 -0
  24. cloudgym/scraper/__init__.py +0 -0
  25. cloudgym/scraper/aws_samples.py +159 -0
  26. cloudgym/scraper/github.py +238 -0
  27. cloudgym/scraper/registry.py +165 -0
  28. cloudgym/scraper/validator.py +116 -0
  29. cloudgym/taxonomy/__init__.py +10 -0
  30. cloudgym/taxonomy/base.py +102 -0
  31. cloudgym/taxonomy/cloudformation.py +258 -0
  32. cloudgym/taxonomy/terraform.py +274 -0
  33. cloudgym/utils/__init__.py +0 -0
  34. cloudgym/utils/config.py +57 -0
  35. cloudgym/utils/ollama.py +66 -0
  36. cloudgym/validator/__init__.py +0 -0
  37. cloudgym/validator/cloudformation.py +55 -0
  38. cloudgym/validator/opentofu.py +103 -0
  39. cloudgym/validator/terraform.py +115 -0
  40. stackfix-0.1.0.dist-info/METADATA +182 -0
  41. stackfix-0.1.0.dist-info/RECORD +44 -0
  42. stackfix-0.1.0.dist-info/WHEEL +4 -0
  43. stackfix-0.1.0.dist-info/entry_points.txt +3 -0
  44. stackfix-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,202 @@
1
+ """CloudFormation dict manipulation helpers for fault injection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
# Property names treated as required for each CloudFormation resource type.
# The table is deliberately partial (common resource types only); an empty
# list means no required property is recorded for that type here.
REQUIRED_PROPERTIES: dict[str, list[str]] = {
    # --- EC2 ---
    "AWS::EC2::Instance": ["ImageId"],
    "AWS::EC2::SecurityGroup": ["GroupDescription"],
    "AWS::EC2::Subnet": ["VpcId", "CidrBlock"],
    "AWS::EC2::VPC": ["CidrBlock"],
    "AWS::EC2::InternetGateway": [],
    "AWS::EC2::RouteTable": ["VpcId"],
    "AWS::EC2::Route": ["RouteTableId"],
    # --- Storage, databases, compute, identity, messaging ---
    "AWS::S3::Bucket": [],
    "AWS::RDS::DBInstance": ["DBInstanceClass", "Engine"],
    "AWS::Lambda::Function": ["Code", "Handler", "Role", "Runtime"],
    "AWS::IAM::Role": ["AssumeRolePolicyDocument"],
    "AWS::IAM::Policy": ["PolicyDocument", "PolicyName"],
    "AWS::SNS::Topic": [],
    "AWS::SQS::Queue": [],
    "AWS::DynamoDB::Table": ["KeySchema", "AttributeDefinitions"],
    # --- ECS ---
    "AWS::ECS::Cluster": [],
    "AWS::ECS::TaskDefinition": ["ContainerDefinitions"],
    "AWS::ECS::Service": ["TaskDefinition"],
    # --- Load balancing and auto scaling ---
    "AWS::ElasticLoadBalancingV2::LoadBalancer": [],
    "AWS::ElasticLoadBalancingV2::TargetGroup": [],
    "AWS::AutoScaling::AutoScalingGroup": ["MinSize", "MaxSize"],
    "AWS::AutoScaling::LaunchConfiguration": ["ImageId", "InstanceType"],
    # --- Monitoring ---
    "AWS::CloudWatch::Alarm": [
        "ComparisonOperator",
        "EvaluationPeriods",
        "MetricName",
        "Namespace",
        "Period",
        "Statistic",
        "Threshold",
    ],
}
37
+
38
# Substitution table used when injecting a bad resource type: each key is a
# real CloudFormation resource type and its value is the incorrect type name
# to swap in for it.
RESOURCE_TYPE_TYPOS: dict[str, str] = {
    # real type                 -> injected (incorrect) type
    "AWS::EC2::Instance": "AWS::EC2::VirtualMachine",
    "AWS::S3::Bucket": "AWS::S3::Storage",
    "AWS::Lambda::Function": "AWS::Lambda::Lambda",
    "AWS::RDS::DBInstance": "AWS::RDS::Database",
    "AWS::IAM::Role": "AWS::IAM::ServiceRole",
    "AWS::EC2::SecurityGroup": "AWS::EC2::FirewallGroup",
    "AWS::EC2::VPC": "AWS::EC2::VirtualPrivateCloud",
    "AWS::DynamoDB::Table": "AWS::DynamoDB::Database",
    "AWS::SNS::Topic": "AWS::SNS::Notification",
    "AWS::SQS::Queue": "AWS::SQS::MessageQueue",
}
51
+
52
+
53
def find_refs(template: dict) -> list[tuple[str, list[str]]]:
    """Collect every string-valued ``Ref`` / ``Fn::Ref`` entry in a template.

    Args:
        template: Parsed CloudFormation template.

    Returns:
        A list of ``(target, path)`` tuples. ``target`` is the referenced
        name; ``path`` is the list of keys leading to, and including, the
        ``Ref`` key itself.
    """
    found: list[tuple[str, list[str]]] = []

    def record_ref(parents: list, key: str, value: Any) -> None:
        # A Ref whose value is not a plain string is skipped.
        if key in ("Ref", "Fn::Ref") and isinstance(value, str):
            found.append((value, [*parents, key]))

    _walk(template, [], record_ref)
    return found
65
+
66
+
67
def find_getatt(template: dict) -> list[tuple[list, list[str]]]:
    """Collect every ``GetAtt`` / ``Fn::GetAtt`` entry in a template.

    Args:
        template: Parsed CloudFormation template.

    Returns:
        A list of ``(getatt_value, path)`` tuples. The value is stored exactly
        as found (no type check is applied); ``path`` is the list of keys
        leading to, and including, the ``GetAtt`` key.
    """
    wanted = ("GetAtt", "Fn::GetAtt")
    matches: list[tuple[list, list[str]]] = []

    def collect(parents: list, key: str, value: Any) -> None:
        if key not in wanted:
            return
        matches.append((value, [*parents, key]))

    _walk(template, [], collect)
    return matches
80
+
81
+
82
def find_subs(template: dict) -> list[tuple[Any, list[str]]]:
    """Collect every ``Sub`` / ``Fn::Sub`` entry in a template.

    Returns:
        A list of ``(sub_value, path)`` tuples, where ``path`` is the list of
        keys leading to, and including, the ``Sub`` key.
    """
    wanted = ("Sub", "Fn::Sub")
    matches: list[tuple[Any, list[str]]] = []

    def collect(parents: list, key: str, value: Any) -> None:
        if key not in wanted:
            return
        matches.append((value, [*parents, key]))

    _walk(template, [], collect)
    return matches
92
+
93
+
94
def find_selects(template: dict) -> list[tuple[Any, list[str]]]:
    """Collect every ``Select`` / ``Fn::Select`` entry in a template.

    Returns:
        A list of ``(select_value, path)`` tuples, where ``path`` is the list
        of keys leading to, and including, the ``Select`` key.
    """
    wanted = ("Select", "Fn::Select")
    matches: list[tuple[Any, list[str]]] = []

    def collect(parents: list, key: str, value: Any) -> None:
        if key not in wanted:
            return
        matches.append((value, [*parents, key]))

    _walk(template, [], collect)
    return matches
104
+
105
+
106
def find_ifs(template: dict) -> list[tuple[Any, list[str]]]:
    """Collect every ``If`` / ``Fn::If`` entry in a template.

    Returns:
        A list of ``(if_value, path)`` tuples, where ``path`` is the list of
        keys leading to, and including, the ``If`` key.
    """
    wanted = ("If", "Fn::If")
    matches: list[tuple[Any, list[str]]] = []

    def collect(parents: list, key: str, value: Any) -> None:
        if key not in wanted:
            return
        matches.append((value, [*parents, key]))

    _walk(template, [], collect)
    return matches
116
+
117
+
118
def find_joins(template: dict) -> list[tuple[Any, list[str]]]:
    """Collect every ``Join`` / ``Fn::Join`` entry in a template.

    Returns:
        A list of ``(join_value, path)`` tuples, where ``path`` is the list of
        keys leading to, and including, the ``Join`` key.
    """
    wanted = ("Join", "Fn::Join")
    matches: list[tuple[Any, list[str]]] = []

    def collect(parents: list, key: str, value: Any) -> None:
        if key not in wanted:
            return
        matches.append((value, [*parents, key]))

    _walk(template, [], collect)
    return matches
128
+
129
+
130
def get_resource_logical_ids(template: dict) -> list[str]:
    """Return the keys of the template's ``Resources`` section.

    An empty list is returned when the section is missing or is not a dict.
    """
    section = template.get("Resources", {})
    return list(section.keys()) if isinstance(section, dict) else []
136
+
137
+
138
def get_parameter_names(template: dict) -> list[str]:
    """Return the keys of the template's ``Parameters`` section.

    An empty list is returned when the section is missing or is not a dict.
    """
    section = template.get("Parameters", {})
    return list(section.keys()) if isinstance(section, dict) else []
144
+
145
+
146
def get_condition_names(template: dict) -> list[str]:
    """Return the keys of the template's ``Conditions`` section.

    An empty list is returned when the section is missing or is not a dict.
    """
    section = template.get("Conditions", {})
    return list(section.keys()) if isinstance(section, dict) else []
152
+
153
+
154
def get_resource_type(template: dict, logical_id: str) -> str | None:
    """Return the ``Type`` of the resource with the given logical ID.

    Args:
        template: Parsed CloudFormation template.
        logical_id: Key of the resource inside the ``Resources`` section.

    Returns:
        The resource's ``Type`` value, or None when the ``Resources`` section
        is missing or not a dict, the logical ID is unknown, the resource
        entry is not a dict, or it has no ``Type`` key.
    """
    resources = template.get("Resources")
    # Templates being processed may be malformed, so never assume dict shapes.
    if not isinstance(resources, dict):
        return None
    resource = resources.get(logical_id)
    if not isinstance(resource, dict):
        return None
    return resource.get("Type")
159
+
160
+
161
def set_nested(d: dict, path: list[str], value: Any) -> None:
    """Assign ``value`` at ``path`` inside a nested dict/list structure.

    Dict levels missing along the way are created as empty dicts. List levels
    are addressed by decimal-string indices (e.g. ``"0"``). The call is a
    silent no-op when ``path`` is empty, when a list index is not a decimal
    string or is out of range, or when the path runs into a value that is
    neither a dict nor a list.

    Args:
        d: Root container, mutated in place.
        path: Keys (and decimal-string list indices) from the root to the
            slot to assign.
        value: Value to store at the final path element.
    """
    if not path:
        return

    def list_index(container: list, key: Any) -> int | None:
        # isdecimal() only accepts strings that int() can parse, and the
        # bounds check keeps a bad index from raising IndexError.
        if isinstance(key, str) and key.isdecimal() and int(key) < len(container):
            return int(key)
        return None

    node: Any = d
    for key in path[:-1]:
        if isinstance(node, dict):
            node = node.setdefault(key, {})
        elif isinstance(node, list):
            idx = list_index(node, key)
            if idx is None:
                return
            node = node[idx]
        else:
            return

    last = path[-1]
    if isinstance(node, dict):
        node[last] = value
    elif isinstance(node, list):
        # The final container may itself be a list; existing slots can be
        # overwritten, but the list is never extended.
        idx = list_index(node, last)
        if idx is not None:
            node[idx] = value
172
+
173
+
174
def get_nested(d: dict, path: list[str]) -> Any:
    """Look up the value stored at ``path`` in a nested dict/list structure.

    Dict levels are read with ``.get``; list levels are read with
    digit-string indices. None is returned as soon as a step cannot be
    resolved: a missing key, an index past the end of a list, a non-digit
    index into a list, or a step into a value that is neither dict nor list.
    An empty ``path`` returns ``d`` itself.
    """
    current: Any = d
    for step in path:
        if isinstance(current, dict):
            current = current.get(step)
        elif isinstance(current, list) and step.isdigit():
            position = int(step)
            current = current[position] if position < len(current) else None
        else:
            return None
        if current is None:
            return None
    return current
187
+
188
+
189
def walk_template(template: dict, visitor_fn: Any) -> None:
    """Visit every dict entry of a CloudFormation template, depth first.

    ``visitor_fn`` is invoked as ``visitor_fn(path, key, value)``; the walk
    starts at the template root with an empty path.
    """
    root_path: list = []
    _walk(template, root_path, visitor_fn)
192
+
193
+
194
+ def _walk(obj: Any, path: list, visitor: Any) -> None:
195
+ """Internal recursive walker."""
196
+ if isinstance(obj, dict):
197
+ for key, value in obj.items():
198
+ visitor(path, key, value)
199
+ _walk(value, path + [key], visitor)
200
+ elif isinstance(obj, list):
201
+ for i, item in enumerate(obj):
202
+ _walk(item, path + [str(i)], visitor)
@@ -0,0 +1,182 @@
1
+ """HCL text manipulation helpers for Terraform fault injection.
2
+
3
+ python-hcl2 is read-only (parses HCL to dicts but can't write back),
4
+ so we use a parse-then-regex approach: parse to understand structure,
5
+ then do targeted string manipulation on the raw text.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+
12
+
13
def _scan_to_closing_brace(text: str, open_brace: int) -> int | None:
    """Return the offset just past the brace that closes ``text[open_brace]``.

    Braces inside double-quoted strings and inside ``#``, ``//`` and
    ``/* ... */`` comments are ignored. Returns None when the block is never
    closed. Heredoc bodies are not treated specially.
    """
    size = len(text)
    depth = 1
    pos = open_brace + 1

    while pos < size and depth > 0:
        ch = text[pos]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
        elif ch == '"':
            # Skip string content, honouring backslash escapes.
            pos += 1
            while pos < size and text[pos] != '"':
                if text[pos] == '\\':
                    pos += 1
                pos += 1
        elif ch == '#' or text.startswith('//', pos):
            # Line comment: jump to the newline (or end of text).
            newline = text.find('\n', pos)
            pos = size if newline == -1 else newline
        elif text.startswith('/*', pos):
            # Block comment: jump to the final character of the terminator.
            close = text.find('*/', pos + 2)
            pos = size if close == -1 else close + 1
        pos += 1

    return pos if depth == 0 else None


def find_block_boundaries(
    text: str, block_type: str, block_name: str | None = None
) -> list[tuple[int, int]]:
    """Find start/end character offsets of HCL blocks by brace-depth counting.

    Args:
        text: Raw HCL text.
        block_type: e.g. "resource", "variable", "provider", "terraform".
        block_name: Optional first label to match (e.g. "aws_instance"); it
            is matched right after the block type, with or without quotes,
            and may be followed by one more quoted label.

    Returns:
        List of (start_offset, end_offset) tuples for matching blocks.
        ``start_offset`` is the start of the header match and ``end_offset``
        is one past the closing brace. Blocks whose closing brace is never
        found are omitted.
    """
    # Match block headers like: resource "aws_instance" "main" {
    if block_name:
        pattern = re.compile(
            rf'^[ \t]*{re.escape(block_type)}\s+["\']?{re.escape(block_name)}["\']?'
            r'(?:\s+["\'][^"\']*["\'])?\s*\{',
            re.MULTILINE,
        )
    else:
        pattern = re.compile(
            rf'^[ \t]*{re.escape(block_type)}\s+.*?\{{',
            re.MULTILINE,
        )

    results = []
    for match in pattern.finditer(text):
        # The header pattern ends with the opening brace.
        end = _scan_to_closing_brace(text, match.end() - 1)
        if end is not None:
            results.append((match.start(), end))
    return results
69
+
70
+
71
def find_attribute_line(
    text: str, block_start: int, block_end: int, attr_name: str
) -> int | None:
    """Locate the first line in a block that assigns ``attr_name``.

    Only ``text[block_start:block_end]`` is searched. A line counts when,
    after stripping surrounding whitespace, it begins with ``attr_name``
    followed by optional whitespace and ``=``.

    Returns:
        The 0-based line number within ``text``, or None if no line matches.
    """
    assignment = re.compile(rf'{re.escape(attr_name)}\s*=')
    # Number of lines preceding the block, i.e. the block's first line number.
    first_line = text[:block_start].count('\n')

    for offset, raw_line in enumerate(text[block_start:block_end].split('\n')):
        if assignment.match(raw_line.strip()):
            return first_line + offset
    return None
88
+
89
+
90
def remove_lines(text: str, start_line: int, end_line: int) -> str:
    """Drop lines ``start_line`` through ``end_line`` (0-based, inclusive).

    The text is split on newlines, the lines before ``start_line`` and the
    lines after ``end_line`` are kept, and the result is re-joined.
    """
    rows = text.split('\n')
    kept = rows[:start_line]
    kept.extend(rows[end_line + 1:])
    return '\n'.join(kept)
95
+
96
+
97
def replace_value(text: str, line_num: int, old_val: str, new_val: str) -> str:
    """Swap the first ``old_val`` on one line (0-based) for ``new_val``.

    The text is returned unchanged when ``line_num`` is outside the text's
    line range or the line does not contain ``old_val``.
    """
    rows = text.split('\n')
    if line_num < 0 or line_num >= len(rows):
        return '\n'.join(rows)
    rows[line_num] = rows[line_num].replace(old_val, new_val, 1)
    return '\n'.join(rows)
103
+
104
+
105
def find_all_attributes(text: str, block_start: int, block_end: int) -> list[tuple[str, int]]:
    """List the attribute assignments at the top level of a block.

    Only ``text[block_start:block_end]`` is scanned, one line at a time, while
    keeping a running brace depth from the ``{`` and ``}`` characters on each
    stripped line. The depth is updated before the line is examined, so a
    line that itself opens a nested brace (e.g. ``tags = {``) is not recorded.

    Returns:
        ``(attr_name, line_number)`` tuples, with 0-based line numbers
        relative to the whole ``text``.
    """
    assignment = re.compile(r'(\w+)\s*=')
    first_line = text[:block_start].count('\n')
    found = []
    nesting = 0

    for offset, raw_line in enumerate(text[block_start:block_end].split('\n')):
        content = raw_line.strip()
        nesting += content.count('{') - content.count('}')
        if nesting > 1:
            # Inside (or opening) a nested block: not a top-level attribute.
            continue
        hit = assignment.match(content)
        if hit is not None:
            found.append((hit.group(1), first_line + offset))

    return found
124
+
125
+
126
def _resource_block_end(text: str, open_brace: int) -> int | None:
    """Return the offset just past the brace that closes ``text[open_brace]``.

    Braces inside double-quoted strings and inside ``#``, ``//`` and
    ``/* ... */`` comments are ignored. Returns None when the block is never
    closed. Heredoc bodies are not treated specially.
    """
    size = len(text)
    depth = 1
    pos = open_brace + 1

    while pos < size and depth > 0:
        ch = text[pos]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
        elif ch == '"':
            # Skip string content, honouring backslash escapes.
            pos += 1
            while pos < size and text[pos] != '"':
                if text[pos] == '\\':
                    pos += 1
                pos += 1
        elif ch == '#' or text.startswith('//', pos):
            # Line comment: jump to the newline (or end of text).
            newline = text.find('\n', pos)
            pos = size if newline == -1 else newline
        elif text.startswith('/*', pos):
            # Block comment: jump to the final character of the terminator.
            close = text.find('*/', pos + 2)
            pos = size if close == -1 else close + 1
        pos += 1

    return pos if depth == 0 else None


def find_resource_blocks(text: str) -> list[tuple[str, str, int, int]]:
    """Find all resource blocks and return (type, name, start, end) tuples.

    A block header has the form ``resource "<type>" "<name>" {`` at the start
    of a line (leading spaces or tabs allowed). ``start`` is the offset of the
    header match and ``end`` is one past the block's closing brace. Blocks
    whose closing brace is never found are omitted.
    """
    header = re.compile(
        r'^[ \t]*resource\s+"([^"]+)"\s+"([^"]+)"\s*\{',
        re.MULTILINE,
    )

    results = []
    for match in header.finditer(text):
        # The header pattern ends with the opening brace.
        end = _resource_block_end(text, match.end() - 1)
        if end is not None:
            results.append((match.group(1), match.group(2), match.start(), end))
    return results
160
+
161
+
162
def find_variable_refs(text: str) -> list[tuple[str, int]]:
    """Find all var.X references and return (var_name, offset) pairs.

    ``offset`` is the position of the ``var`` token in ``text``. The ``var``
    token must stand on its own: it is not matched when it is the tail of a
    longer identifier (``envvar.x``, ``my_var.x``, ``my-var.x``) or an
    attribute of another object (``foo.var.x``).
    """
    # The lookbehind rejects a preceding identifier character or dot.
    pattern = re.compile(r'(?<![\w.-])var\.(\w+)')
    return [(m.group(1), m.start()) for m in pattern.finditer(text)]
168
+
169
+
170
def find_resource_refs(text: str) -> list[tuple[str, str, int]]:
    """Find all resource_type.resource_name references.

    A reference is a dotted chain of at least three segments such as
    ``aws_instance.main.id``. The first two segments must be identifiers
    (starting with a letter or underscore), so dotted numeric literals like
    ``10.0.0.0/16`` or ``1.2.3`` are not reported. Chains whose first segment
    is exactly ``var`` are skipped. Other namespaces (e.g. ``local.x.y``) are
    not filtered out.

    Returns (resource_type, resource_name, offset) triples, where ``offset``
    is the position of the resource type in ``text``.
    """
    # Match patterns like aws_instance.main.id or aws_vpc.default.id; the
    # lookbehind stops a match from starting in the middle of a token.
    pattern = re.compile(r'(?<!\w)([A-Za-z_]\w*)\.([A-Za-z_]\w*)\.(\w+)')
    return [
        (m.group(1), m.group(2), m.start())
        for m in pattern.finditer(text)
        if m.group(1) != 'var'
    ]