terraformgraph 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
1
+ """
2
+ Terraform HCL Parser
3
+
4
+ Parses Terraform files and extracts AWS resources and their relationships.
5
+ """
6
+
7
+ import re
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ import hcl2
13
+
14
+
15
@dataclass
class TerraformResource:
    """Represents a single ``resource`` block parsed from a Terraform file."""

    # Terraform resource type, e.g. "aws_sqs_queue".
    resource_type: str
    # Local name given to the resource in its block header.
    resource_name: str
    # Name of the module the resource was found in; empty for the root directory.
    module_path: str
    # Raw block body as returned by the HCL parser.
    attributes: Dict[str, Any]
    # Path of the .tf file the block was read from.
    source_file: str
    # Parsed ``count`` value: None when absent, -1 when it is a non-numeric expression.
    count: Optional[int] = None
    # True when the block declares ``for_each``.
    for_each: bool = False

    @property
    def full_id(self) -> str:
        """Unique identifier: ``[module_path.]resource_type.resource_name``."""
        if self.module_path:
            return f"{self.module_path}.{self.resource_type}.{self.resource_name}"
        return f"{self.resource_type}.{self.resource_name}"

    @property
    def display_name(self) -> str:
        """Human-readable name for display.

        Uses the literal ``name`` attribute when there is one; otherwise falls
        back to the resource's local name. The fallback applies when ``name``
        is missing, is not a string, is blank, or contains an interpolation
        (``${``) that cannot be resolved statically.
        """
        name = self.attributes.get('name', self.resource_name)
        # Attribute values may arrive wrapped in a single-element list; unwrap
        # so a literal name is not discarded just because of the wrapper.
        if isinstance(name, list) and len(name) == 1:
            name = name[0]
        if isinstance(name, str) and name.strip() and '${' not in name:
            return name
        return self.resource_name
40
+
41
+
42
@dataclass
class ModuleCall:
    """A ``module`` block found in a Terraform file."""

    # Label of the module block.
    name: str
    # Value of the block's ``source`` argument.
    source: str
    # The complete block body, ``source`` included.
    inputs: Dict[str, Any]
    # Path of the .tf file that declares the module.
    source_file: str
49
+
50
+
51
@dataclass
class ResourceRelationship:
    """A directed, typed edge between two resources."""

    # Identifier of the resource the edge starts from.
    source_id: str
    # Identifier of the resource the edge points to.
    target_id: str
    # Kind of relationship, e.g. "belongs_to_vpc".
    relationship_type: str
    # Optional short text attached to the edge.
    label: Optional[str] = None
58
+
59
+
60
@dataclass
class ParseResult:
    """Everything collected while parsing a set of Terraform files."""

    # Each field gets its own fresh list per instance.
    # Resource blocks found.
    resources: List[TerraformResource] = field(default_factory=list)
    # Module blocks found.
    modules: List[ModuleCall] = field(default_factory=list)
    # Edges between resources.
    relationships: List[ResourceRelationship] = field(default_factory=list)
66
+
67
+
68
class TerraformParser:
    """Parses Terraform HCL files and extracts resources, module calls and
    the relationships between resources.

    Parsing is best-effort: files that cannot be read or parsed and module
    sources that cannot be found produce a printed warning and are skipped.
    """

    # Reference forms paired with a tag naming the kind of reference.
    # No method of this class reads this table.
    REFERENCE_PATTERNS = [
        # module.X.output
        (r'module\.(\w+)\.(\w+)', 'module_ref'),
        # aws_resource.name.attribute
        (r'(aws_\w+)\.(\w+)\.(\w+)', 'resource_ref'),
        # var.X
        (r'var\.(\w+)', 'var_ref'),
        # local.X
        (r'local\.(\w+)', 'local_ref'),
    ]

    # attribute name -> (relationship type, resource type the attribute points at)
    RELATIONSHIP_EXTRACTORS = {
        'vpc_id': ('belongs_to_vpc', 'aws_vpc'),
        'subnet_id': ('deployed_in_subnet', 'aws_subnet'),
        'subnet_ids': ('deployed_in_subnets', 'aws_subnet'),
        'security_group_ids': ('uses_security_group', 'aws_security_group'),
        'kms_master_key_id': ('encrypted_by', 'aws_kms_key'),
        'kms_key_id': ('encrypted_by', 'aws_kms_key'),
        'target_group_arn': ('routes_to', 'aws_lb_target_group'),
        'load_balancer_arn': ('attached_to', 'aws_lb'),
        'web_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
        'waf_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
        'certificate_arn': ('uses_certificate', 'aws_acm_certificate'),
        'role_arn': ('assumes_role', 'aws_iam_role'),
        'queue_arn': ('sends_to_queue', 'aws_sqs_queue'),
        'topic_arn': ('publishes_to', 'aws_sns_topic'),
        'alarm_topic_arn': ('alerts_to', 'aws_sns_topic'),
    }

    def __init__(self, infrastructure_path: str, icons_path: Optional[str] = None):
        """Create a parser rooted at ``infrastructure_path``.

        Args:
            infrastructure_path: Base directory; environments are looked up
                beneath it and non-relative module sources beneath its
                ``.modules`` sub-directory.
            icons_path: Optional directory path, stored as ``icons_path``;
                no method of this class reads it.
        """
        self.infrastructure_path = Path(infrastructure_path)
        self.icons_path = Path(icons_path) if icons_path else None
        # Resolved module directory -> parse result, so a module instantiated
        # several times is only read from disk once.
        self._parsed_modules: "Dict[str, ParseResult]" = {}

    def parse_environment(self, environment: str) -> "ParseResult":
        """Parse all Terraform files for a specific environment.

        Args:
            environment: Name of a directory directly under the
                infrastructure path.

        Returns:
            ParseResult with all resources and relationships.

        Raises:
            ValueError: If the environment directory does not exist.
        """
        env_path = self.infrastructure_path / environment
        if not env_path.exists():
            raise ValueError(f"Environment path not found: {env_path}")

        return self.parse_directory(env_path)

    def parse_directory(self, directory: Path) -> "ParseResult":
        """Parse all Terraform files in a directory (non-environment mode).

        Args:
            directory: Path (or string path) to a directory containing .tf files.

        Returns:
            ParseResult with all resources and relationships.

        Raises:
            ValueError: If the directory does not exist.
        """
        directory = Path(directory)

        if not directory.exists():
            raise ValueError(f"Directory does not exist: {directory}")

        result = ParseResult()

        # Sorted so the resource order does not depend on the file system's
        # directory listing order.
        tf_files = sorted(directory.glob("*.tf"))
        if not tf_files:
            print(f"Warning: No .tf files found in {directory}")

        for tf_file in tf_files:
            self._parse_file(tf_file, result, module_path="")

        # Only modules called directly from this directory are followed.
        # Iterate over a snapshot of the list.
        for module in list(result.modules):
            module_result = self._parse_module(module.source, directory, module.name)
            result.resources.extend(module_result.resources)
            result.relationships.extend(module_result.relationships)

        # Relationships are computed once, over root and module resources together.
        self._extract_relationships(result)

        return result

    @staticmethod
    def _unwrap_block(config: Any) -> Any:
        """Return the first element of a list-wrapped block body (``{}`` if the list is empty)."""
        if isinstance(config, list):
            return config[0] if config else {}
        return config

    @staticmethod
    def _pick_by_name(candidates: list, name: str, module_path: Optional[str]) -> Any:
        """Pick the candidate called ``name``, preferring one in ``module_path``.

        Falls back to the first candidate with that name when none lives in
        the requested module, or when ``module_path`` is None. Returns None
        when no candidate has that name.
        """
        named = [c for c in candidates if c.resource_name == name]
        if not named:
            return None
        if module_path is not None:
            for candidate in named:
                if candidate.module_path == module_path:
                    return candidate
        return named[0]

    def _parse_file(self, file_path: Path, result: "ParseResult", module_path: str) -> None:
        """Parse a single Terraform file and append what it declares to ``result``.

        Args:
            file_path: The .tf file to read.
            result: Accumulator that receives resources and module calls.
            module_path: Module name recorded on every resource found; empty
                for the root directory.
        """
        try:
            # Explicit encoding so parsing does not depend on the platform's
            # default locale encoding.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = hcl2.load(f)
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the run.
            print(f"Warning: Could not parse {file_path}: {e}")
            return

        # Extract resources
        for resource_block in content.get('resource', []):
            for resource_type, resources in resource_block.items():
                for resource_name, config in resources.items():
                    # Handle list configs (HCL2 can return lists)
                    config = self._unwrap_block(config)

                    result.resources.append(TerraformResource(
                        resource_type=resource_type,
                        resource_name=resource_name,
                        module_path=module_path,
                        attributes=config,
                        source_file=str(file_path),
                        count=self._extract_count(config),
                        for_each='for_each' in config,
                    ))

        # Extract module calls
        for module_block in content.get('module', []):
            for module_name, config in module_block.items():
                config = self._unwrap_block(config)

                source = config.get('source', '')
                # Unwrap a list-wrapped source so path resolution always
                # receives a plain string.
                if isinstance(source, list):
                    source = source[0] if source else ''

                result.modules.append(ModuleCall(
                    name=module_name,
                    source=source,
                    inputs=config,
                    source_file=str(file_path),
                ))

    def _parse_module(self, source: str, base_path: Path, module_name: str) -> "ParseResult":
        """Parse a module from its source path.

        Args:
            source: The module's ``source`` value. Values starting with
                ``./`` or ``../`` are resolved against ``base_path``; anything
                else is looked up under ``<infrastructure_path>/.modules``.
            base_path: Directory of the configuration calling the module.
            module_name: Name recorded as ``module_path`` on every resource.

        Returns:
            ParseResult holding the module's resources; empty if the module
            directory cannot be found.
        """
        # Resolve relative path
        if source.startswith(('../', './')):
            module_path = (base_path / source).resolve()
        else:
            module_path = self.infrastructure_path / '.modules' / source

        if not module_path.exists():
            print(f"Warning: Module path not found: {module_path}")
            return ParseResult()

        cache_key = str(module_path)
        cached = self._parsed_modules.get(cache_key)
        if cached is not None:
            # Same module directory under another name: re-label copies of the
            # cached resources rather than re-reading the files.
            result = ParseResult()
            for res in cached.resources:
                result.resources.append(TerraformResource(
                    resource_type=res.resource_type,
                    resource_name=res.resource_name,
                    module_path=module_name,
                    attributes=res.attributes,
                    source_file=res.source_file,
                    count=res.count,
                    for_each=res.for_each,
                ))
            return result

        result = ParseResult()
        # Sorted for a deterministic resource order.
        for tf_file in sorted(module_path.glob("*.tf")):
            self._parse_file(tf_file, result, module_path=module_name)

        self._parsed_modules[cache_key] = result
        return result

    def _extract_count(self, config: Dict[str, Any]) -> Optional[int]:
        """Extract the ``count`` value from a resource config.

        Returns:
            The integer count when it is a literal number (or numeric string),
            -1 when it is a string expression that is not a plain number
            (meaning "multiple"), and None when ``count`` is absent or of an
            unsupported type.
        """
        count = config.get('count')
        # Unwrap a single-element list wrapper around the value.
        if isinstance(count, list) and len(count) == 1:
            count = count[0]
        if count is None:
            return None
        if isinstance(count, int):
            return count
        if isinstance(count, str):
            # Try to parse simple numbers
            try:
                return int(count)
            except ValueError:
                # Complex expression, return -1 to indicate "multiple"
                return -1
        return None

    def _extract_relationships(self, result: "ParseResult") -> None:
        """Derive relationships between the resources in ``result``.

        Appends to ``result.relationships``: one edge per SQS dead-letter
        reference and one per resource referenced through an attribute listed
        in ``RELATIONSHIP_EXTRACTORS``.
        """
        type_index: Dict[str, List[TerraformResource]] = {}
        for res in result.resources:
            type_index.setdefault(res.resource_type, []).append(res)

        for resource in result.resources:
            # Check for DLQ redrive policy
            self._extract_dlq_relationship(resource, result, type_index)

            # Check standard attribute references
            for attr_name, (rel_type, target_type) in self.RELATIONSHIP_EXTRACTORS.items():
                value = resource.attributes.get(attr_name)
                if not value:
                    continue
                targets = self._find_referenced_resources(
                    value, target_type, type_index,
                    module_path=resource.module_path,
                )
                for target in targets:
                    result.relationships.append(ResourceRelationship(
                        source_id=resource.full_id,
                        target_id=target.full_id,
                        relationship_type=rel_type,
                    ))

    def _extract_dlq_relationship(
        self,
        resource: "TerraformResource",
        result: "ParseResult",
        type_index: "Dict[str, List[TerraformResource]]"
    ) -> None:
        """Extract SQS dead letter queue relationships.

        For an ``aws_sqs_queue`` whose ``redrive_policy`` mentions
        ``aws_sqs_queue.<name>.arn``, appends a ``redrives_to`` edge (label
        ``DLQ``) to the queue with that name, preferring one in the same module.
        """
        if resource.resource_type != 'aws_sqs_queue':
            return

        redrive = resource.attributes.get('redrive_policy')
        if not redrive:
            return

        # The policy may be a string or an already-parsed structure;
        # stringifying lets one regex cover both.
        match = re.search(r'aws_sqs_queue\.(\w+)\.arn', str(redrive))
        if not match:
            return

        queue = self._pick_by_name(
            type_index.get('aws_sqs_queue', []), match.group(1), resource.module_path
        )
        if queue is not None:
            result.relationships.append(ResourceRelationship(
                source_id=resource.full_id,
                target_id=queue.full_id,
                relationship_type='redrives_to',
                label='DLQ',
            ))

    def _find_referenced_resources(
        self,
        value: Any,
        target_type: str,
        type_index: "Dict[str, List[TerraformResource]]",
        module_path: Optional[str] = None
    ) -> "List[TerraformResource]":
        """Find resources of ``target_type`` referenced in a value.

        Args:
            value: Attribute value; it is stringified, so strings, lists and
                dicts are all scanned.
            target_type: Resource type to look for, e.g. ``aws_subnet``.
            type_index: Resources grouped by resource type.
            module_path: Module of the referencing resource. When given, a
                same-named resource in that module wins over one elsewhere;
                when None the first resource with the name is used.

        Returns:
            Matching resources in order of first appearance, each at most once.
        """
        results: list = []
        candidates = type_index.get(target_type, [])
        value_str = str(value)

        def add(res: Any) -> None:
            # Identity-based de-duplication: the same resource referenced
            # twice must not produce two edges.
            if res is not None and not any(res is seen for seen in results):
                results.append(res)

        # Direct references: <type>.<name>, optionally followed by an index,
        # splat or attribute. The look-behind rejects "data.<type>.<name>"
        # (a data source, not a managed resource) and longer identifiers that
        # merely end in the type name. Names may contain hyphens.
        pattern = rf'(?<![\w.]){re.escape(target_type)}\.([\w-]+)'
        for match in re.finditer(pattern, value_str):
            add(self._pick_by_name(candidates, match.group(1), module_path))

        # Module references: module.<name>... is mapped to the first resource
        # of the target type that lives in that module.
        for match in re.finditer(r'(?<![\w.])module\.([\w-]+)', value_str):
            referenced_module = match.group(1)
            add(next((res for res in candidates if res.module_path == referenced_module), None))

        return results
329
+
330
+
331
def get_resource_summary(result: ParseResult) -> Dict[str, int]:
    """Tally resources per resource type.

    A resource contributes its ``count`` when that is a positive number.
    Every other resource contributes 1, including those with no ``count``,
    a zero or negative ``count``, or ``for_each`` (size unknown).
    """
    totals: Dict[str, int] = {}
    for res in result.resources:
        declared = res.count
        instances = declared if declared and declared > 0 else 1
        totals[res.resource_type] = totals.get(res.resource_type, 0) + instances
    return totals