terraformgraph 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
terraformgraph/parser.py CHANGED
@@ -8,9 +8,14 @@ import logging
8
8
  import re
9
9
  from dataclasses import dataclass, field
10
10
  from pathlib import Path
11
- from typing import Any, Dict, List, Optional
11
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
12
12
 
13
13
  import hcl2
14
+ from lark.exceptions import UnexpectedInput, UnexpectedToken
15
+
16
+ if TYPE_CHECKING:
17
+ from terraformgraph.terraform_tools import TerraformStateResult
18
+ from terraformgraph.variable_resolver import VariableResolver
14
19
 
15
20
  logger = logging.getLogger(__name__)
16
21
 
@@ -18,6 +23,7 @@ logger = logging.getLogger(__name__)
18
23
  @dataclass
19
24
  class TerraformResource:
20
25
  """Represents a parsed Terraform resource."""
26
+
21
27
  resource_type: str
22
28
  resource_name: str
23
29
  module_path: str
@@ -36,15 +42,33 @@ class TerraformResource:
36
42
  @property
37
43
  def display_name(self) -> str:
38
44
  """Human-readable name for display."""
39
- name = self.attributes.get('name', self.resource_name)
40
- if isinstance(name, str) and '${' not in name:
45
+ name = self.attributes.get("name", self.resource_name)
46
+ if isinstance(name, str) and "${" not in name:
41
47
  return name
42
48
  return self.resource_name
43
49
 
50
def get_resolved_display_name(self, resolver: "VariableResolver") -> str:
    """Return the display name after interpolation resolution and truncation.

    The ``name`` attribute is preferred; when it is absent, or is not a
    string, the Terraform resource name is used instead (and is not passed
    through the resolver).

    Args:
        resolver: VariableResolver instance used to resolve interpolations
            found in the ``name`` attribute.

    Returns:
        The result of ``VariableResolver.truncate_name`` applied to the
        chosen label.
    """
    # Imported here rather than at module level: the module-level import is
    # guarded by TYPE_CHECKING and therefore unavailable at runtime.
    from terraformgraph.variable_resolver import VariableResolver

    candidate = self.attributes.get("name", self.resource_name)
    label = resolver.resolve(candidate) if isinstance(candidate, str) else self.resource_name
    return VariableResolver.truncate_name(label)
66
+
44
67
 
45
68
  @dataclass
46
69
  class ModuleCall:
47
70
  """Represents a module instantiation."""
71
+
48
72
  name: str
49
73
  source: str
50
74
  inputs: Dict[str, Any]
@@ -54,6 +78,7 @@ class ModuleCall:
54
78
  @dataclass
55
79
  class ResourceRelationship:
56
80
  """Represents a connection between resources."""
81
+
57
82
  source_id: str
58
83
  target_id: str
59
84
  relationship_type: str
@@ -63,6 +88,7 @@ class ResourceRelationship:
63
88
  @dataclass
64
89
  class ParseResult:
65
90
  """Result of parsing Terraform files."""
91
+
66
92
  resources: List[TerraformResource] = field(default_factory=list)
67
93
  modules: List[ModuleCall] = field(default_factory=list)
68
94
  relationships: List[ResourceRelationship] = field(default_factory=list)
@@ -71,39 +97,37 @@ class ParseResult:
71
97
  class TerraformParser:
72
98
  """Parses Terraform HCL files and extracts resources."""
73
99
 
74
- REFERENCE_PATTERNS = [
75
- # module.X.output
76
- (r'module\.(\w+)\.(\w+)', 'module_ref'),
77
- # aws_resource.name.attribute
78
- (r'(aws_\w+)\.(\w+)\.(\w+)', 'resource_ref'),
79
- # var.X
80
- (r'var\.(\w+)', 'var_ref'),
81
- # local.X
82
- (r'local\.(\w+)', 'local_ref'),
83
- ]
84
-
85
100
  RELATIONSHIP_EXTRACTORS = {
86
- 'vpc_id': ('belongs_to_vpc', 'aws_vpc'),
87
- 'subnet_id': ('deployed_in_subnet', 'aws_subnet'),
88
- 'subnet_ids': ('deployed_in_subnets', 'aws_subnet'),
89
- 'security_group_ids': ('uses_security_group', 'aws_security_group'),
90
- 'kms_master_key_id': ('encrypted_by', 'aws_kms_key'),
91
- 'kms_key_id': ('encrypted_by', 'aws_kms_key'),
92
- 'target_group_arn': ('routes_to', 'aws_lb_target_group'),
93
- 'load_balancer_arn': ('attached_to', 'aws_lb'),
94
- 'web_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
95
- 'waf_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
96
- 'certificate_arn': ('uses_certificate', 'aws_acm_certificate'),
97
- 'role_arn': ('assumes_role', 'aws_iam_role'),
98
- 'queue_arn': ('sends_to_queue', 'aws_sqs_queue'),
99
- 'topic_arn': ('publishes_to', 'aws_sns_topic'),
100
- 'alarm_topic_arn': ('alerts_to', 'aws_sns_topic'),
101
+ "vpc_id": ("belongs_to_vpc", "aws_vpc"),
102
+ "subnet_id": ("deployed_in_subnet", "aws_subnet"),
103
+ "subnet_ids": ("deployed_in_subnets", "aws_subnet"),
104
+ "security_group_ids": ("uses_security_group", "aws_security_group"),
105
+ "vpc_security_group_ids": ("uses_security_group", "aws_security_group"),
106
+ "security_groups": ("uses_security_group", "aws_security_group"),
107
+ "kms_master_key_id": ("encrypted_by", "aws_kms_key"),
108
+ "kms_key_id": ("encrypted_by", "aws_kms_key"),
109
+ "target_group_arn": ("routes_to", "aws_lb_target_group"),
110
+ "load_balancer_arn": ("attached_to", "aws_lb"),
111
+ "web_acl_arn": ("protected_by", "aws_wafv2_web_acl"),
112
+ "waf_acl_arn": ("protected_by", "aws_wafv2_web_acl"),
113
+ "certificate_arn": ("uses_certificate", "aws_acm_certificate"),
114
+ "role_arn": ("assumes_role", "aws_iam_role"),
115
+ "queue_arn": ("sends_to_queue", "aws_sqs_queue"),
116
+ "topic_arn": ("publishes_to", "aws_sns_topic"),
117
+ "alarm_topic_arn": ("alerts_to", "aws_sns_topic"),
101
118
  }
102
119
 
103
- def __init__(self, infrastructure_path: str, icons_path: Optional[str] = None):
120
def __init__(
    self,
    infrastructure_path: str,
    use_terraform_state: bool = False,
    state_file: Optional[str] = None,
):
    """Set up a parser rooted at ``infrastructure_path``.

    Args:
        infrastructure_path: Root directory of the Terraform code; stored
            as a ``Path``.
        use_terraform_state: When true, parsed resources are additionally
            enriched from terraform state.
        state_file: Optional path to a state file; stored as a ``Path``,
            or ``None`` when not given (or empty).
    """
    self.infrastructure_path = Path(infrastructure_path)

    # Terraform-state options.
    self.use_terraform_state = use_terraform_state
    if state_file:
        self.state_file = Path(state_file)
    else:
        self.state_file = None

    # Populated lazily: the state result only exists after a state-enabled parse.
    self._state_result: Optional["TerraformStateResult"] = None
    self._parsed_modules: Dict[str, ParseResult] = {}
107
131
 
108
132
  def parse_environment(self, environment: str) -> ParseResult:
109
133
  """Parse all Terraform files for a specific environment."""
@@ -122,9 +146,6 @@ class TerraformParser:
122
146
  Returns:
123
147
  ParseResult with all resources and relationships
124
148
  """
125
- if isinstance(directory, str):
126
- directory = Path(directory)
127
-
128
149
  if not directory.exists():
129
150
  raise ValueError(f"Directory does not exist: {directory}")
130
151
 
@@ -148,19 +169,60 @@ class TerraformParser:
148
169
  # Extract relationships from all resources
149
170
  self._extract_relationships(result)
150
171
 
172
+ # Enhance with terraform state if requested
173
+ if self.use_terraform_state:
174
+ self._enhance_with_terraform_state(result, directory)
175
+
151
176
  return result
152
177
 
178
def _enhance_with_terraform_state(self, result: "ParseResult", directory: Path) -> None:
    """Load terraform state for ``directory`` and merge it into ``result``.

    Nothing happens when the runner returns a falsy state result.

    Args:
        result: Parse result whose resources should be enriched.
        directory: Directory handed to ``TerraformToolsRunner``.
    """
    # Local import: terraform_tools is only needed when state is requested.
    from terraformgraph.terraform_tools import TerraformToolsRunner

    state = TerraformToolsRunner(directory).run_show_json(state_file=self.state_file)
    if not state:
        return

    # Keep the raw state around so callers can fetch it via get_state_result().
    self._state_result = state
    self._enrich_resources_with_state(result, state)
    logger.info("Enhanced with terraform state: %d resources", len(state.resources))
188
+
189
def _enrich_resources_with_state(
    self, result: "ParseResult", state_result: "TerraformStateResult"
) -> None:
    """Copy values from terraform state onto the matching parsed resources.

    Args:
        result: Parse result whose resources are updated in place.
        state_result: State data; each entry exposes ``address`` and
            ``values``.
    """
    from terraformgraph.terraform_tools import map_state_to_resource_id

    # Look-up table keyed by the parsed resource's full_id.
    parsed_by_id = {}
    for parsed in result.resources:
        parsed_by_id[parsed.full_id] = parsed

    for entry in state_result.resources:
        # Presumably converts a state address into the full_id form used
        # above - verify against terraform_tools.
        target = parsed_by_id.get(map_state_to_resource_id(entry.address))
        if target is None:
            continue
        # Each non-None state value is stored under a "_state_"-prefixed key,
        # so the attribute parsed from HCL under the plain key is left as is.
        target.attributes.update(
            {
                f"_state_{key}": value
                for key, value in entry.values.items()
                if value is not None
            }
        )
207
+
208
def get_state_result(self) -> Optional["TerraformStateResult"]:
    """Return the stored terraform state result.

    Returns:
        The value held in ``self._state_result``; ``None`` until a
        state-enabled parse has stored one.
    """
    return self._state_result
211
+
153
212
  def _parse_file(self, file_path: Path, result: ParseResult, module_path: str) -> None:
154
213
  """Parse a single Terraform file."""
155
214
  try:
156
- with open(file_path, 'r') as f:
215
+ with open(file_path, "r", encoding="utf-8") as f:
157
216
  content = hcl2.load(f)
158
- except Exception as e:
159
- logger.warning("Could not parse %s: %s", file_path, e)
217
+ except OSError as e:
218
+ logger.warning("Could not read %s: %s", file_path, e)
219
+ return
220
+ except (UnexpectedInput, UnexpectedToken) as e:
221
+ logger.warning("Could not parse HCL in %s: %s", file_path, e)
160
222
  return
161
223
 
162
224
  # Extract resources
163
- for resource_block in content.get('resource', []):
225
+ for resource_block in content.get("resource", []):
164
226
  for resource_type, resources in resource_block.items():
165
227
  for resource_name, config in resources.items():
166
228
  # Handle list configs (HCL2 can return lists)
@@ -174,32 +236,29 @@ class TerraformParser:
174
236
  attributes=config,
175
237
  source_file=str(file_path),
176
238
  count=self._extract_count(config),
177
- for_each='for_each' in config
239
+ for_each="for_each" in config,
178
240
  )
179
241
  result.resources.append(resource)
180
242
 
181
243
  # Extract module calls
182
- for module_block in content.get('module', []):
244
+ for module_block in content.get("module", []):
183
245
  for module_name, config in module_block.items():
184
246
  if isinstance(config, list):
185
247
  config = config[0] if config else {}
186
248
 
187
- source = config.get('source', '')
249
+ source = config.get("source", "")
188
250
  module = ModuleCall(
189
- name=module_name,
190
- source=source,
191
- inputs=config,
192
- source_file=str(file_path)
251
+ name=module_name, source=source, inputs=config, source_file=str(file_path)
193
252
  )
194
253
  result.modules.append(module)
195
254
 
196
255
  def _parse_module(self, source: str, base_path: Path, module_name: str) -> ParseResult:
197
256
  """Parse a module from its source path."""
198
257
  # Resolve relative path
199
- if source.startswith('../') or source.startswith('./'):
258
+ if source.startswith("../") or source.startswith("./"):
200
259
  module_path = (base_path / source).resolve()
201
260
  else:
202
- module_path = self.infrastructure_path / '.modules' / source
261
+ module_path = self.infrastructure_path / ".modules" / source
203
262
 
204
263
  if not module_path.exists():
205
264
  logger.warning("Module path not found: %s", module_path)
@@ -219,7 +278,7 @@ class TerraformParser:
219
278
  attributes=res.attributes,
220
279
  source_file=res.source_file,
221
280
  count=res.count,
222
- for_each=res.for_each
281
+ for_each=res.for_each,
223
282
  )
224
283
  result.resources.append(new_res)
225
284
  return result
@@ -233,7 +292,7 @@ class TerraformParser:
233
292
 
234
293
  def _extract_count(self, config: Dict[str, Any]) -> Optional[int]:
235
294
  """Extract count value from resource config."""
236
- count = config.get('count')
295
+ count = config.get("count")
237
296
  if count is None:
238
297
  return None
239
298
  if isinstance(count, int):
@@ -263,54 +322,247 @@ class TerraformParser:
263
322
  if value:
264
323
  targets = self._find_referenced_resources(value, target_type, type_index)
265
324
  for target in targets:
266
- result.relationships.append(ResourceRelationship(
267
- source_id=resource.full_id,
268
- target_id=target.full_id,
269
- relationship_type=rel_type
270
- ))
325
+ result.relationships.append(
326
+ ResourceRelationship(
327
+ source_id=resource.full_id,
328
+ target_id=target.full_id,
329
+ relationship_type=rel_type,
330
+ )
331
+ )
332
+
333
+ # Deep scan: find resource references in ALL attributes (catches nested refs
334
+ # like environment.variables that RELATIONSHIP_EXTRACTORS miss)
335
+ self._extract_deep_references(resource, result, type_index)
336
+
337
+ # Check for security group cross-references
338
+ self._extract_sg_cross_references(resource, result, type_index)
339
+
340
+ # Resource types excluded from deep scan (infrastructure plumbing, not logical connections)
341
+ _DEEP_SCAN_EXCLUDED_TYPES = frozenset({
342
+ "aws_security_group", "aws_iam_role", "aws_iam_policy",
343
+ "aws_subnet", "aws_vpc", "aws_route_table", "aws_route_table_association",
344
+ "aws_eip", "aws_network_interface",
345
+ })
346
+
347
def _extract_deep_references(
    self,
    resource: "TerraformResource",
    result: "ParseResult",
    type_index: Dict[str, List["TerraformResource"]],
) -> None:
    """Scan every attribute value for resource references.

    Catches references that RELATIONSHIP_EXTRACTORS miss (e.g. values nested
    inside blocks) and records each one as a ``"references"`` relationship.

    A referenced name is resolved to the *first* resource of that type with
    a matching name, and each distinct name is handled once per type. If
    that resource is already linked from ``resource``, nothing is added - a
    repeated textual occurrence must not fall through to a same-named
    resource elsewhere (for example in another module).

    Args:
        resource: Resource whose attributes are scanned.
        result: Parse result; new relationships are appended to
            ``result.relationships``.
        type_index: Known resources grouped by resource type.
    """
    source_id = resource.full_id

    # Targets already linked from this resource, to avoid duplicates.
    known_targets = {
        rel.target_id for rel in result.relationships if rel.source_id == source_id
    }

    # Flatten the whole attribute tree to text so nested values are covered.
    attrs_str = str(resource.attributes)

    for target_type, candidates in type_index.items():
        if target_type == resource.resource_type:
            continue  # Skip self-type references
        if target_type in self._DEEP_SCAN_EXCLUDED_TYPES:
            continue  # Skip infrastructure plumbing types

        # The look-behind rejects matches preceded by a word character or a
        # dot, so "data.<type>.<name>." (a data source) and longer
        # identifiers ending in <type> are not mistaken for this type.
        pattern = rf"(?<![\w.]){re.escape(target_type)}\.(\w+)\."
        handled_names = set()
        for match in re.finditer(pattern, attrs_str):
            res_name = match.group(1)
            if res_name in handled_names:
                continue
            handled_names.add(res_name)

            target_res = next(
                (cand for cand in candidates if cand.resource_name == res_name),
                None,
            )
            if target_res is None or target_res.full_id in known_targets:
                continue

            known_targets.add(target_res.full_id)
            result.relationships.append(
                ResourceRelationship(
                    source_id=source_id,
                    target_id=target_res.full_id,
                    relationship_type="references",
                )
            )
271
381
 
272
382
def _extract_dlq_relationship(
    self,
    resource: "TerraformResource",
    result: "ParseResult",
    type_index: Dict[str, List["TerraformResource"]],
) -> None:
    """Extract SQS dead letter queue relationships.

    Looks for an ``aws_sqs_queue.<name>.arn`` reference inside the queue's
    ``redrive_policy`` and, when a queue with that name is known, appends a
    ``"redrives_to"`` relationship labelled ``"DLQ"``.

    Args:
        resource: Resource to inspect; anything other than ``aws_sqs_queue``
            is ignored.
        result: Parse result; the relationship is appended to
            ``result.relationships``.
        type_index: Known resources grouped by resource type.
    """
    if resource.resource_type != "aws_sqs_queue":
        return

    redrive = resource.attributes.get("redrive_policy")
    if not redrive:
        return

    # The policy may arrive as a string or as a parsed structure (dict/list).
    # Stringifying covers both, so a structured policy is no longer skipped.
    match = re.search(r"aws_sqs_queue\.(\w+)\.arn", str(redrive))
    if match is None:
        return

    dlq_name = match.group(1)
    for queue in type_index.get("aws_sqs_queue", []):
        if queue.resource_name == dlq_name:
            result.relationships.append(
                ResourceRelationship(
                    source_id=resource.full_id,
                    target_id=queue.full_id,
                    relationship_type="redrives_to",
                    label="DLQ",
                )
            )
            break  # First queue with a matching name wins.
301
413
 
302
- def _find_referenced_resources(
414
def _extract_sg_cross_references(
    self,
    resource: "TerraformResource",
    result: "ParseResult",
    type_index: Dict[str, List["TerraformResource"]],
) -> None:
    """Record which security groups admit traffic from which others.

    Appends ``sg_allows_from`` relationships (source group -> group owning
    the rule) for three rule shapes:

    * inline ``ingress`` entries on an ``aws_security_group``;
    * standalone ``aws_security_group_rule`` resources with ``type`` equal
      to ``"ingress"``;
    * ``aws_vpc_security_group_ingress_rule`` resources.

    Args:
        resource: Resource to inspect; other resource types are ignored.
        result: Parse result; relationships are appended in place.
        type_index: Known resources grouped by resource type.
    """
    security_groups = type_index.get("aws_security_group", [])
    if not security_groups:
        return

    kind = resource.resource_type
    attrs = resource.attributes

    # Shape 1: inline rules, delegated one entry at a time.
    if kind == "aws_security_group":
        inline_rules = attrs.get("ingress", [])
        if isinstance(inline_rules, list):
            for entry in inline_rules:
                if isinstance(entry, dict):
                    self._process_sg_rule(
                        entry, resource.full_id, result, security_groups, is_inline=True
                    )
        return

    # Shapes 2 and 3 differ only in the attribute naming the peer group
    # (and in the extra direction check for the generic rule resource).
    if kind == "aws_security_group_rule":
        if attrs.get("type") != "ingress":
            return
        peer_key = "source_security_group_id"
    elif kind == "aws_vpc_security_group_ingress_rule":
        peer_key = "referenced_security_group_id"
    else:
        return

    # The group the rule is attached to.
    owner_sg = self._resolve_sg_ref(str(attrs.get("security_group_id", "")), security_groups)
    if not owner_sg:
        return

    # The group traffic is allowed from; a self-reference is not an edge.
    peer_sg = self._resolve_sg_ref(str(attrs.get(peer_key, "")), security_groups)
    if not peer_sg or peer_sg.full_id == owner_sg.full_id:
        return

    result.relationships.append(
        ResourceRelationship(
            source_id=peer_sg.full_id,
            target_id=owner_sg.full_id,
            relationship_type="sg_allows_from",
            label=self._format_port_label(attrs),
        )
    )
483
+
484
def _process_sg_rule(
    self,
    rule: dict,
    sg_full_id: str,
    result: "ParseResult",
    sg_resources: List["TerraformResource"],
    is_inline: bool = True,
) -> None:
    """Turn one ingress rule's ``security_groups`` entries into relationships.

    Args:
        rule: Ingress rule attributes.
        sg_full_id: ``full_id`` of the security group that owns the rule.
        result: Parse result; relationships are appended in place.
        sg_resources: All known ``aws_security_group`` resources.
        is_inline: Accepted for callers; not consulted by this method.
    """
    raw_refs = rule.get("security_groups", [])
    # Normalise to a list: a lone truthy value becomes a one-item list,
    # a falsy non-list value becomes an empty one.
    if isinstance(raw_refs, list):
        references = raw_refs
    elif raw_refs:
        references = [raw_refs]
    else:
        references = []

    for reference in references:
        origin = self._resolve_sg_ref(str(reference), sg_resources)
        # Unresolvable references and self-references produce no edge.
        if not origin or origin.full_id == sg_full_id:
            continue
        result.relationships.append(
            ResourceRelationship(
                source_id=origin.full_id,
                target_id=sg_full_id,
                relationship_type="sg_allows_from",
                label=self._format_port_label(rule),
            )
        )
510
+
511
@staticmethod
def _resolve_sg_ref(
    value: str, sg_resources: List["TerraformResource"]
) -> Optional["TerraformResource"]:
    """Resolve a security group reference to a TerraformResource.

    Only managed-resource references (``aws_security_group.<name>``) are
    resolved. A ``data.aws_security_group.<name>`` reference points at a
    data source, not at one of ``sg_resources``, so it is not matched.

    Args:
        value: Attribute value (already stringified) that may contain a
            reference.
        sg_resources: Known ``aws_security_group`` resources.

    Returns:
        The first resource whose ``resource_name`` equals the referenced
        name, or ``None`` when ``value`` is empty, holds no reference, or
        names an unknown group.
    """
    if not value:
        return None

    # The look-behind rejects a preceding word character or dot, which rules
    # out "data.aws_security_group.x" and identifiers that merely end in
    # "aws_security_group".
    match = re.search(r"(?<![\w.])aws_security_group\.(\w+)", value)
    if match is None:
        return None

    name = match.group(1)
    return next((sg for sg in sg_resources if sg.resource_name == name), None)
525
+
526
@staticmethod
def _format_port_label(attrs: dict) -> str:
    """Format a port label from rule attributes (e.g., 'TCP/80').

    The protocol is read from ``protocol`` and, when that is absent, from
    ``ip_protocol`` (the name used by the AWS provider's standalone
    ``aws_vpc_security_group_ingress_rule`` resource); it defaults to
    ``"tcp"``. All-traffic rules are recognised before the ports are
    examined, because such rules may omit the port range entirely.

    Args:
        attrs: Rule attributes; ``from_port``, ``to_port``, ``protocol`` and
            ``ip_protocol`` are consulted.

    Returns:
        ``"All Traffic"`` for protocol ``-1`` / ``"-1"`` / ``"all"``;
        ``""`` when no ``from_port`` is given; otherwise
        ``"<PROTO>/<port>"``, ``"<PROTO>/All"`` (0-65535) or
        ``"<PROTO>/<from>-<to>"``.
    """
    protocol = attrs.get("protocol")
    if protocol is None:
        protocol = attrs.get("ip_protocol", "tcp")
    if isinstance(protocol, str):
        protocol = protocol.upper()
    # str() also covers a numeric -1 coming from an unquoted HCL value.
    if str(protocol) in ("-1", "ALL"):
        return "All Traffic"

    from_port = attrs.get("from_port")
    if from_port is None:
        return ""
    to_port = attrs.get("to_port")

    def _as_int(port):
        """Coerce a port to int; leave it untouched when that is impossible."""
        try:
            return int(port)
        except (TypeError, ValueError):
            # e.g. None or an unresolved "${var.port}" interpolation
            return port

    from_port = _as_int(from_port)
    to_port = _as_int(to_port)

    if to_port is None or from_port == to_port:
        return f"{protocol}/{from_port}"
    if from_port == 0 and to_port == 65535:
        return f"{protocol}/All"
    return f"{protocol}/{from_port}-{to_port}"
556
+
557
+ def _find_referenced_resources(
558
+ self, value: Any, target_type: str, type_index: Dict[str, List[TerraformResource]]
307
559
  ) -> List[TerraformResource]:
308
560
  """Find resources referenced in a value."""
309
561
  results = []
310
562
  value_str = str(value)
311
563
 
312
564
  # Look for resource references
313
- pattern = rf'{target_type}\.(\w+)\.'
565
+ pattern = rf"{target_type}\.(\w+)\."
314
566
  for match in re.finditer(pattern, value_str):
315
567
  res_name = match.group(1)
316
568
  for res in type_index.get(target_type, []):
@@ -319,7 +571,7 @@ class TerraformParser:
319
571
  break
320
572
 
321
573
  # Look for module references
322
- module_pattern = r'module\.(\w+)\.(\w+)'
574
+ module_pattern = r"module\.(\w+)\.(\w+)"
323
575
  for match in re.finditer(module_pattern, value_str):
324
576
  module_name = match.group(1)
325
577
  # Find resources in that module
@@ -329,16 +581,3 @@ class TerraformParser:
329
581
  break
330
582
 
331
583
  return results
332
-
333
-
334
- def get_resource_summary(result: ParseResult) -> Dict[str, int]:
335
- """Get a summary count of resources by type."""
336
- summary: Dict[str, int] = {}
337
- for resource in result.resources:
338
- count = 1
339
- if resource.count and resource.count > 0:
340
- count = resource.count
341
- elif resource.for_each:
342
- count = 1 # Unknown, but at least 1
343
- summary[resource.resource_type] = summary.get(resource.resource_type, 0) + count
344
- return summary