PyPI - terraformgraph - Versions diffs - 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

terraformgraph 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

terraformgraph/__main__.py +1 -1
terraformgraph/aggregator.py +941 -44
terraformgraph/config/aggregation_rules.yaml +276 -1
terraformgraph/config_loader.py +9 -8
terraformgraph/icons.py +504 -521
terraformgraph/layout.py +580 -116
terraformgraph/main.py +251 -48
terraformgraph/parser.py +323 -84
terraformgraph/renderer.py +1864 -167
terraformgraph/terraform_tools.py +355 -0
terraformgraph/variable_resolver.py +180 -0
terraformgraph-1.0.4.dist-info/METADATA +386 -0
terraformgraph-1.0.4.dist-info/RECORD +19 -0
{terraformgraph-1.0.3.dist-info → terraformgraph-1.0.4.dist-info}/licenses/LICENSE +1 -1
terraformgraph-1.0.3.dist-info/METADATA +0 -163
terraformgraph-1.0.3.dist-info/RECORD +0 -17
{terraformgraph-1.0.3.dist-info → terraformgraph-1.0.4.dist-info}/WHEEL +0 -0
{terraformgraph-1.0.3.dist-info → terraformgraph-1.0.4.dist-info}/entry_points.txt +0 -0
{terraformgraph-1.0.3.dist-info → terraformgraph-1.0.4.dist-info}/top_level.txt +0 -0

terraformgraph/parser.py CHANGED Viewed

@@ -8,9 +8,14 @@ import logging
 import re
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 import hcl2
+from lark.exceptions import UnexpectedInput, UnexpectedToken
+if TYPE_CHECKING:
+    from terraformgraph.terraform_tools import TerraformStateResult
+    from terraformgraph.variable_resolver import VariableResolver
 logger = logging.getLogger(__name__)
@@ -18,6 +23,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class TerraformResource:
     """Represents a parsed Terraform resource."""
     resource_type: str
     resource_name: str
     module_path: str
@@ -36,15 +42,33 @@ class TerraformResource:
     @property
     def display_name(self) -> str:
         """Human-readable name for display."""
-        name = self.attributes.get('name', self.resource_name)
-        if isinstance(name, str) and '${' not in name:
+        name = self.attributes.get("name", self.resource_name)
+        if isinstance(name, str) and "${" not in name:
             return name
         return self.resource_name
+    def get_resolved_display_name(self, resolver: "VariableResolver") -> str:
+        """Get display name with interpolations resolved and truncated.
+        Args:
+            resolver: VariableResolver instance for resolving interpolations
+        Returns:
+            Resolved and truncated display name
+        """
+        from terraformgraph.variable_resolver import VariableResolver
+        name = self.attributes.get("name", self.resource_name)
+        if isinstance(name, str):
+            resolved_name = resolver.resolve(name)
+            return VariableResolver.truncate_name(resolved_name)
+        return VariableResolver.truncate_name(self.resource_name)
 @dataclass
 class ModuleCall:
     """Represents a module instantiation."""
     name: str
     source: str
     inputs: Dict[str, Any]
@@ -54,6 +78,7 @@ class ModuleCall:
 @dataclass
 class ResourceRelationship:
     """Represents a connection between resources."""
     source_id: str
     target_id: str
     relationship_type: str
@@ -63,6 +88,7 @@ class ResourceRelationship:
 @dataclass
 class ParseResult:
     """Result of parsing Terraform files."""
     resources: List[TerraformResource] = field(default_factory=list)
     modules: List[ModuleCall] = field(default_factory=list)
     relationships: List[ResourceRelationship] = field(default_factory=list)
@@ -71,39 +97,37 @@ class ParseResult:
 class TerraformParser:
     """Parses Terraform HCL files and extracts resources."""
-    REFERENCE_PATTERNS = [
-        # module.X.output
-        (r'module\.(\w+)\.(\w+)', 'module_ref'),
-        # aws_resource.name.attribute
-        (r'(aws_\w+)\.(\w+)\.(\w+)', 'resource_ref'),
-        # var.X
-        (r'var\.(\w+)', 'var_ref'),
-        # local.X
-        (r'local\.(\w+)', 'local_ref'),
-    ]
     RELATIONSHIP_EXTRACTORS = {
-        'vpc_id': ('belongs_to_vpc', 'aws_vpc'),
-        'subnet_id': ('deployed_in_subnet', 'aws_subnet'),
-        'subnet_ids': ('deployed_in_subnets', 'aws_subnet'),
-        'security_group_ids': ('uses_security_group', 'aws_security_group'),
-        'kms_master_key_id': ('encrypted_by', 'aws_kms_key'),
-        'kms_key_id': ('encrypted_by', 'aws_kms_key'),
-        'target_group_arn': ('routes_to', 'aws_lb_target_group'),
-        'load_balancer_arn': ('attached_to', 'aws_lb'),
-        'web_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
-        'waf_acl_arn': ('protected_by', 'aws_wafv2_web_acl'),
-        'certificate_arn': ('uses_certificate', 'aws_acm_certificate'),
-        'role_arn': ('assumes_role', 'aws_iam_role'),
-        'queue_arn': ('sends_to_queue', 'aws_sqs_queue'),
-        'topic_arn': ('publishes_to', 'aws_sns_topic'),
-        'alarm_topic_arn': ('alerts_to', 'aws_sns_topic'),
+        "vpc_id": ("belongs_to_vpc", "aws_vpc"),
+        "subnet_id": ("deployed_in_subnet", "aws_subnet"),
+        "subnet_ids": ("deployed_in_subnets", "aws_subnet"),
+        "security_group_ids": ("uses_security_group", "aws_security_group"),
+        "vpc_security_group_ids": ("uses_security_group", "aws_security_group"),
+        "security_groups": ("uses_security_group", "aws_security_group"),
+        "kms_master_key_id": ("encrypted_by", "aws_kms_key"),
+        "kms_key_id": ("encrypted_by", "aws_kms_key"),
+        "target_group_arn": ("routes_to", "aws_lb_target_group"),
+        "load_balancer_arn": ("attached_to", "aws_lb"),
+        "web_acl_arn": ("protected_by", "aws_wafv2_web_acl"),
+        "waf_acl_arn": ("protected_by", "aws_wafv2_web_acl"),
+        "certificate_arn": ("uses_certificate", "aws_acm_certificate"),
+        "role_arn": ("assumes_role", "aws_iam_role"),
+        "queue_arn": ("sends_to_queue", "aws_sqs_queue"),
+        "topic_arn": ("publishes_to", "aws_sns_topic"),
+        "alarm_topic_arn": ("alerts_to", "aws_sns_topic"),
     }
-    def __init__(self, infrastructure_path: str, icons_path: Optional[str] = None):
+    def __init__(
+        self,
+        infrastructure_path: str,
+        use_terraform_state: bool = False,
+        state_file: Optional[str] = None,
+    ):
         self.infrastructure_path = Path(infrastructure_path)
-        self.icons_path = Path(icons_path) if icons_path else None
         self._parsed_modules: Dict[str, ParseResult] = {}
+        self.use_terraform_state = use_terraform_state
+        self.state_file = Path(state_file) if state_file else None
+        self._state_result: Optional["TerraformStateResult"] = None
     def parse_environment(self, environment: str) -> ParseResult:
         """Parse all Terraform files for a specific environment."""
@@ -122,9 +146,6 @@ class TerraformParser:
         Returns:
             ParseResult with all resources and relationships
         """
-        if isinstance(directory, str):
-            directory = Path(directory)
         if not directory.exists():
             raise ValueError(f"Directory does not exist: {directory}")
@@ -148,19 +169,60 @@ class TerraformParser:
         # Extract relationships from all resources
         self._extract_relationships(result)
+        # Enhance with terraform state if requested
+        if self.use_terraform_state:
+            self._enhance_with_terraform_state(result, directory)
         return result
+    def _enhance_with_terraform_state(self, result: ParseResult, directory: Path) -> None:
+        """Enhance parse result with data from terraform state."""
+        from terraformgraph.terraform_tools import TerraformToolsRunner
+        runner = TerraformToolsRunner(directory)
+        state_result = runner.run_show_json(state_file=self.state_file)
+        if state_result:
+            self._state_result = state_result
+            self._enrich_resources_with_state(result, state_result)
+            logger.info("Enhanced with terraform state: %d resources", len(state_result.resources))
+    def _enrich_resources_with_state(
+        self, result: ParseResult, state_result: "TerraformStateResult"
+    ) -> None:
+        """Enrich parsed resources with actual values from terraform state."""
+        from terraformgraph.terraform_tools import map_state_to_resource_id
+        # Build index by full_id
+        resource_index = {r.full_id: r for r in result.resources}
+        for state_res in state_result.resources:
+            resource_id = map_state_to_resource_id(state_res.address)
+            if resource_id in resource_index:
+                resource = resource_index[resource_id]
+                # Merge state values into attributes (state values take precedence)
+                for key, value in state_res.values.items():
+                    if value is not None:
+                        resource.attributes[f"_state_{key}"] = value
+    def get_state_result(self) -> Optional["TerraformStateResult"]:
+        """Get the terraform state result if available."""
+        return self._state_result
     def _parse_file(self, file_path: Path, result: ParseResult, module_path: str) -> None:
         """Parse a single Terraform file."""
         try:
-            with open(file_path, 'r') as f:
+            with open(file_path, "r", encoding="utf-8") as f:
                 content = hcl2.load(f)
-        except Exception as e:
-            logger.warning("Could not parse %s: %s", file_path, e)
+        except OSError as e:
+            logger.warning("Could not read %s: %s", file_path, e)
+            return
+        except (UnexpectedInput, UnexpectedToken) as e:
+            logger.warning("Could not parse HCL in %s: %s", file_path, e)
             return
         # Extract resources
-        for resource_block in content.get('resource', []):
+        for resource_block in content.get("resource", []):
             for resource_type, resources in resource_block.items():
                 for resource_name, config in resources.items():
                     # Handle list configs (HCL2 can return lists)
@@ -174,32 +236,29 @@ class TerraformParser:
                         attributes=config,
                         source_file=str(file_path),
                         count=self._extract_count(config),
-                        for_each='for_each' in config
+                        for_each="for_each" in config,
                     )
                     result.resources.append(resource)
         # Extract module calls
-        for module_block in content.get('module', []):
+        for module_block in content.get("module", []):
             for module_name, config in module_block.items():
                 if isinstance(config, list):
                     config = config[0] if config else {}
-                source = config.get('source', '')
+                source = config.get("source", "")
                 module = ModuleCall(
-                    name=module_name,
-                    source=source,
-                    inputs=config,
-                    source_file=str(file_path)
+                    name=module_name, source=source, inputs=config, source_file=str(file_path)
                 )
                 result.modules.append(module)
     def _parse_module(self, source: str, base_path: Path, module_name: str) -> ParseResult:
         """Parse a module from its source path."""
         # Resolve relative path
-        if source.startswith('../') or source.startswith('./'):
+        if source.startswith("../") or source.startswith("./"):
             module_path = (base_path / source).resolve()
         else:
-            module_path = self.infrastructure_path / '.modules' / source
+            module_path = self.infrastructure_path / ".modules" / source
         if not module_path.exists():
             logger.warning("Module path not found: %s", module_path)
@@ -219,7 +278,7 @@ class TerraformParser:
                     attributes=res.attributes,
                     source_file=res.source_file,
                     count=res.count,
-                    for_each=res.for_each
+                    for_each=res.for_each,
                 )
                 result.resources.append(new_res)
             return result
@@ -233,7 +292,7 @@ class TerraformParser:
     def _extract_count(self, config: Dict[str, Any]) -> Optional[int]:
         """Extract count value from resource config."""
-        count = config.get('count')
+        count = config.get("count")
         if count is None:
             return None
         if isinstance(count, int):
@@ -263,54 +322,247 @@ class TerraformParser:
                 if value:
                     targets = self._find_referenced_resources(value, target_type, type_index)
                     for target in targets:
-                        result.relationships.append(ResourceRelationship(
-                            source_id=resource.full_id,
-                            target_id=target.full_id,
-                            relationship_type=rel_type
-                        ))
+                        result.relationships.append(
+                            ResourceRelationship(
+                                source_id=resource.full_id,
+                                target_id=target.full_id,
+                                relationship_type=rel_type,
+                            )
+                        )
+            # Deep scan: find resource references in ALL attributes (catches nested refs
+            # like environment.variables that RELATIONSHIP_EXTRACTORS miss)
+            self._extract_deep_references(resource, result, type_index)
+            # Check for security group cross-references
+            self._extract_sg_cross_references(resource, result, type_index)
+    # Resource types excluded from deep scan (infrastructure plumbing, not logical connections)
+    _DEEP_SCAN_EXCLUDED_TYPES = frozenset({
+        "aws_security_group", "aws_iam_role", "aws_iam_policy",
+        "aws_subnet", "aws_vpc", "aws_route_table", "aws_route_table_association",
+        "aws_eip", "aws_network_interface",
+    })
+    def _extract_deep_references(
+        self,
+        resource: TerraformResource,
+        result: ParseResult,
+        type_index: Dict[str, List[TerraformResource]],
+    ) -> None:
+        """Scan all attribute values for resource references not caught by RELATIONSHIP_EXTRACTORS."""
+        # Build set of already-known targets to avoid duplicates
+        known_targets: set = set()
+        for rel in result.relationships:
+            if rel.source_id == resource.full_id:
+                known_targets.add(rel.target_id)
+        # Convert entire attributes dict to string and scan for all known resource types
+        attrs_str = str(resource.attributes)
+        for target_type, resources_of_type in type_index.items():
+            if target_type == resource.resource_type:
+                continue  # Skip self-type references
+            if target_type in self._DEEP_SCAN_EXCLUDED_TYPES:
+                continue  # Skip infrastructure plumbing types
+            pattern = rf"{re.escape(target_type)}\.(\w+)\."
+            for match in re.finditer(pattern, attrs_str):
+                res_name = match.group(1)
+                for target_res in resources_of_type:
+                    if target_res.resource_name == res_name and target_res.full_id not in known_targets:
+                        known_targets.add(target_res.full_id)
+                        result.relationships.append(
+                            ResourceRelationship(
+                                source_id=resource.full_id,
+                                target_id=target_res.full_id,
+                                relationship_type="references",
+                            )
+                        )
+                        break
     def _extract_dlq_relationship(
         self,
         resource: TerraformResource,
         result: ParseResult,
-        type_index: Dict[str, List[TerraformResource]]
+        type_index: Dict[str, List[TerraformResource]],
     ) -> None:
         """Extract SQS dead letter queue relationships."""
-        if resource.resource_type != 'aws_sqs_queue':
+        if resource.resource_type != "aws_sqs_queue":
             return
-        redrive = resource.attributes.get('redrive_policy')
+        redrive = resource.attributes.get("redrive_policy")
         if not redrive:
             return
         # Parse redrive policy (could be string or dict)
         if isinstance(redrive, str):
             # Try to find DLQ reference in string
-            match = re.search(r'aws_sqs_queue\.(\w+)\.arn', redrive)
+            match = re.search(r"aws_sqs_queue\.(\w+)\.arn", redrive)
             if match:
                 dlq_name = match.group(1)
-                for queue in type_index.get('aws_sqs_queue', []):
+                for queue in type_index.get("aws_sqs_queue", []):
                     if queue.resource_name == dlq_name:
-                        result.relationships.append(ResourceRelationship(
-                            source_id=resource.full_id,
-                            target_id=queue.full_id,
-                            relationship_type='redrives_to',
-                            label='DLQ'
-                        ))
+                        result.relationships.append(
+                            ResourceRelationship(
+                                source_id=resource.full_id,
+                                target_id=queue.full_id,
+                                relationship_type="redrives_to",
+                                label="DLQ",
+                            )
+                        )
                         break
-    def _find_referenced_resources(
+    def _extract_sg_cross_references(
         self,
-        value: Any,
-        target_type: str,
-        type_index: Dict[str, List[TerraformResource]]
+        resource: TerraformResource,
+        result: ParseResult,
+        type_index: Dict[str, List[TerraformResource]],
+    ) -> None:
+        """Extract security group cross-references from ingress rules.
+        Creates sg_allows_from relationships when a security group rule
+        references another security group as its source.
+        """
+        sg_resources = type_index.get("aws_security_group", [])
+        if not sg_resources:
+            return
+        # Case 1: Inline ingress rules in aws_security_group
+        if resource.resource_type == "aws_security_group":
+            ingress_rules = resource.attributes.get("ingress", [])
+            if not isinstance(ingress_rules, list):
+                return
+            for rule in ingress_rules:
+                if not isinstance(rule, dict):
+                    continue
+                self._process_sg_rule(
+                    rule, resource.full_id, result, sg_resources, is_inline=True
+                )
+        # Case 2: Standalone aws_security_group_rule with type=ingress
+        elif resource.resource_type == "aws_security_group_rule":
+            if resource.attributes.get("type") != "ingress":
+                return
+            # The SG this rule belongs to
+            sg_id_attr = resource.attributes.get("security_group_id", "")
+            target_sg = self._resolve_sg_ref(str(sg_id_attr), sg_resources)
+            if not target_sg:
+                return
+            source_ref = resource.attributes.get("source_security_group_id", "")
+            source_sg = self._resolve_sg_ref(str(source_ref), sg_resources)
+            if source_sg and source_sg.full_id != target_sg.full_id:
+                port_label = self._format_port_label(resource.attributes)
+                result.relationships.append(
+                    ResourceRelationship(
+                        source_id=source_sg.full_id,
+                        target_id=target_sg.full_id,
+                        relationship_type="sg_allows_from",
+                        label=port_label,
+                    )
+                )
+        # Case 3: aws_vpc_security_group_ingress_rule
+        elif resource.resource_type == "aws_vpc_security_group_ingress_rule":
+            sg_id_attr = resource.attributes.get("security_group_id", "")
+            target_sg = self._resolve_sg_ref(str(sg_id_attr), sg_resources)
+            if not target_sg:
+                return
+            source_ref = resource.attributes.get(
+                "referenced_security_group_id", ""
+            )
+            source_sg = self._resolve_sg_ref(str(source_ref), sg_resources)
+            if source_sg and source_sg.full_id != target_sg.full_id:
+                port_label = self._format_port_label(resource.attributes)
+                result.relationships.append(
+                    ResourceRelationship(
+                        source_id=source_sg.full_id,
+                        target_id=target_sg.full_id,
+                        relationship_type="sg_allows_from",
+                        label=port_label,
+                    )
+                )
+    def _process_sg_rule(
+        self,
+        rule: dict,
+        sg_full_id: str,
+        result: ParseResult,
+        sg_resources: List[TerraformResource],
+        is_inline: bool = True,
+    ) -> None:
+        """Process a single SG ingress rule for cross-references."""
+        # Look for security_groups list (inline rules use this)
+        sg_refs = rule.get("security_groups", [])
+        if not isinstance(sg_refs, list):
+            sg_refs = [sg_refs] if sg_refs else []
+        for ref in sg_refs:
+            source_sg = self._resolve_sg_ref(str(ref), sg_resources)
+            if source_sg and source_sg.full_id != sg_full_id:
+                port_label = self._format_port_label(rule)
+                result.relationships.append(
+                    ResourceRelationship(
+                        source_id=source_sg.full_id,
+                        target_id=sg_full_id,
+                        relationship_type="sg_allows_from",
+                        label=port_label,
+                    )
+                )
+    @staticmethod
+    def _resolve_sg_ref(
+        value: str, sg_resources: List[TerraformResource]
+    ) -> Optional[TerraformResource]:
+        """Resolve a security group reference to a TerraformResource."""
+        if not value:
+            return None
+        match = re.search(r"aws_security_group\.(\w+)", value)
+        if match:
+            name = match.group(1)
+            for sg in sg_resources:
+                if sg.resource_name == name:
+                    return sg
+        return None
+    @staticmethod
+    def _format_port_label(attrs: dict) -> str:
+        """Format a port label from rule attributes (e.g., 'TCP/80')."""
+        from_port = attrs.get("from_port")
+        to_port = attrs.get("to_port")
+        protocol = attrs.get("protocol", "tcp")
+        if from_port is None:
+            return ""
+        # Coerce ports to int (HCL2 may return strings in some contexts)
+        try:
+            from_port = int(from_port)
+        except (TypeError, ValueError):
+            pass
+        try:
+            to_port = int(to_port)
+        except (TypeError, ValueError):
+            pass
+        if isinstance(protocol, str):
+            protocol = protocol.upper()
+            if protocol == "-1":
+                return "All Traffic"
+        if from_port == to_port or to_port is None:
+            return f"{protocol}/{from_port}"
+        if from_port == 0 and to_port == 65535:
+            return f"{protocol}/All"
+        return f"{protocol}/{from_port}-{to_port}"
+    def _find_referenced_resources(
+        self, value: Any, target_type: str, type_index: Dict[str, List[TerraformResource]]
     ) -> List[TerraformResource]:
         """Find resources referenced in a value."""
         results = []
         value_str = str(value)
         # Look for resource references
-        pattern = rf'{target_type}\.(\w+)\.'
+        pattern = rf"{target_type}\.(\w+)\."
         for match in re.finditer(pattern, value_str):
             res_name = match.group(1)
             for res in type_index.get(target_type, []):
@@ -319,7 +571,7 @@ class TerraformParser:
                     break
         # Look for module references
-        module_pattern = r'module\.(\w+)\.(\w+)'
+        module_pattern = r"module\.(\w+)\.(\w+)"
         for match in re.finditer(module_pattern, value_str):
             module_name = match.group(1)
             # Find resources in that module
@@ -329,16 +581,3 @@ class TerraformParser:
                     break
         return results
-def get_resource_summary(result: ParseResult) -> Dict[str, int]:
-    """Get a summary count of resources by type."""
-    summary: Dict[str, int] = {}
-    for resource in result.resources:
-        count = 1
-        if resource.count and resource.count > 0:
-            count = resource.count
-        elif resource.for_each:
-            count = 1  # Unknown, but at least 1
-        summary[resource.resource_type] = summary.get(resource.resource_type, 0) + count
-    return summary

terraformgraph 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

terraformgraph 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl