PyPI - sourcecode - Versions diffs - 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl - Mend

sourcecode 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.30.0"
+__version__ = "1.30.2"

sourcecode/cli.py CHANGED Viewed

@@ -1866,6 +1866,8 @@ def prepare_context_cmd(
             out["review_hotspots"] = output.review_hotspots
         if output.suggested_review_order:
             out["suggested_review_order"] = output.suggested_review_order
+        if output.execution_paths:
+            out["execution_paths"] = output.execution_paths
         if output.impact_summary:
             out["impact_summary"] = output.impact_summary
         if output.why_these_files:

sourcecode/flow_analyzer.py ADDED Viewed

@@ -0,0 +1,310 @@
+"""flow_analyzer.py — Evidence-based execution path extraction for PR context.
+Builds Entry → Service → Repository → EndState ordered sequences using ONLY
+direct code evidence: field injection, constructor params, type annotations,
+method calls, explicit instantiation.
+V3: execution_paths with runtime_notes — conditional branches, optional execution,
+and async side-effects are surfaced when explicit code signals exist.
+No inference, no naming, no invented behavior.
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+from typing import Callable, Optional
+# Artifact types (the "artifact_type" value returned by the caller-supplied
+# classify_fn) that make a changed file the starting point of an execution path.
+_ENTRY_ARTIFACT_TYPES = frozenset({"controller", "entrypoint"})
+# Artifact types accepted as the service step and as the repository step.
+_SERVICE_ARTIFACT_TYPES = frozenset({"service"})
+_REPO_ARTIFACT_TYPES = frozenset({"repository", "mapper"})
+# Keywords that _detect_end_state looks for inside lowercased step labels.
+# NOTE(review): the lookup is a plain substring test, so short keywords can hit
+# unrelated names (e.g. "bus" is contained in "business") — confirm this is intended.
+_DB_KEYWORDS = frozenset({"repository", "dao", "mapper", "store", "jpa", "jdbc", "sql"})
+_EVENT_KEYWORDS = frozenset({"event", "publish", "emit", "kafka", "queue", "rabbit", "sns", "bus"})
+# Route annotation / decorator shapes, matched case-insensitively:
+#   @GetMapping(...) / @RequestMapping(...)   — "<Verb>Mapping" followed by text up to the next ")"
+#   @Get(...) / @Post(...)                     — bare verb annotation with an argument list
+#   @<name>.get(...) / @<name>.post(...)       — verb called on an object, e.g. a router/app decorator
+_HTTP_ENTRY_RE = re.compile(
+    r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping[^)]*\)'
+    r'|@(?:Get|Post|Put|Delete|Patch)\([^)]*\)'
+    r'|@\w+\.(?:get|post|put|delete|patch)\([^)]*\)',
+    re.IGNORECASE,
+)
+# Applied (with .match) to the text that follows a route annotation: skips any
+# run of "public"/"async"/"def"/"function" keywords and at most one type-like
+# token, then captures the identifier immediately preceding "(".
+_METHOD_NAME_RE = re.compile(
+    r'(?:public\s+|async\s+|def\s+|function\s+)*'
+    r'(?:[\w<>\[\]]+\s+)?'
+    r'(\w+)\s*\(',
+)
+# Runtime signal patterns: (compiled_regex, note_text)
+# Only signals with explicit code evidence — no inference.
+# Three categories: condition | branch | async
+# _collect_runtime_notes runs every pattern against the comment-stripped file
+# text and emits note_text once per file when the pattern matches anywhere;
+# the order of this list is the order of the emitted notes.
+_RUNTIME_SIGNALS: list[tuple[re.Pattern, str]] = [
+    # ── Conditional / auth guards ─────────────────────────────────────────────
+    (re.compile(r'@PreAuthorize|@Secured|@RolesAllowed', re.IGNORECASE),
+     "condition: authorization check present (@PreAuthorize / @Secured)"),
+    (re.compile(r'isAuthenticated\(\)|hasRole\(|hasAuthority\(|SecurityContextHolder', re.IGNORECASE),
+     "condition: reads authentication context"),
+    (re.compile(r'featureFlag|FeatureToggle|\.isEnabled\s*\(|\.isActive\s*\(', re.IGNORECASE),
+     "condition: feature flag gates execution"),
+    # Null/empty guard with early return — matches if (...null/empty...) return/throw on same line
+    (re.compile(r'if\s*\([^)]*(?:==\s*null|!=\s*null|isEmpty\s*\(\)|isBlank\s*\(\))[^)]*\)'
+                r'\s*(?:\{?\s*)?(?:return|throw)\b', re.IGNORECASE),
+     "condition: null/empty guard with early return"),
+    # ── Optional execution / branching ────────────────────────────────────────
+    (re.compile(r'@Cacheable|@CacheEvict|@CachePut', re.IGNORECASE),
+     "branch: Spring cache may short-circuit downstream call"),
+    (re.compile(r'\.getIfPresent\s*\(|cache\.get\s*\(|cacheManager\.', re.IGNORECASE),
+     "branch: manual cache lookup may short-circuit"),
+    (re.compile(r'Optional\s*<|\.orElseThrow\s*\(|\.orElseGet\s*\(|\.orElse\s*\(', re.IGNORECASE),
+     "branch: result may be absent (Optional)"),
+    # ── Async / side effects ──────────────────────────────────────────────────
+    # The two patterns below are case-sensitive (no re.IGNORECASE).
+    (re.compile(r'@Async\b'),
+     "async: runs in separate thread (@Async)"),
+    (re.compile(r'CompletableFuture|\.supplyAsync\s*\(|\.runAsync\s*\('),
+     "async: non-blocking future-based execution"),
+    (re.compile(r'\basync\s+def\b|\bawait\b', re.IGNORECASE),
+     "async: non-blocking (async/await)"),
+    (re.compile(r'publishEvent\s*\(|applicationEventPublisher|eventPublisher\.', re.IGNORECASE),
+     "async: Spring application event emitted"),
+    # NOTE(review): the next two patterns also match listener annotations
+    # (@KafkaListener / @RabbitListener), which mark consumers, while the note
+    # text says "produced" / "sent" — confirm the wording is acceptable.
+    (re.compile(r'kafkaTemplate\.|KafkaProducer|@KafkaListener', re.IGNORECASE),
+     "async: Kafka message produced"),
+    (re.compile(r'rabbitTemplate\.|amqpTemplate\.|@RabbitListener', re.IGNORECASE),
+     "async: RabbitMQ message sent"),
+]
+def _detect_lang(path: str) -> str:
+    """Return a language tag for *path* derived from its file suffix.
+    The suffix is compared case-insensitively; an unrecognised or missing
+    suffix yields "unknown".
+    """
+    suffix_to_lang = {
+        ".java": "java",
+        ".kt": "kotlin",
+        ".py": "python",
+        ".ts": "typescript",
+        ".tsx": "typescript",
+        ".js": "javascript",
+        ".jsx": "javascript",
+        ".go": "go",
+        ".cs": "csharp",
+        ".rb": "ruby",
+        ".php": "php",
+    }
+    suffix = Path(path).suffix.lower()
+    return suffix_to_lang.get(suffix, "unknown")
+def _strip_comments(content: str, lang: str) -> str:
+    """Replace comments in *content* with a single space, using *lang*'s syntax.
+    - python / ruby: only "#" line comments are removed. "//" and "/* */" are
+      not comment syntax there ("//" is Python floor division), so they are
+      left untouched instead of truncating the rest of the line.
+    - every other language tag (including "unknown"): "/* ... */" blocks and
+      "//" line comments are removed. "#" is left alone because it does not
+      start a comment in Go, Java, Kotlin, C#, JavaScript or TypeScript.
+    The replacement is purely textual: comment markers inside string literals
+    (for example "http://...") are stripped as well, and Python docstrings are
+    not treated as comments.
+    """
+    if lang in ("python", "ruby"):
+        return re.sub(r"#[^\n]*", " ", content)
+    content = re.sub(r"/\*.*?\*/", " ", content, flags=re.DOTALL)
+    return re.sub(r"//[^\n]*", " ", content)
+def _read_safe(root: Path, rel_path: str) -> str:
+    """Read ``root / rel_path`` as UTF-8 text, dropping undecodable bytes.
+    Returns "" instead of raising when the read fails with OSError or ValueError
+    (for example when the file does not exist).
+    """
+    try:
+        target = root / rel_path
+        text = target.read_text(encoding="utf-8", errors="ignore")
+    except (OSError, ValueError):
+        text = ""
+    return text
+def _collect_runtime_notes(content: str, lang: str) -> list[str]:
+    """Return the runtime-behavior notes whose pattern occurs in *content*.
+    Comments are stripped first (per *lang*), so only code text can trigger a
+    note. Notes come back in _RUNTIME_SIGNALS order, each at most once; the
+    result is [] when nothing matches.
+    """
+    stripped = _strip_comments(content, lang)
+    found: list[str] = []
+    for signal_re, text in _RUNTIME_SIGNALS:
+        # A note text already emitted is never repeated.
+        if text in found:
+            continue
+        if signal_re.search(stripped):
+            found.append(text)
+    return found
+def _find_entry_method(clean: str) -> Optional[str]:
+    """Return the name of the handler declared right after the first route annotation.
+    *clean* is comment-stripped source. Returns None when no route annotation is
+    found, when the text following it does not look like a method signature, or
+    when the captured identifier is only a keyword/modifier.
+    """
+    route = _HTTP_ENTRY_RE.search(clean)
+    if route is None:
+        return None
+    # Only the text immediately after the annotation is inspected.
+    tail = clean[route.end():].lstrip()
+    signature = _METHOD_NAME_RE.match(tail)
+    if signature is None:
+        return None
+    candidate = signature.group(1)
+    if candidate.lower() in ("public", "async", "def", "function", "void", "override"):
+        return None
+    return candidate
+def _build_field_map(clean: str) -> dict[str, str]:
+    """Map field_name_lower → ClassName from injection patterns.
+    Three declaration shapes are recognised in the comment-stripped source:
+    - "private [final|static|volatile|transient ...] Type[<...>] name" followed
+      by one of ; = , )   — Java-style fields, including "private final" fields
+      used for constructor injection
+    - "private|protected|readonly [val|var|lateinit|readonly ...] name: Type"
+      — colon-typed declarations
+    - "self.name = Type("   — explicit instantiation in Python
+    Later matches overwrite earlier ones for the same lowercased field name.
+    """
+    fmap: dict[str, str] = {}
+    # Modifiers between "private" and the type are skipped so that
+    # "private final OrderService orderService;" maps to OrderService.
+    java_field = (
+        r"private\s+(?:(?:final|static|volatile|transient)\s+)*"
+        r"(\w+)(?:<[^>]+>)?\s+(\w+)\s*[;=,)]"
+    )
+    for m in re.finditer(java_field, clean):
+        fmap[m.group(2).lower()] = m.group(1)
+    # Property keywords before the name are skipped so that
+    # "private val svc: OrderService" maps svc → OrderService.
+    typed_field = (
+        r"(?:private|protected|readonly)\s+(?:(?:val|var|lateinit|readonly)\s+)*"
+        r"(\w+)\s*:\s*(\w+)"
+    )
+    for m in re.finditer(typed_field, clean):
+        fmap[m.group(1).lower()] = m.group(2)
+    for m in re.finditer(r"self\.(\w+)\s*=\s*(\w+)\s*\(", clean):
+        fmap[m.group(1).lower()] = m.group(2)
+    return fmap
+def _find_called_method(clean: str, class_name: str, fmap: dict[str, str]) -> Optional[str]:
+    """Return the first method invoked on *class_name* in *clean*, or None.
+    Calls made through a field whose mapped type equals *class_name*
+    (case-insensitive) are tried first, skipping the names class/new/super/get/set.
+    If none is found, a direct "ClassName.method(" call is accepted, skipping
+    class/new/super.
+    """
+    wanted = class_name.lower()
+    for field_name, type_name in fmap.items():
+        if type_name.lower() != wanted:
+            continue
+        escaped = re.escape(field_name)
+        call_re = rf"\bthis\.{escaped}\.(\w+)\s*\(|\b{escaped}\.(\w+)\s*\("
+        for hit in re.finditer(call_re, clean, re.IGNORECASE):
+            called = hit.group(1) or hit.group(2)
+            if not called:
+                continue
+            if called.lower() in ("class", "new", "super", "get", "set"):
+                continue
+            return called
+    static_re = rf"\b{re.escape(class_name)}\.(\w+)\s*\("
+    for hit in re.finditer(static_re, clean, re.IGNORECASE):
+        called = hit.group(1)
+        if called.lower() in ("class", "new", "super"):
+            continue
+        return called
+    return None
+def _has_code_evidence(clean: str, class_name: str) -> bool:
+    """Report whether *class_name* is referenced by code in comment-stripped *clean*.
+    Accepted evidence, checked in order: a private/protected field of that
+    type, a parameter of that type, a ": Type" annotation, "new Type(", a
+    case-sensitive "Type(" call, or any mention on a line that does not start
+    with an import-like keyword or comment marker.
+    """
+    esc = re.escape(class_name)
+    direct_patterns = (
+        (rf"\b(?:private|protected)\s+{esc}\b", re.IGNORECASE),
+        (rf"[,(]\s*{esc}\s+\w+", re.IGNORECASE),
+        (rf":\s*{esc}\b", re.IGNORECASE),
+        (rf"\bnew\s+{esc}\s*\(", re.IGNORECASE),
+        # Direct call/instantiation is the only case-sensitive check.
+        (rf"\b{esc}\s*\(", 0),
+    )
+    for pattern, flags in direct_patterns:
+        if re.search(pattern, clean, flags):
+            return True
+    # Fallback: a mention anywhere except on import/require/from lines.
+    mention = re.search(
+        rf"^(?!\s*(?:import|require|from|//|#|\*)\b).*\b{esc}\b",
+        clean, re.IGNORECASE | re.MULTILINE,
+    )
+    return mention is not None
+def _find_evidenced_ordered(
+    root: Path,
+    source_path: str,
+    candidates: list[str],
+) -> list[tuple[str, Optional[str]]]:
+    """Return (class_name, method_or_None) for every candidate referenced by the source file.
+    The class name of a candidate is its file stem. Candidates without direct
+    code evidence in *source_path* are dropped; the rest are ordered by the
+    offset of the first (case-insensitive) occurrence of the class name in the
+    comment-stripped source. An unreadable or empty source file yields [].
+    """
+    source = _read_safe(root, source_path)
+    if not source:
+        return []
+    clean = _strip_comments(source, _detect_lang(source_path))
+    fmap = _build_field_map(clean)
+    hits: list[tuple[int, str, Optional[str]]] = []
+    for cand_path in candidates:
+        class_name = Path(cand_path).stem
+        if _has_code_evidence(clean, class_name):
+            called = _find_called_method(clean, class_name, fmap)
+            first = re.search(rf"\b{re.escape(class_name)}\b", clean, re.IGNORECASE)
+            offset = len(clean) if first is None else first.start()
+            hits.append((offset, class_name, called))
+    # sorted() is stable, so candidates sharing an offset keep their input order.
+    ordered = sorted(hits, key=lambda hit: hit[0])
+    return [(cls, meth) for _, cls, meth in ordered]
+def _detect_end_state(path: list[str]) -> str:
+    """Label the terminal effect of an execution path from its step labels.
+    Steps are scanned in order; for each step the DB keywords are tested before
+    the event keywords, and the first hit decides the label ("DB write" or
+    "event emitted"). Without any hit the label is "HTTP response".
+    Keywords are matched as substrings of the lowercased step label.
+    """
+    outcomes = (
+        (_DB_KEYWORDS, "DB write"),
+        (_EVENT_KEYWORDS, "event emitted"),
+    )
+    for label in (step.lower() for step in path):
+        for keywords, outcome in outcomes:
+            if any(kw in label for kw in keywords):
+                return outcome
+    return "HTTP response"
+def _step_label(class_name: str, method: Optional[str]) -> str:
+    """Format a path step as "Class.method", or just "Class" when no method is known."""
+    if method:
+        return f"{class_name}.{method}"
+    return class_name
+def _path_name(entry_class: str) -> str:
+    """Derive a human-readable path name from an entry-point class / file stem.
+    A trailing role suffix (RestController, Controller, Resource, Handler, Api,
+    Endpoint, Router, Servlet — case-insensitive) is removed, then any separator
+    left dangling at either end (space, "_", "-", ".") is trimmed, so
+    "orders_router" gives "orders" and "users.controller" gives "users".
+    camelCase boundaries are split with spaces ("OrderItem" → "Order Item").
+    When the name consists of nothing but a role suffix (e.g. "Controller") the
+    original name is kept, so the result is never empty for a non-empty input.
+    """
+    domain = re.sub(
+        r"(?:RestController|Controller|Resource|Handler|Api|Endpoint|Router|Servlet)$",
+        "", entry_class, flags=re.IGNORECASE,
+    ).strip(" _-.")
+    if not domain:
+        # Nothing left after stripping: fall back to the full name.
+        domain = entry_class
+    return re.sub(r"(?<=[a-z])(?=[A-Z])", " ", domain).strip()
+def analyze_execution_paths(
+    changed_files: list[str],
+    all_paths: list[str],
+    root: Path,
+    classify_fn: Callable[[str], dict],
+    max_paths: int = 3,
+) -> list[dict]:
+    """Build ordered execution paths with runtime behavior signals.
+    Each path:
+    - One service per entry point (most evident, earliest-referenced)
+    - Each step requires direct code evidence
+    - notes populated from explicit code signals only (never inferred)
+    - Forward-only: Controller → Service → Repository
+    Args:
+        changed_files: paths (relative to *root*) of the files in the change set;
+            only those classified as controller/entrypoint start a path.
+        all_paths: every project path to consider as service / repository candidate.
+        root: directory the relative paths are resolved against.
+        classify_fn: returns a dict with an "artifact_type" key for a path.
+        max_paths: maximum number of paths returned. Entry points without an
+            evidenced service are skipped and do not count against this limit.
+    Returns list of: {name, entry_point, path, end_state} where entry_point
+    and every item of path are {"step": label, "notes": [runtime notes]}.
+    Returns [] when no verifiable path exists.
+    """
+    entry_files = [
+        f for f in changed_files
+        if classify_fn(f)["artifact_type"] in _ENTRY_ARTIFACT_TYPES
+    ]
+    if not entry_files:
+        return []
+    # Classify each project path once and bucket it; the service and repository
+    # type sets are disjoint, so a path lands in at most one bucket.
+    all_services: list[str] = []
+    all_repos: list[str] = []
+    for candidate in all_paths:
+        atype = classify_fn(candidate)["artifact_type"]
+        if atype in _SERVICE_ARTIFACT_TYPES:
+            all_services.append(candidate)
+        elif atype in _REPO_ARTIFACT_TYPES:
+            all_repos.append(candidate)
+    result: list[dict] = []
+    for entry_path in entry_files:
+        # Cap the number of paths produced, not the number of entry files
+        # inspected: an entry without an evidenced service must not use up a slot.
+        if len(result) >= max_paths:
+            break
+        entry_class = Path(entry_path).stem
+        lang = _detect_lang(entry_path)
+        entry_content = _read_safe(root, entry_path)
+        entry_clean = _strip_comments(entry_content, lang) if entry_content else ""
+        entry_method = _find_entry_method(entry_clean) if entry_clean else None
+        entry_point_str = _step_label(entry_class, entry_method)
+        evidenced_svcs = _find_evidenced_ordered(root, entry_path, all_services)
+        if not evidenced_svcs:
+            continue
+        # Only the earliest-referenced service is followed.
+        svc_class, svc_method = evidenced_svcs[0]
+        svc_label = _step_label(svc_class, svc_method)
+        svc_path = next((p for p in all_services if Path(p).stem == svc_class), None)
+        svc_content = _read_safe(root, svc_path) if svc_path else ""
+        svc_lang = _detect_lang(svc_path) if svc_path else "unknown"
+        # Service step — notes scoped to service file only
+        path_items: list[dict] = [
+            {"step": svc_label,
+             "notes": _collect_runtime_notes(svc_content, svc_lang) if svc_content else []},
+        ]
+        # Repository step — notes scoped to repo file only
+        if svc_path:
+            evidenced_repos = _find_evidenced_ordered(root, svc_path, all_repos)
+            if evidenced_repos:
+                repo_class, repo_method = evidenced_repos[0]
+                repo_label = _step_label(repo_class, repo_method)
+                repo_path = next((p for p in all_repos if Path(p).stem == repo_class), None)
+                repo_content = _read_safe(root, repo_path) if repo_path else ""
+                repo_lang = _detect_lang(repo_path) if repo_path else "unknown"
+                path_items.append(
+                    {"step": repo_label,
+                     "notes": _collect_runtime_notes(repo_content, repo_lang) if repo_content else []},
+                )
+        # Entry-point notes scoped to controller file
+        entry_notes = _collect_runtime_notes(entry_content, lang) if entry_content else []
+        result.append({
+            "name": _path_name(entry_class),
+            "entry_point": {"step": entry_point_str, "notes": entry_notes},
+            "path": path_items,
+            "end_state": _detect_end_state([item["step"] for item in path_items]),
+        })
+    return result

sourcecode/prepare_context.py CHANGED Viewed

@@ -351,6 +351,7 @@ class TaskOutput:
     test_coverage_risk: dict = field(default_factory=dict)
     review_hotspots: list[str] = field(default_factory=list)
     suggested_review_order: list[str] = field(default_factory=list)
+    execution_paths: list[dict] = field(default_factory=list)
 # ─────────────────────────────────────────────────────────────────────────────
@@ -874,6 +875,17 @@ class TaskContextBuilder:
                     _pr_suggested_review_order.append(_f)
                     _seen_order.add(_f)
+        # ── 6d. review-pr: execution paths ──────────────────────────────────
+        _execution_paths: list[dict] = []
+        if task_name == "review-pr" and _delta_files:
+            from sourcecode.flow_analyzer import analyze_execution_paths
+            _execution_paths = analyze_execution_paths(
+                changed_files=sorted(_delta_files),
+                all_paths=all_paths,
+                root=self.root,
+                classify_fn=self._classify_changed_file,
+            )
         # ── 6c. Symptom keyword boost + related notes (fix-bug + --symptom) ──
         symptom_keywords: list[str] = []
         related_notes: list[dict] = []
@@ -1104,6 +1116,7 @@ class TaskContextBuilder:
             test_coverage_risk=_pr_test_coverage_risk,
             review_hotspots=_pr_review_hotspots,
             suggested_review_order=_pr_suggested_review_order,
+            execution_paths=_execution_paths,
         )
     def render_prompt(self, output: TaskOutput) -> str:
@@ -1605,6 +1618,78 @@ class TaskContextBuilder:
         # Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
         return {"artifact_type": "ide_noise", "risk_areas": [], "impact_level": "noise", "is_noise": True, "module": module, "confidence": "low"}
+    def _classify_diff_severity(self, path: str, since: Optional[str]) -> str:
+        """Classify the semantic severity of a file's diff to gate BFS expansion.
+        Runs ``git diff <since> HEAD -- <path>`` (or ``git diff HEAD -- <path>``
+        when *since* is falsy) in ``self.root`` and inspects the added/removed lines.
+        Returns: 'trivial' | 'field_change' | 'api_change' | 'security_change' | 'unknown'
+        - trivial: only comments/whitespace changed — no BFS expansion seeded
+        - field_change: field/attribute declarations changed — hop-1 only, no hop-2+ frontier
+        - api_change: method signatures or class structure changed — full BFS
+        - security_change: auth/security keywords in changed code lines — full BFS + security chain
+        - unknown: diff unreadable, empty, or file type not supported —
+          treated as api_change by the caller (safe default)
+        Comment and blank lines are ignored by every check, so a comment-only
+        diff is 'trivial' even when the comment mentions a security keyword.
+        """
+        import subprocess as _subprocess
+        import re as _re
+        try:
+            if since:
+                cmd = ["git", "diff", since, "HEAD", "--", path]
+            else:
+                cmd = ["git", "diff", "HEAD", "--", path]
+            result = _subprocess.run(
+                cmd, capture_output=True, text=True, timeout=5,
+                cwd=str(self.root), encoding="utf-8", errors="ignore",
+            )
+            diff_text = result.stdout
+        except Exception:
+            # Deliberately best-effort: a missing git binary, a timeout or any
+            # other failure downgrades to 'unknown' instead of aborting the analysis.
+            return "unknown"
+        if not diff_text.strip():
+            return "unknown"
+        # Keep added/removed lines without their +/- marker; skip the file headers.
+        changed_lines = [
+            line[1:] for line in diff_text.splitlines()
+            if line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
+        ]
+        if not changed_lines:
+            return "trivial"
+        suffix = Path(path).suffix.lower()
+        # NOTE(review): the _SECURITY patterns are wrapped in \b...\b, so they
+        # only match the keyword as a whole word ("auth", "token"), not as part
+        # of a longer identifier such as "AuthService" or "access_token".
+        if suffix in (".java", ".kt"):
+            _TRIVIAL  = _re.compile(r'^\s*(?://|/\*|\*)')
+            _FIELD    = _re.compile(r'^\s*(?:private|protected|public|final|static)\s+\w[\w<>, ]*\s+\w+\s*[;=]')
+            _API      = _re.compile(r'^\s*(?:public|protected)\s+\S.*\(')
+            # Exclude 'password', 'role', 'permission' — these are common field names
+            # in domain models and don't indicate auth logic changes. Keep mechanism
+            # keywords: jwt, auth (as class prefix), token, credential, encrypt, decrypt, oauth.
+            _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|Security)\b')
+            _STRUCT   = _re.compile(r'^\s*(?:class|interface|enum|record|import|package)\s')
+        elif suffix == ".py":
+            _TRIVIAL  = _re.compile(r'^\s*#')
+            _FIELD    = _re.compile(r'^\s*(?:self\.\w+\s*=|\w+:\s*\w)')
+            _API      = _re.compile(r'^\s*def\s+\w')
+            _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
+            _STRUCT   = _re.compile(r'^\s*(?:class|import|from)\s')
+        elif suffix in (".ts", ".tsx", ".js", ".jsx", ".mjs"):
+            _TRIVIAL  = _re.compile(r'^\s*(?://|/\*|\*)')
+            _FIELD    = _re.compile(r'^\s*(?:private|readonly|public)?\s*\w+[?!]?\s*[=:]')
+            _API      = _re.compile(r'^\s*(?:(?:public|private|protected|async|export)\s+)*(?:function\s+\w|\w+\s*\()')
+            _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
+            _STRUCT   = _re.compile(r'^\s*(?:class|interface|import|export\s+(?:class|interface|type))\s')
+        else:
+            return "unknown"
+        # Comment and blank lines cannot change behavior; drop them before the
+        # keyword checks so wording inside a comment never escalates severity.
+        substantive = [
+            line for line in changed_lines
+            if line.strip() and not _TRIVIAL.match(line)
+        ]
+        if not substantive:
+            return "trivial"
+        # Most severe category wins.
+        if any(_SECURITY.search(line) for line in substantive):
+            return "security_change"
+        if any(_API.match(line) or _STRUCT.match(line) for line in substantive):
+            return "api_change"
+        if any(_FIELD.match(line) for line in substantive):
+            return "field_change"
+        return "field_change"  # safe default: treat unknown non-trivial as field-level
     def _scan_import_dependents(
         self,
         changed_paths: list[str],
@@ -1888,6 +1973,16 @@ class TaskContextBuilder:
             f: self._classify_changed_file(f) for f in changed_files
         }
+        # ── Step 1b: classify diff severity to gate BFS expansion ─────────────
+        # trivial   → no BFS seeding (comments/whitespace only)
+        # field_change → hop-1 BFS only, deps excluded from hop-2+ frontier
+        # api_change   → full BFS (method signature or class structure changed)
+        # security_change → full BFS + security chain allowed cross-module
+        # unknown   → treated as api_change (safe default)
+        diff_severities: dict[str, str] = {
+            f: self._classify_diff_severity(f, since) for f in changed_files
+        }
         # ── Step 2: build relevant_files from the changed set ─────────────────
         relevant: list[RelevantFile] = []
         why: dict[str, str] = {}
@@ -2004,9 +2099,12 @@ class TaskContextBuilder:
         ]
         _bfs_seen: set[str] = {rf.path for rf in relevant}
+        # trivial changes (comments/whitespace only) don't seed BFS — nothing structural
+        # to propagate, so excluding them prevents false expansion on cosmetic commits
         _bfs_frontier: list[str] = [
             f for f in changed_files
             if Path(f).suffix.lower() in _BFS_SCANNABLE
+            and diff_severities.get(f, "unknown") != "trivial"
         ]
         # (max results added from this hop, max_candidates scanned per seed)
@@ -2035,6 +2133,8 @@ class TaskContextBuilder:
             # collect (score, path) pairs for this hop to build the next frontier
             _hop_scored: list[tuple[float, str]] = []
+            # per-hop staging list — capped at _max_results before merging into _bfs_collected
+            _hop_bfs_staged: list[tuple[int, float, str, RelevantFile]] = []
             for _seed_path, _dep_paths in _hop_dep_map.items():
                 _seed_atype = (
@@ -2042,6 +2142,9 @@ class TaskContextBuilder:
                     if _seed_path in classifications
                     else self._classify_changed_file(_seed_path)["artifact_type"]
                 )
+                # diff severity for original changed files only (hop-1 seeds);
+                # hop-2+ seeds are dep files not in diff_severities → "unknown"
+                _seed_severity = diff_severities.get(_seed_path, "unknown")
                 for _dep_path in _dep_paths:
                     if _dep_path in _bfs_seen:
                         continue
@@ -2052,9 +2155,29 @@ class TaskContextBuilder:
                         continue
                     _dep_atype = _dep_cls["artifact_type"]
+                    _dep_module = _dep_cls["module"]
+                    # Cross-module gating: if dep lives in a different domain module,
+                    # only allow it if:
+                    #   hop-1 AND dep_atype is explicitly in seed's _EXPANSION_TARGETS
+                    # For hop-2+, cross-module deps are always excluded — transitives
+                    # must stay within the changed modules to avoid system-wide explosion.
+                    _is_cross_module = bool(_dep_module) and _dep_module not in affected_modules_set
+                    if _is_cross_module:
+                        _seed_expansion = _EXPANSION_TARGETS.get(_seed_atype, frozenset())
+                        # security_change seeds are allowed to cross into the security chain
+                        # even when their base expansion targets don't include those types
+                        if _seed_severity == "security_change":
+                            _seed_expansion = _seed_expansion | frozenset({"security", "spring_config", "config"})
+                        if _hop_num >= 2 or _dep_atype not in _seed_expansion:
+                            continue
                     _dep_score_base = _ARTIFACT_SCORE.get(_dep_atype, 0.45)
                     # score decays 30% per hop so transitives rank below direct dependents
-                    _dep_score = round(_dep_score_base * (0.70 ** _hop_num), 2)
+                    # cross-module deps get additional 40% penalty so same-module files
+                    # always rank higher in the per-hop cap
+                    _cross_module_factor = 0.60 if _is_cross_module else 1.0
+                    _dep_score = round(_dep_score_base * (0.70 ** _hop_num) * _cross_module_factor, 2)
                     _dep_role = _role_in_system(_dep_path, _dep_atype, _dep_path in ep_paths)
                     _why_str = (
@@ -2069,27 +2192,44 @@ class TaskContextBuilder:
                         f" ({_seed_atype}) | score: {_dep_score:.2f}"
                     )
                     why[_dep_path] = _why_str
-                    # Tests are consumers, not structural dependencies — exclude from import graph.
-                    # They remain in relevant_files but must not seed further BFS hops.
+                    # Tests import production code but are not structural dependencies —
+                    # exclude from graph, frontier, and bfs_collected entirely.
                     _is_test = _dep_atype == "test"
                     if not _is_test:
                         graph_edges.append({
                             "from": _seed_path, "to": _dep_path,
                             "edge_type": "import_dependency", "hop": _hop_num,
                         })
-                        _hop_scored.append((_dep_score, _dep_path))
-                    _bfs_collected.append((_hop_num, _dep_score, _dep_path, RelevantFile(
-                        path=_dep_path, role=_dep_role, score=_dep_score,
-                        reason=_reason, why=_why_str,
-                    )))
+                        # field_change seeds don't propagate to hop-2+ frontier:
+                        # a field-level change (getter, attribute) is collected at hop-1
+                        # but its callers are not recursively expanded further
+                        if _seed_severity != "field_change":
+                            _hop_scored.append((_dep_score, _dep_path))
+                        _hop_bfs_staged.append((_hop_num, _dep_score, _dep_path, RelevantFile(
+                            path=_dep_path, role=_dep_role, score=_dep_score,
+                            reason=_reason, why=_why_str,
+                        )))
+            # Per-hop cap: keep only the top-_max_results by score before merging.
+            # Prevents a single high-fanout seed (e.g. User.java imported by every
+            # controller) from flooding _bfs_collected and pushing out hop-2/3 results.
+            _hop_bfs_staged.sort(key=lambda x: (-x[1], x[2]))
+            _bfs_collected.extend(_hop_bfs_staged[:_max_results])
             # next frontier = top-N files by score from this hop
             _hop_scored.sort(key=lambda x: -x[0])
             _bfs_frontier = [p for _, p in _hop_scored[:_max_results]]
-        # merge into relevant: closer hops first, then higher score; cap total at 20
+        # merge into relevant: closer hops first, then higher score; cap total at 18
         _bfs_collected.sort(key=lambda x: (x[0], -x[1], x[2]))
-        relevant.extend(rf for _, _, _, rf in _bfs_collected[:20])
+        _bfs_cap = sum(budget[0] for budget in _BFS_HOP_BUDGET)  # 8+6+4 = 18
+        relevant.extend(rf for _, _, _, rf in _bfs_collected[:_bfs_cap])
+        # Truncation guard: flag excess expansion — gap message added in Step 6.
+        _EXPANSION_HARD_LIMIT = 40
+        _expansion_truncated = len(relevant) > _EXPANSION_HARD_LIMIT
+        if _expansion_truncated:
+            relevant = relevant[:_EXPANSION_HARD_LIMIT]
         # ── Step 3d: per-file impact scores, change_type, system_impact ─────────
         # Downstream fanout: count graph edges originating from each changed file
@@ -2263,6 +2403,11 @@ class TaskContextBuilder:
         analysis_gaps: list[str] = [
             f"Related file expansion: type-aware chain expansion + {_bfs_note} + module/directory heuristics",
         ]
+        if _expansion_truncated:
+            analysis_gaps.insert(0,
+                f"truncated_dependency_graph: expansion exceeded {_EXPANSION_HARD_LIMIT} nodes"
+                " — lower-priority files omitted. Narrow scope with --since <ref> for precision."
+            )
         if noise_count > 0 and meaningful > 0:
             analysis_gaps.append(
                 f"{noise_count} IDE/tooling file(s) in diff excluded from impact analysis"

{sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.30.0
+Version: 1.30.2
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
 **Compressed AI-ready context for Java/Spring enterprise codebases.**
-![Version](https://img.shields.io/badge/version-1.30.0-blue)
+![Version](https://img.shields.io/badge/version-1.30.2-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -255,7 +255,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.30.0
+# sourcecode 1.30.2
 ```
 ---

{sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-sourcecode/__init__.py,sha256=Bqhw95H9r5IFRlnJFDRt1uCsK_ahVHjggAAWdJ3d-5c,103
+sourcecode/__init__.py,sha256=ERxetwuKJX_1UzzbbdymfXL8AXwRFp03HJG6sY-iJO4,103
 sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
 sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
 sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
 sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
 sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
-sourcecode/cli.py,sha256=iWzo7u-wmWjj0GYAF54UpbicfpXt2OUxPRy44h2VaCI,80646
+sourcecode/cli.py,sha256=1qVMsC2swT-OtCK6XziIM0J4xKp8kcRhUzfOaHr7vRU,80743
 sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
 sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
 sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -17,10 +17,11 @@ sourcecode/doc_analyzer.py,sha256=afA4uJFwXZ_uR2l4J0pQwbeTkRkGmKdN9KhRVYePBUw,24
 sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
 sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
 sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
+sourcecode/flow_analyzer.py,sha256=VQDrItg3NBqOOD8PxHXyntXQnPweUuUn6JtOY8lNWys,12841
 sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
 sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
 sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
-sourcecode/prepare_context.py,sha256=UxAwXHLZC61WFmYWwp-LWRUXnH6CbaX_lsyn6W7ok4o,121062
+sourcecode/prepare_context.py,sha256=ELrCIIcttip4B3y9aQZdMPqIgzaEJR0evDdG8QYTBLc,129623
 sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
 sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -61,8 +62,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-1.30.0.dist-info/METADATA,sha256=RDWe-iF73ttF7ZeXsUoUp2kQQUGB8lnxCWHI7dZQroM,23417
-sourcecode-1.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-1.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-1.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-1.30.0.dist-info/RECORD,,
+sourcecode-1.30.2.dist-info/METADATA,sha256=3bLQsn6BmYa9Rum0jjejw2627bPdOMaYxbqI2XMyOLY,23417
+sourcecode-1.30.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-1.30.2.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-1.30.2.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-1.30.2.dist-info/RECORD,,

{sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl

sourcecode 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl