PyPI - sourcecode - Versions diffs - 1.30.4__py3-none-any.whl → 1.30.6__py3-none-any.whl - Mend

sourcecode 1.30.4__py3-none-any.whl → 1.30.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) [hide | show]

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.30.4"
+__version__ = "1.30.6"

sourcecode/cli.py CHANGED Viewed

@@ -1868,10 +1868,18 @@ def prepare_context_cmd(
             out["suggested_review_order"] = output.suggested_review_order
         if output.execution_paths:
             out["execution_paths"] = output.execution_paths
+        if output.behavioral_impact:
+            out["behavioral_impact"] = output.behavioral_impact
         if output.impact_summary:
             out["impact_summary"] = output.impact_summary
         if output.why_these_files:
             out["reasoning"] = output.why_these_files
+        # git-first scope metadata
+        out["scope"] = {
+            "source": output.scope_source or "git_diff",
+            "files": output.scope_files,
+            "repo_root": output.repo_root or "",
+        }
     if output.limitations:
         out["limitations"] = output.limitations
     if output.symptom:

sourcecode/flow_analyzer.py CHANGED Viewed

@@ -177,6 +177,49 @@ def _has_code_evidence(clean: str, class_name: str) -> bool:
     return False
+_EVIDENCE_PRIORITY: dict[str, int] = {
+    "none": 0, "heuristic_only": 1, "direct_call": 2, "direct_injection": 3,
+}
+_EVIDENCE_STRONG = frozenset({"direct_call", "direct_injection"})
+def _classify_evidence_type(clean: str, class_name: str) -> str:
+    """Return how class_name is referenced in pre-stripped content."""
+    esc = re.escape(class_name)
+    if re.search(rf"\b(?:private|protected)\s+{esc}\b", clean, re.IGNORECASE):
+        return "direct_injection"
+    if re.search(rf"[,(]\s*{esc}\s+\w+", clean, re.IGNORECASE):
+        return "direct_injection"
+    if re.search(rf":\s*{esc}\b", clean, re.IGNORECASE):
+        return "direct_injection"
+    if re.search(rf"\bnew\s+{esc}\s*\(", clean, re.IGNORECASE):
+        return "direct_call"
+    if re.search(rf"\b{esc}\s*\(", clean):
+        return "direct_call"
+    non_import = re.search(
+        rf"^(?!\s*(?:import|require|from|//|#|\*)\b).*\b{esc}\b",
+        clean, re.IGNORECASE | re.MULTILINE,
+    )
+    if non_import:
+        return "heuristic_only"
+    return "none"
+def _worst_evidence(levels: list[str]) -> str:
+    return min(levels, key=lambda x: _EVIDENCE_PRIORITY.get(x, 0)) if levels else "none"
+def _compute_confidence(evidence_level: str, trace_len: int) -> str:
+    if evidence_level not in _EVIDENCE_STRONG:
+        return "low"
+    return "high" if trace_len >= 2 else "medium"
+def _build_trace_step(source_class: str, target_class: str, evidence_type: str) -> str:
+    verb = "injects" if evidence_type == "direct_injection" else "calls"
+    return f"{source_class} {verb} {target_class}"
 def _find_evidenced_ordered(
     root: Path,
     source_path: str,
@@ -307,4 +350,296 @@ def analyze_execution_paths(
             "end_state": _detect_end_state([item["step"] for item in path_items]),
         })
+# ── Behavioral impact helpers ─────────────────────────────────────────────────
+def _domain_from_class(class_name: str) -> str:
+    """Extract human-readable domain noun from a class name."""
+    stripped = re.sub(
+        r"(?i)(?:repository|repo|dao|mapper|store|service|manager|handler|helper|"
+        r"impl|controller|api|resource|endpoint|facade)$",
+        "", class_name,
+    )
+    return re.sub(r"(?<=[a-z])(?=[A-Z])", " ", stripped).strip().lower()
+def _impact_item(statement: str, support: str, certainty: str) -> dict:
+    return {"statement": statement, "support": support, "certainty": certainty}
+def _impact_descriptions(
+    changed_class: str,
+    changed_type: str,
+    end_state: str,
+    ctrl_clean: str,
+    evidence_level: str,
+) -> list[dict]:
+    domain = _domain_from_class(changed_class)
+    certainty = "medium" if evidence_level in _EVIDENCE_STRONG else "low"
+    items: list[dict] = []
+    if changed_type in _REPO_ARTIFACT_TYPES:
+        items.append(_impact_item(
+            f"{domain} persistence affected" if domain else "persistence affected",
+            f"{changed_class} is a repository in path",
+            certainty,
+        ))
+    elif changed_type in _SERVICE_ARTIFACT_TYPES:
+        if end_state == "DB write":
+            items.append(_impact_item(
+                f"{domain} persistence affected" if domain else "persistence affected",
+                f"{changed_class} delegates to repository with DB write",
+                certainty,
+            ))
+        else:
+            items.append(_impact_item(
+                f"{domain} behavior may change" if domain else "behavior may change",
+                f"{changed_class} is a service in path",
+                certainty,
+            ))
+    else:
+        items.append(_impact_item(
+            f"{domain} behavior may change" if domain else "behavior may change",
+            f"{changed_class} is in path",
+            certainty,
+        ))
+    if re.search(r"@PreAuthorize|@Secured|@RolesAllowed|hasRole\(|isAuthenticated", ctrl_clean, re.IGNORECASE):
+        items.append(_impact_item(
+            "authorization check present on entry point",
+            "security annotation detected on controller",
+            "high",
+        ))
+    if re.search(r"@Transactional\b", ctrl_clean):
+        items.append(_impact_item(
+            "transactional boundary in path",
+            "@Transactional detected on entry point",
+            "high",
+        ))
+    return items[:3]
+def _impact_descriptions_for_controller(
+    affected_path: list[str],
+    end_state: str,
+    ctrl_clean: str,
+    evidence_level: str,
+) -> list[dict]:
+    certainty = "medium" if evidence_level in _EVIDENCE_STRONG else "low"
+    items: list[dict] = []
+    if end_state == "DB write":
+        domain = ""
+        for step in reversed(affected_path):
+            base = step.split(".")[0]
+            d = _domain_from_class(base)
+            if d:
+                domain = d
+                break
+        items.append(_impact_item(
+            f"{domain} persistence affected" if domain else "data persistence affected",
+            "repository with DB write detected in path",
+            certainty,
+        ))
+    else:
+        items.append(_impact_item(
+            "request handler behavior may change",
+            "controller entry point modified",
+            certainty,
+        ))
+    if re.search(r"@PreAuthorize|@Secured|@RolesAllowed|hasRole\(|isAuthenticated", ctrl_clean, re.IGNORECASE):
+        items.append(_impact_item(
+            "authorization check present on entry point",
+            "security annotation detected on controller",
+            "high",
+        ))
+    if re.search(r"@Transactional\b", ctrl_clean):
+        items.append(_impact_item(
+            "transactional boundary in path",
+            "@Transactional detected on controller",
+            "high",
+        ))
+    return items[:3]
+def analyze_behavioral_impact(
+    changed_files: list[str],
+    all_paths: list[str],
+    root: Path,
+    classify_fn: Callable[[str], dict],
+    max_impacts: int = 3,
+) -> list[dict]:
+    """Build behavioral impact entries for PR review.
+    For changed controllers: forward traversal → service → repository.
+    For changed services/repos/domain: reverse lookup → find callers → build causal path.
+    Each entry: {entry_point, affected_path, impact, end_state}
+    All paths require direct code evidence — no naming/module inference.
+    Returns [] when no verifiable causal path exists.
+    """
+    entry_changed = [f for f in changed_files if classify_fn(f)["artifact_type"] in _ENTRY_ARTIFACT_TYPES]
+    non_entry_changed = [f for f in changed_files if classify_fn(f)["artifact_type"] not in _ENTRY_ARTIFACT_TYPES]
+    all_entries = [p for p in all_paths if classify_fn(p)["artifact_type"] in _ENTRY_ARTIFACT_TYPES]
+    all_services = [p for p in all_paths if classify_fn(p)["artifact_type"] in _SERVICE_ARTIFACT_TYPES]
+    all_repos = [p for p in all_paths if classify_fn(p)["artifact_type"] in _REPO_ARTIFACT_TYPES]
+    result: list[dict] = []
+    seen_entries: set[str] = set()
+    # Case 1: changed controllers — forward traversal
+    for entry_path in entry_changed:
+        if len(result) >= max_impacts:
+            break
+        entry_class = Path(entry_path).stem
+        if entry_class in seen_entries:
+            continue
+        lang = _detect_lang(entry_path)
+        ctrl_content = _read_safe(root, entry_path)
+        if not ctrl_content:
+            continue
+        ctrl_clean = _strip_comments(ctrl_content, lang)
+        entry_method = _find_entry_method(ctrl_clean)
+        entry_str = _step_label(entry_class, entry_method)
+        evidenced_svcs = _find_evidenced_ordered(root, entry_path, all_services)
+        if not evidenced_svcs:
+            continue
+        svc_class, svc_method = evidenced_svcs[0]
+        svc_evidence = _classify_evidence_type(ctrl_clean, svc_class)
+        affected_path = [_step_label(svc_class, svc_method)]
+        trace = [_build_trace_step(entry_class, svc_class, svc_evidence)]
+        evidence_levels = [svc_evidence]
+        svc_path = next((p for p in all_services if Path(p).stem == svc_class), None)
+        if svc_path:
+            svc_content_raw = _read_safe(root, svc_path)
+            if svc_content_raw:
+                svc_clean_raw = _strip_comments(svc_content_raw, _detect_lang(svc_path))
+                evidenced_repos = _find_evidenced_ordered(root, svc_path, all_repos)
+                if evidenced_repos:
+                    repo_class, repo_method = evidenced_repos[0]
+                    repo_evidence = _classify_evidence_type(svc_clean_raw, repo_class)
+                    affected_path.append(_step_label(repo_class, repo_method))
+                    trace.append(_build_trace_step(svc_class, repo_class, repo_evidence))
+                    evidence_levels.append(repo_evidence)
+        end_state = _detect_end_state(affected_path)
+        evidence_level = _worst_evidence(evidence_levels)
+        confidence = _compute_confidence(evidence_level, len(trace))
+        seen_entries.add(entry_class)
+        result.append({
+            "entry_point": entry_str,
+            "affected_path": affected_path,
+            "impact": _impact_descriptions_for_controller(affected_path, end_state, ctrl_clean, evidence_level),
+            "end_state": end_state,
+            "confidence": confidence,
+            "evidence_level": evidence_level,
+            "trace": trace,
+        })
+    # Case 2: changed non-controllers — reverse lookup
+    for changed_path in non_entry_changed:
+        if len(result) >= max_impacts:
+            break
+        changed_class = Path(changed_path).stem
+        changed_type = classify_fn(changed_path)["artifact_type"]
+        for ctrl_path in all_entries:
+            if len(result) >= max_impacts:
+                break
+            ctrl_class = Path(ctrl_path).stem
+            if ctrl_class in seen_entries:
+                continue
+            ctrl_content = _read_safe(root, ctrl_path)
+            if not ctrl_content:
+                continue
+            ctrl_lang = _detect_lang(ctrl_path)
+            ctrl_clean = _strip_comments(ctrl_content, ctrl_lang)
+            affected_path: list[str] = []
+            trace: list[str] = []
+            evidence_levels: list[str] = []
+            if _has_code_evidence(ctrl_clean, changed_class):
+                # Direct: controller → changed class
+                ctrl_to_changed = _classify_evidence_type(ctrl_clean, changed_class)
+                fmap = _build_field_map(ctrl_clean)
+                method = _find_called_method(ctrl_clean, changed_class, fmap)
+                affected_path.append(_step_label(changed_class, method))
+                trace.append(_build_trace_step(ctrl_class, changed_class, ctrl_to_changed))
+                evidence_levels.append(ctrl_to_changed)
+                if changed_type in _SERVICE_ARTIFACT_TYPES:
+                    changed_content = _read_safe(root, changed_path)
+                    changed_clean = _strip_comments(changed_content, _detect_lang(changed_path)) if changed_content else ""
+                    evidenced_repos = _find_evidenced_ordered(root, changed_path, all_repos)
+                    if evidenced_repos:
+                        rclass, rmethod = evidenced_repos[0]
+                        repo_evidence = _classify_evidence_type(changed_clean, rclass)
+                        affected_path.append(_step_label(rclass, rmethod))
+                        trace.append(_build_trace_step(changed_class, rclass, repo_evidence))
+                        evidence_levels.append(repo_evidence)
+            else:
+                # Indirect: controller → mediating service → changed class
+                for svc_class, svc_method in _find_evidenced_ordered(root, ctrl_path, all_services):
+                    svc_p = next((p for p in all_services if Path(p).stem == svc_class), None)
+                    if not svc_p:
+                        continue
+                    svc_content = _read_safe(root, svc_p)
+                    if not svc_content:
+                        continue
+                    svc_lang = _detect_lang(svc_p)
+                    svc_clean = _strip_comments(svc_content, svc_lang)
+                    if not _has_code_evidence(svc_clean, changed_class):
+                        continue
+                    ctrl_to_svc = _classify_evidence_type(ctrl_clean, svc_class)
+                    svc_to_changed = _classify_evidence_type(svc_clean, changed_class)
+                    fmap = _build_field_map(svc_clean)
+                    method = _find_called_method(svc_clean, changed_class, fmap)
+                    affected_path = [_step_label(svc_class, svc_method), _step_label(changed_class, method)]
+                    trace = [
+                        _build_trace_step(ctrl_class, svc_class, ctrl_to_svc),
+                        _build_trace_step(svc_class, changed_class, svc_to_changed),
+                    ]
+                    evidence_levels = [ctrl_to_svc, svc_to_changed]
+                    if changed_type in _SERVICE_ARTIFACT_TYPES:
+                        changed_content = _read_safe(root, changed_path)
+                        changed_clean = _strip_comments(changed_content, _detect_lang(changed_path)) if changed_content else ""
+                        evidenced_repos = _find_evidenced_ordered(root, changed_path, all_repos)
+                        if evidenced_repos:
+                            rclass, rmethod = evidenced_repos[0]
+                            repo_evidence = _classify_evidence_type(changed_clean, rclass)
+                            affected_path.append(_step_label(rclass, rmethod))
+                            trace.append(_build_trace_step(changed_class, rclass, repo_evidence))
+                            evidence_levels.append(repo_evidence)
+                    break
+            if not affected_path:
+                continue
+            entry_method = _find_entry_method(ctrl_clean)
+            end_state = _detect_end_state(affected_path)
+            evidence_level = _worst_evidence(evidence_levels)
+            confidence = _compute_confidence(evidence_level, len(trace))
+            seen_entries.add(ctrl_class)
+            result.append({
+                "entry_point": _step_label(ctrl_class, entry_method),
+                "affected_path": affected_path,
+                "impact": _impact_descriptions(changed_class, changed_type, end_state, ctrl_clean, evidence_level),
+                "end_state": end_state,
+                "confidence": confidence,
+                "evidence_level": evidence_level,
+                "trace": trace,
+            })
     return result

sourcecode/prepare_context.py CHANGED Viewed

@@ -352,6 +352,11 @@ class TaskOutput:
     review_hotspots: list[str] = field(default_factory=list)
     suggested_review_order: list[str] = field(default_factory=list)
     execution_paths: list[dict] = field(default_factory=list)
+    behavioral_impact: list[dict] = field(default_factory=list)
+    # git-first scope metadata (review-pr only)
+    scope_source: Optional[str] = None   # "git_diff" | "staged" | "untracked" | "full_scan_fallback"
+    scope_files: list[str] = field(default_factory=list)
+    repo_root: Optional[str] = None
 # ─────────────────────────────────────────────────────────────────────────────
@@ -439,23 +444,81 @@ class TaskContextBuilder:
             )
         spec = TASKS[task_name]
+        # ── 0. review-pr: git-first scope resolution (before any filesystem scan) ─
+        _pr_git_root: Optional[Path] = None
+        _pr_scope_files: Optional[list[str]] = None
+        _pr_scope_source: str = "full_scan_fallback"
+        if task_name == "review-pr":
+            _pr_git_root = self._resolve_git_root()
+            if _pr_git_root is None:
+                return TaskOutput(
+                    task="review-pr", goal=spec.goal,
+                    project_summary=None, architecture_summary=None,
+                    relevant_files=[], suspected_areas=[],
+                    improvement_opportunities=[], test_gaps=[],
+                    key_dependencies=[], code_notes_summary=None,
+                    limitations=[], confidence="low",
+                    error_code="no_git_repo",
+                    error_message="review-pr requires a git repository.",
+                    ci_decision="no_git_repo",
+                    scope_source="full_scan_fallback",
+                    repo_root=str(self.root),
+                )
+            _raw_scope, _pr_scope_source = self._get_pr_scope_files(since=since)
+            if _raw_scope is None:
+                # Explicit --since ref is invalid
+                _avail_pr, _sug_pr = self._get_available_refs(since or "")
+                _pr_hints: list[str] = []
+                if _sug_pr:
+                    _pr_hints.append(f"Did you mean '{_sug_pr}'?")
+                if _avail_pr:
+                    _pr_hints.append(f"Available refs: {', '.join(_avail_pr[:8])}")
+                return TaskOutput(
+                    task="review-pr", goal=spec.goal,
+                    project_summary=None, architecture_summary=None,
+                    relevant_files=[], suspected_areas=[],
+                    improvement_opportunities=[], test_gaps=[],
+                    key_dependencies=[], code_notes_summary=None,
+                    limitations=[], confidence="low",
+                    since=since,
+                    error_code="git_ref_not_found",
+                    error_message=f"Base ref '{since}' not found in this repository.",
+                    error_hints=_pr_hints,
+                    gaps=[f"Cannot compute PR diff: git ref '{since}' not found."] + _pr_hints,
+                    ci_decision="git_ref_error",
+                    scope_source="git_diff",
+                    repo_root=str(_pr_git_root),
+                )
+            _pr_scope_files = _raw_scope
+            # _pr_scope_files == [] means no diff; handled in step 5d
+        _use_git_first = task_name == "review-pr"
         # ── 1. Scan ────────────────────────────────────────────────────────
         from sourcecode.adaptive_scanner import AdaptiveScanner
         from sourcecode.repo_classifier import RepoClassifier
         from sourcecode.tree_utils import flatten_file_tree
-        _topology = RepoClassifier().classify(self.root)
-        # Shallow pre-scan to detect Java manifests before choosing depth.
         from sourcecode.scanner import FileScanner as _FileScanner
-        _pre = _FileScanner(self.root, max_depth=1)
-        _pre_manifests = _pre.find_manifests()
+        _pre_manifests = _FileScanner(self.root, max_depth=1).find_manifests()
         _java_names = {"pom.xml", "build.gradle", "build.gradle.kts"}
         _is_java = any(Path(m).name in _java_names for m in _pre_manifests)
-        _base_depth = 12 if _is_java else 6
-        scanner = AdaptiveScanner(self.root, topology=_topology, base_depth=_base_depth)
-        file_tree = scanner.scan_tree()
-        manifests = scanner.find_manifests()
-        all_paths = [p.replace("\\", "/") for p in flatten_file_tree(file_tree)]
+        manifests = _pre_manifests
+        if _use_git_first:
+            # Git-first: no full filesystem traversal — skip AdaptiveScanner.
+            # all_paths = scope files + siblings in same directories (bounded context
+            # for behavioral_impact reverse lookups without scanning the whole repo).
+            file_tree: dict = {}
+            all_paths = self._expand_scope_for_analysis(_pr_scope_files or [])
+        else:
+            _topology = RepoClassifier().classify(self.root)
+            _base_depth = 12 if _is_java else 6
+            scanner = AdaptiveScanner(self.root, topology=_topology, base_depth=_base_depth)
+            file_tree = scanner.scan_tree()
+            manifests = scanner.find_manifests()
+            all_paths = [p.replace("\\", "/") for p in flatten_file_tree(file_tree)]
         # Warn when Java project has no Mapper.xml — suggests files below scan depth.
         _mybatis_warning: dict | None = None
@@ -486,25 +549,26 @@ class TaskContextBuilder:
         else:
             stacks, entry_points, _ = detector.detect(self.root, file_tree, _detection_manifests)
-        # Iterate workspaces to collect per-workspace stacks and entry points —
-        # same approach as the main CLI (cli.py lines 971-1041).
-        for workspace in workspace_analysis.workspaces:
-            ws_root = self.root / workspace.path
-            if not ws_root.exists() or not ws_root.is_dir():
-                continue
-            _ws_topology = RepoClassifier().classify(ws_root)
-            _ws_scanner = AdaptiveScanner(ws_root, topology=_ws_topology, base_depth=6)
-            _ws_tree = _ws_scanner.scan_tree()
-            _ws_manifests = _ws_scanner.find_manifests()
-            _ws_stacks, _ws_eps, _ = detector.detect(ws_root, _ws_tree, _ws_manifests)
-            stacks.extend(
-                _replace(s, root=workspace.path, workspace=workspace.path, primary=False)
-                for s in _ws_stacks
-            )
-            entry_points.extend(
-                _replace(ep, path=f"{workspace.path}/{ep.path}")
-                for ep in _ws_eps
-            )
+        if not _use_git_first:
+            # Workspace sub-scans: each runs AdaptiveScanner on a workspace root.
+            # Skipped for review-pr — would re-trigger full traversal per workspace.
+            for workspace in workspace_analysis.workspaces:
+                ws_root = self.root / workspace.path
+                if not ws_root.exists() or not ws_root.is_dir():
+                    continue
+                _ws_topology = RepoClassifier().classify(ws_root)
+                _ws_scanner = AdaptiveScanner(ws_root, topology=_ws_topology, base_depth=6)
+                _ws_tree = _ws_scanner.scan_tree()
+                _ws_manifests = _ws_scanner.find_manifests()
+                _ws_stacks, _ws_eps, _ = detector.detect(ws_root, _ws_tree, _ws_manifests)
+                stacks.extend(
+                    _replace(s, root=workspace.path, workspace=workspace.path, primary=False)
+                    for s in _ws_stacks
+                )
+                entry_points.extend(
+                    _replace(ep, path=f"{workspace.path}/{ep.path}")
+                    for ep in _ws_eps
+                )
         stacks, project_type = detector.classify_results(
             file_tree, stacks, entry_points,
@@ -668,49 +732,10 @@ class TaskContextBuilder:
             elif _delta_raw:
                 _delta_files = set(_delta_raw)
-        # ── 5d. review-pr: git-first gate ──────────────────────────────────────
+        # ── 5d. review-pr: set _delta_files from pre-resolved git scope ──────────
+        # No-git and invalid-ref cases were already handled in step 0 (early returns).
         if task_name == "review-pr":
-            if not self._is_git_repo():
-                return TaskOutput(
-                    task="review-pr", goal=spec.goal,
-                    project_summary=None, architecture_summary=None,
-                    relevant_files=[], suspected_areas=[],
-                    improvement_opportunities=[], test_gaps=[],
-                    key_dependencies=[], code_notes_summary=None,
-                    limitations=[], confidence="low",
-                    error_code="no_git_repo",
-                    error_message="review-pr requires a git repository.",
-                    ci_decision="no_git_repo",
-                )
-            if since is None:
-                # review-pr with no --since: check only uncommitted changes.
-                # _get_git_changed_files(since=None) defaults to HEAD~1 which
-                # returns the last *committed* diff — a false positive here.
-                _pr_raw: Optional[list[str]] = self._get_uncommitted_changed_files()
-            else:
-                _pr_raw = self._get_git_changed_files(since=since)
-            if _pr_raw is None:
-                _avail_pr, _sug_pr = self._get_available_refs(since or "")
-                _pr_hints: list[str] = []
-                if _sug_pr:
-                    _pr_hints.append(f"Did you mean '{_sug_pr}'?")
-                if _avail_pr:
-                    _pr_hints.append(f"Available refs: {', '.join(_avail_pr[:8])}")
-                return TaskOutput(
-                    task="review-pr", goal=spec.goal,
-                    project_summary=None, architecture_summary=None,
-                    relevant_files=[], suspected_areas=[],
-                    improvement_opportunities=[], test_gaps=[],
-                    key_dependencies=[], code_notes_summary=None,
-                    limitations=[], confidence="low",
-                    since=since,
-                    error_code="git_ref_not_found",
-                    error_message=f"Base ref '{since}' not found in this repository.",
-                    error_hints=_pr_hints,
-                    gaps=[f"Cannot compute PR diff: git ref '{since}' not found."] + _pr_hints,
-                    ci_decision="git_ref_error",
-                )
-            if not _pr_raw:
+            if not _pr_scope_files:
                 _no_diff_hint = "review-pr requires changed files or --since <ref>."
                 return TaskOutput(
                     task="review-pr", goal=spec.goal,
@@ -723,8 +748,11 @@ class TaskContextBuilder:
                     error_message=f"No PR diff detected. {_no_diff_hint}",
                     gaps=[f"No PR diff detected. {_no_diff_hint}"],
                     ci_decision="no_changes",
+                    scope_source=_pr_scope_source,
+                    scope_files=[],
+                    repo_root=str(_pr_git_root),
                 )
-            _delta_files = set(_pr_raw)
+            _delta_files = set(_pr_scope_files)
         # ── 5c. review-pr suspected_areas (needs git uncommitted_files) ──────
         if task_name == "review-pr" and spec.enable_code_notes:
@@ -875,12 +903,20 @@ class TaskContextBuilder:
                     _pr_suggested_review_order.append(_f)
                     _seen_order.add(_f)
-        # ── 6d. review-pr: execution paths ──────────────────────────────────
+        # ── 6d. review-pr: execution paths + behavioral impact ──────────────
         _execution_paths: list[dict] = []
+        _behavioral_impact: list[dict] = []
         if task_name == "review-pr" and _delta_files:
-            from sourcecode.flow_analyzer import analyze_execution_paths
+            from sourcecode.flow_analyzer import analyze_execution_paths, analyze_behavioral_impact
+            _changed_sorted = sorted(_delta_files)
             _execution_paths = analyze_execution_paths(
-                changed_files=sorted(_delta_files),
+                changed_files=_changed_sorted,
+                all_paths=all_paths,
+                root=self.root,
+                classify_fn=self._classify_changed_file,
+            )
+            _behavioral_impact = analyze_behavioral_impact(
+                changed_files=_changed_sorted,
                 all_paths=all_paths,
                 root=self.root,
                 classify_fn=self._classify_changed_file,
@@ -1117,6 +1153,11 @@ class TaskContextBuilder:
             review_hotspots=_pr_review_hotspots,
             suggested_review_order=_pr_suggested_review_order,
             execution_paths=_execution_paths,
+            behavioral_impact=_behavioral_impact,
+            # git-first scope metadata
+            scope_source=_pr_scope_source if task_name == "review-pr" else None,
+            scope_files=list(_pr_scope_files) if task_name == "review-pr" and _pr_scope_files else [],
+            repo_root=str(_pr_git_root) if task_name == "review-pr" and _pr_git_root else None,
         )
     def render_prompt(self, output: TaskOutput) -> str:
@@ -1408,6 +1449,122 @@ class TaskContextBuilder:
     def _is_source(self, path: str) -> bool:
         return Path(path).suffix.lower() in _SOURCE_EXTENSIONS
+    def _resolve_git_root(self) -> Optional[Path]:
+        """Return the absolute git repo root, or None if not in a git repo."""
+        import subprocess
+        try:
+            r = subprocess.run(
+                ["git", "rev-parse", "--show-toplevel"],
+                cwd=str(self.root),
+                capture_output=True, text=True,
+                encoding="utf-8", errors="replace", timeout=5,
+            )
+            if r.returncode == 0 and r.stdout.strip():
+                return Path(r.stdout.strip())
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            pass
+        return None
+    def _get_pr_scope_files(self, since: Optional[str] = None) -> tuple[Optional[list[str]], str]:
+        """Return (files, scope_source) for review-pr scope resolution.
+        Returns (None, _) only when since is explicitly provided but the ref is invalid.
+        Returns ([], _) when git is available but no changes are found.
+        scope_source is a comma-separated list of active sources (git_diff, staged, untracked).
+        """
+        import subprocess
+        def _run(*cmd: str) -> Optional[list[str]]:
+            try:
+                r = subprocess.run(
+                    list(cmd), cwd=str(self.root),
+                    capture_output=True, text=True,
+                    encoding="utf-8", errors="replace", timeout=10,
+                )
+                return (
+                    [ln.strip() for ln in (r.stdout or "").splitlines() if ln.strip()]
+                    if r.returncode == 0 else None
+                )
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                return None
+        files: set[str] = set()
+        sources: list[str] = []
+        if since is not None:
+            committed = _run("git", "diff", "--name-only", "--relative", since, "HEAD")
+            if committed is None:
+                return None, "git_diff"  # invalid ref — hard error
+            if committed:
+                files.update(committed)
+                sources.append("git_diff")
+        else:
+            # Working tree vs HEAD~1: covers last commit + all uncommitted changes
+            h1_diff = _run("git", "diff", "--name-only", "--relative", "HEAD~1")
+            if h1_diff:
+                files.update(h1_diff)
+                sources.append("git_diff")
+            # Working tree vs HEAD: uncommitted only (may add new unstaged files)
+            h_diff = _run("git", "diff", "--name-only", "--relative", "HEAD")
+            if h_diff:
+                new = set(h_diff) - files
+                if new:
+                    files.update(new)
+                    if "git_diff" not in sources:
+                        sources.append("git_diff")
+            # Staged changes not yet committed
+            staged = _run("git", "diff", "--name-only", "--cached", "--relative")
+            if staged:
+                new = set(staged) - files
+                if new:
+                    files.update(new)
+                    sources.append("staged")
+        # Untracked files (both cases)
+        status = _run("git", "status", "--porcelain", "--short")
+        if status:
+            for line in status:
+                if line.startswith("??") and len(line) > 3:
+                    f = line[3:].strip()
+                    if f and not f.endswith("/") and f not in files:
+                        files.add(f)
+                        if "untracked" not in sources:
+                            sources.append("untracked")
+        # Drop paths outside self.root (../… prefix means above cwd — occurs when
+        # self.root is a subdirectory of the git repo and git status shows repo-level files).
+        files = {f for f in files if not f.startswith("../") and not f.startswith("..\\")}
+        scope_source = ",".join(sources) if sources else "git_diff"
+        return sorted(files), scope_source
+    def _expand_scope_for_analysis(self, scope_files: list[str]) -> list[str]:
+        """Add sibling files in the same directories as scope_files (depth=1 expansion).
+        Gives behavioral_impact engine context for reverse lookups (e.g. controllers
+        in the same package as changed services) without traversing the full repo.
+        """
+        expanded: set[str] = set(scope_files)
+        seen_dirs: set[Path] = set()
+        for f in scope_files:
+            parent = Path(f).parent
+            if parent in seen_dirs:
+                continue
+            seen_dirs.add(parent)
+            full_parent = self.root / parent
+            if not full_parent.is_dir():
+                continue
+            try:
+                for entry in full_parent.iterdir():
+                    if entry.is_file():
+                        rel = str(entry.relative_to(self.root)).replace("\\", "/")
+                        expanded.add(rel)
+            except OSError:
+                pass
+        return sorted(f for f in expanded if (self.root / f).exists())
     def _is_git_repo(self) -> bool:
         import subprocess
         try:

{sourcecode-1.30.4.dist-info → sourcecode-1.30.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.30.4
+Version: 1.30.6
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
 **Deterministic, behavior-aware codebase context for AI agents and PR review.**
-![Version](https://img.shields.io/badge/version-1.30.4-blue)
+![Version](https://img.shields.io/badge/version-1.30.6-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -257,7 +257,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.30.4
+# sourcecode 1.30.6
 ```
 ---

{sourcecode-1.30.4.dist-info → sourcecode-1.30.6.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-sourcecode/__init__.py,sha256=lum_KRetZZierS82OkkxkHcbIkjAw1eYUAQiiRzOrX8,103
+sourcecode/__init__.py,sha256=MEBCm2OG1EipFt5XTCDIkAml5j9KHaqNtfp_BSFcGa0,103
 sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
 sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
 sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
 sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
 sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
-sourcecode/cli.py,sha256=K6ecski4uwAWtATwxD-OF8IZlfwsqFWHCoDPOi4U9bI,80775
+sourcecode/cli.py,sha256=zP55Nf483vXefZa_3KG-wTZbaoxZ8ek3xKwZJHiZtng,81101
 sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
 sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
 sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -17,11 +17,11 @@ sourcecode/doc_analyzer.py,sha256=afA4uJFwXZ_uR2l4J0pQwbeTkRkGmKdN9KhRVYePBUw,24
 sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
 sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
 sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
-sourcecode/flow_analyzer.py,sha256=VQDrItg3NBqOOD8PxHXyntXQnPweUuUn6JtOY8lNWys,12841
+sourcecode/flow_analyzer.py,sha256=m29PJPdAwH4n3ZNqMidgi97csSUUtav5SM9lkDy_sr8,27219
 sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
 sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
 sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
-sourcecode/prepare_context.py,sha256=ELrCIIcttip4B3y9aQZdMPqIgzaEJR0evDdG8QYTBLc,129623
+sourcecode/prepare_context.py,sha256=WDsG7XA5yjtvsVvlSFn0E4rb3wVu_vUe4ztDOZ6rYfo,136632
 sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
 sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -62,8 +62,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-1.30.4.dist-info/METADATA,sha256=lSj_ODIJgDwxQP4EJ1VN9dbO0tiaAlWBiMG6qLep3mo,26770
-sourcecode-1.30.4.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-1.30.4.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-1.30.4.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-1.30.4.dist-info/RECORD,,
+sourcecode-1.30.6.dist-info/METADATA,sha256=ydDuZ4ucf78fNK7tdu0BQlkCaO27zHFYT-eoOxW0CC0,26770
+sourcecode-1.30.6.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-1.30.6.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-1.30.6.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-1.30.6.dist-info/RECORD,,

{sourcecode-1.30.4.dist-info → sourcecode-1.30.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-1.30.4.dist-info → sourcecode-1.30.6.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-1.30.4.dist-info → sourcecode-1.30.6.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 1.30.4__py3-none-any.whl → 1.30.6__py3-none-any.whl

sourcecode 1.30.4py3-none-any.whl → 1.30.6py3-none-any.whl