sourcecode 1.30.4__py3-none-any.whl → 1.30.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.30.4"
3
+ __version__ = "1.30.6"
sourcecode/cli.py CHANGED
@@ -1868,10 +1868,18 @@ def prepare_context_cmd(
1868
1868
  out["suggested_review_order"] = output.suggested_review_order
1869
1869
  if output.execution_paths:
1870
1870
  out["execution_paths"] = output.execution_paths
1871
+ if output.behavioral_impact:
1872
+ out["behavioral_impact"] = output.behavioral_impact
1871
1873
  if output.impact_summary:
1872
1874
  out["impact_summary"] = output.impact_summary
1873
1875
  if output.why_these_files:
1874
1876
  out["reasoning"] = output.why_these_files
1877
+ # git-first scope metadata
1878
+ out["scope"] = {
1879
+ "source": output.scope_source or "git_diff",
1880
+ "files": output.scope_files,
1881
+ "repo_root": output.repo_root or "",
1882
+ }
1875
1883
  if output.limitations:
1876
1884
  out["limitations"] = output.limitations
1877
1885
  if output.symptom:
@@ -177,6 +177,49 @@ def _has_code_evidence(clean: str, class_name: str) -> bool:
177
177
  return False
178
178
 
179
179
 
180
# Rank of each evidence kind; a lower number means weaker evidence.
_EVIDENCE_PRIORITY: dict[str, int] = {
    "none": 0,
    "heuristic_only": 1,
    "direct_call": 2,
    "direct_injection": 3,
}

# Evidence kinds that the confidence/certainty helpers treat as strong.
_EVIDENCE_STRONG = frozenset(("direct_call", "direct_injection"))
184
+
185
+
186
def _classify_evidence_type(clean: str, class_name: str) -> str:
    """Classify how ``class_name`` is referenced inside ``clean``.

    ``clean`` is the pre-stripped file content to search. The checks run in
    a fixed order and the first one that matches decides the result:

    * ``"direct_injection"`` - a ``private``/``protected`` declaration, a
      parameter-like ``(Type name`` / ``, Type name`` occurrence, or a
      ``: Type`` annotation.
    * ``"direct_call"`` - ``new Type(`` or ``Type(``.
    * ``"heuristic_only"`` - the name appears on a line that does not start
      with an import-like or comment-like token.
    * ``"none"`` - nothing matched.
    """
    name = re.escape(class_name)
    # (pattern, flags, label) in priority order; only the bare ``Type(``
    # check is case-sensitive.
    ordered_checks = (
        (rf"\b(?:private|protected)\s+{name}\b", re.IGNORECASE, "direct_injection"),
        (rf"[,(]\s*{name}\s+\w+", re.IGNORECASE, "direct_injection"),
        (rf":\s*{name}\b", re.IGNORECASE, "direct_injection"),
        (rf"\bnew\s+{name}\s*\(", re.IGNORECASE, "direct_call"),
        (rf"\b{name}\s*\(", 0, "direct_call"),
        (
            rf"^(?!\s*(?:import|require|from|//|#|\*)\b).*\b{name}\b",
            re.IGNORECASE | re.MULTILINE,
            "heuristic_only",
        ),
    )
    for pattern, flags, label in ordered_checks:
        if re.search(pattern, clean, flags):
            return label
    return "none"
206
+
207
+
208
def _worst_evidence(levels: list[str]) -> str:
    """Return the weakest evidence kind in ``levels``.

    Kinds are ranked by ``_EVIDENCE_PRIORITY``; an unknown kind ranks as 0.
    An empty list yields ``"none"``.
    """
    if not levels:
        return "none"
    return min(levels, key=lambda level: _EVIDENCE_PRIORITY.get(level, 0))
210
+
211
+
212
def _compute_confidence(evidence_level: str, trace_len: int) -> str:
    """Map an evidence kind and trace length to a confidence label.

    Evidence outside ``_EVIDENCE_STRONG`` is always ``"low"``. Strong
    evidence is ``"high"`` when the trace has at least two steps and
    ``"medium"`` otherwise.
    """
    if evidence_level in _EVIDENCE_STRONG:
        if trace_len >= 2:
            return "high"
        return "medium"
    return "low"
216
+
217
+
218
def _build_trace_step(source_class: str, target_class: str, evidence_type: str) -> str:
    """Render one trace hop as ``"<source> injects|calls <target>"``.

    The verb is ``injects`` only for ``"direct_injection"``; every other
    evidence kind is rendered as ``calls``.
    """
    if evidence_type == "direct_injection":
        verb = "injects"
    else:
        verb = "calls"
    return f"{source_class} {verb} {target_class}"
221
+
222
+
180
223
  def _find_evidenced_ordered(
181
224
  root: Path,
182
225
  source_path: str,
@@ -307,4 +350,296 @@ def analyze_execution_paths(
307
350
  "end_state": _detect_end_state([item["step"] for item in path_items]),
308
351
  })
309
352
 
353
+
354
+ # ── Behavioral impact helpers ─────────────────────────────────────────────────
355
+
356
def _domain_from_class(class_name: str) -> str:
    """Derive a lowercase, space-separated domain noun from a class name.

    One trailing role suffix (Repository, Service, Controller, ...) is
    removed case-insensitively, then camelCase boundaries are turned into
    spaces. The result is empty when the name is only a role suffix.
    """
    role_suffix = (
        r"(?i)(?:repository|repo|dao|mapper|store|service|manager|handler|helper|"
        r"impl|controller|api|resource|endpoint|facade)$"
    )
    base = re.sub(role_suffix, "", class_name)
    # Insert a space wherever a lowercase letter is followed by an uppercase one.
    spaced = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", base)
    return spaced.strip().lower()
364
+
365
+
366
def _impact_item(statement: str, support: str, certainty: str) -> dict:
    """Bundle one impact finding into a dict with statement, support and certainty."""
    return dict(statement=statement, support=support, certainty=certainty)
368
+
369
+
370
def _impact_descriptions(
    changed_class: str,
    changed_type: str,
    end_state: str,
    ctrl_clean: str,
    evidence_level: str,
) -> list[dict]:
    """Describe the impact of a changed non-entry class reached from a controller.

    The first item depends on ``changed_type`` (repository, service, or
    anything else) and, for services, on whether ``end_state`` is
    ``"DB write"``. Its certainty is ``"medium"`` for strong evidence and
    ``"low"`` otherwise. Two further ``"high"`` certainty items are added
    when ``ctrl_clean`` contains security markers or ``@Transactional``.
    At most three items are returned.
    """
    domain = _domain_from_class(changed_class)
    certainty = "low" if evidence_level not in _EVIDENCE_STRONG else "medium"

    persistence_stmt = f"{domain} persistence affected" if domain else "persistence affected"
    behavior_stmt = f"{domain} behavior may change" if domain else "behavior may change"

    if changed_type in _REPO_ARTIFACT_TYPES:
        statement = persistence_stmt
        support = f"{changed_class} is a repository in path"
    elif changed_type in _SERVICE_ARTIFACT_TYPES:
        if end_state == "DB write":
            statement = persistence_stmt
            support = f"{changed_class} delegates to repository with DB write"
        else:
            statement = behavior_stmt
            support = f"{changed_class} is a service in path"
    else:
        statement = behavior_stmt
        support = f"{changed_class} is in path"

    items: list[dict] = [_impact_item(statement, support, certainty)]

    has_security_marker = re.search(
        r"@PreAuthorize|@Secured|@RolesAllowed|hasRole\(|isAuthenticated",
        ctrl_clean,
        re.IGNORECASE,
    )
    if has_security_marker:
        items.append(_impact_item(
            "authorization check present on entry point",
            "security annotation detected on controller",
            "high",
        ))

    if re.search(r"@Transactional\b", ctrl_clean):
        items.append(_impact_item(
            "transactional boundary in path",
            "@Transactional detected on entry point",
            "high",
        ))

    return items[:3]
422
+
423
+
424
def _impact_descriptions_for_controller(
    affected_path: list[str],
    end_state: str,
    ctrl_clean: str,
    evidence_level: str,
) -> list[dict]:
    """Describe the impact of a changed controller along its forward path.

    When ``end_state`` is ``"DB write"`` the first item reports affected
    persistence. It is named after the last step in ``affected_path`` whose
    class part (the text before the first ``.``) yields a non-empty domain.
    Otherwise the first item is a generic request-handler statement.
    Security and ``@Transactional`` markers found in ``ctrl_clean`` add
    ``"high"`` certainty items. At most three items are returned.
    """
    certainty = "low" if evidence_level not in _EVIDENCE_STRONG else "medium"

    if end_state == "DB write":
        # Walk the path from its end and keep the first non-empty domain.
        domains = (_domain_from_class(step.split(".")[0]) for step in reversed(affected_path))
        domain = next((d for d in domains if d), "")
        first = _impact_item(
            f"{domain} persistence affected" if domain else "data persistence affected",
            "repository with DB write detected in path",
            certainty,
        )
    else:
        first = _impact_item(
            "request handler behavior may change",
            "controller entry point modified",
            certainty,
        )
    items: list[dict] = [first]

    has_security_marker = re.search(
        r"@PreAuthorize|@Secured|@RolesAllowed|hasRole\(|isAuthenticated",
        ctrl_clean,
        re.IGNORECASE,
    )
    if has_security_marker:
        items.append(_impact_item(
            "authorization check present on entry point",
            "security annotation detected on controller",
            "high",
        ))

    if re.search(r"@Transactional\b", ctrl_clean):
        items.append(_impact_item(
            "transactional boundary in path",
            "@Transactional detected on controller",
            "high",
        ))

    return items[:3]
468
+
469
+
470
def _append_repo_hop(
    root: Path,
    owner_path: str,
    owner_class: str,
    owner_clean: str,
    all_repos: list[str],
    affected_path: list[str],
    trace: list[str],
    evidence_levels: list[str],
) -> None:
    """Extend a causal path in place with the first repository evidenced in ``owner_path``.

    Does nothing when ``_find_evidenced_ordered`` reports no repository.
    """
    evidenced_repos = _find_evidenced_ordered(root, owner_path, all_repos)
    if not evidenced_repos:
        return
    repo_class, repo_method = evidenced_repos[0]
    repo_evidence = _classify_evidence_type(owner_clean, repo_class)
    affected_path.append(_step_label(repo_class, repo_method))
    trace.append(_build_trace_step(owner_class, repo_class, repo_evidence))
    evidence_levels.append(repo_evidence)


def analyze_behavioral_impact(
    changed_files: list[str],
    all_paths: list[str],
    root: Path,
    classify_fn: Callable[[str], dict],
    max_impacts: int = 3,
) -> list[dict]:
    """Build behavioral impact entries for PR review.

    For changed controllers: forward traversal -> service -> repository.
    For changed services/repos/domain: reverse lookup -> find callers ->
    build causal path (direct controller reference first, otherwise through
    one mediating service).

    Args:
        changed_files: Paths changed in the PR.
        all_paths: Candidate paths searched for controllers, services and
            repositories.
        root: Directory the paths are read relative to.
        classify_fn: Returns a dict with an ``"artifact_type"`` key for a
            path. It is called once per distinct path.
        max_impacts: Upper bound on the number of entries returned.

    Returns:
        A list of dicts with keys ``entry_point``, ``affected_path``,
        ``impact``, ``end_state``, ``confidence``, ``evidence_level`` and
        ``trace``. Each controller class contributes at most one entry.
        Returns ``[]`` when no evidenced path is found.
    """
    artifact_cache: dict[str, str] = {}

    def artifact_type(path: str) -> str:
        # Memoized: the same path is inspected by several filters below.
        if path not in artifact_cache:
            artifact_cache[path] = classify_fn(path)["artifact_type"]
        return artifact_cache[path]

    entry_changed = [f for f in changed_files if artifact_type(f) in _ENTRY_ARTIFACT_TYPES]
    non_entry_changed = [f for f in changed_files if artifact_type(f) not in _ENTRY_ARTIFACT_TYPES]

    all_entries = [p for p in all_paths if artifact_type(p) in _ENTRY_ARTIFACT_TYPES]
    all_services = [p for p in all_paths if artifact_type(p) in _SERVICE_ARTIFACT_TYPES]
    all_repos = [p for p in all_paths if artifact_type(p) in _REPO_ARTIFACT_TYPES]

    # Class name -> first service path with that stem (same result as a
    # first-match linear scan, without rescanning per lookup).
    service_path_by_class: dict[str, str] = {}
    for service_path in all_services:
        service_path_by_class.setdefault(Path(service_path).stem, service_path)

    result: list[dict] = []
    seen_entries: set[str] = set()

    # Case 1: changed controllers - forward traversal
    for entry_path in entry_changed:
        if len(result) >= max_impacts:
            break
        entry_class = Path(entry_path).stem
        if entry_class in seen_entries:
            continue
        ctrl_content = _read_safe(root, entry_path)
        if not ctrl_content:
            continue
        ctrl_clean = _strip_comments(ctrl_content, _detect_lang(entry_path))
        entry_method = _find_entry_method(ctrl_clean)
        entry_str = _step_label(entry_class, entry_method)

        evidenced_svcs = _find_evidenced_ordered(root, entry_path, all_services)
        if not evidenced_svcs:
            continue

        svc_class, svc_method = evidenced_svcs[0]
        svc_evidence = _classify_evidence_type(ctrl_clean, svc_class)
        affected_path = [_step_label(svc_class, svc_method)]
        trace = [_build_trace_step(entry_class, svc_class, svc_evidence)]
        evidence_levels = [svc_evidence]

        svc_path = service_path_by_class.get(svc_class)
        if svc_path:
            svc_content_raw = _read_safe(root, svc_path)
            # The repository hop is only attempted when the service is readable.
            if svc_content_raw:
                svc_clean_raw = _strip_comments(svc_content_raw, _detect_lang(svc_path))
                _append_repo_hop(
                    root, svc_path, svc_class, svc_clean_raw, all_repos,
                    affected_path, trace, evidence_levels,
                )

        end_state = _detect_end_state(affected_path)
        evidence_level = _worst_evidence(evidence_levels)
        confidence = _compute_confidence(evidence_level, len(trace))
        seen_entries.add(entry_class)
        result.append({
            "entry_point": entry_str,
            "affected_path": affected_path,
            "impact": _impact_descriptions_for_controller(affected_path, end_state, ctrl_clean, evidence_level),
            "end_state": end_state,
            "confidence": confidence,
            "evidence_level": evidence_level,
            "trace": trace,
        })

    # Case 2: changed non-controllers - reverse lookup
    for changed_path in non_entry_changed:
        if len(result) >= max_impacts:
            break
        changed_class = Path(changed_path).stem
        changed_type = artifact_type(changed_path)

        for ctrl_path in all_entries:
            if len(result) >= max_impacts:
                break
            ctrl_class = Path(ctrl_path).stem
            if ctrl_class in seen_entries:
                continue
            ctrl_content = _read_safe(root, ctrl_path)
            if not ctrl_content:
                continue
            ctrl_clean = _strip_comments(ctrl_content, _detect_lang(ctrl_path))

            affected_path = []
            trace = []
            evidence_levels = []

            if _has_code_evidence(ctrl_clean, changed_class):
                # Direct: controller -> changed class
                ctrl_to_changed = _classify_evidence_type(ctrl_clean, changed_class)
                fmap = _build_field_map(ctrl_clean)
                method = _find_called_method(ctrl_clean, changed_class, fmap)
                affected_path.append(_step_label(changed_class, method))
                trace.append(_build_trace_step(ctrl_class, changed_class, ctrl_to_changed))
                evidence_levels.append(ctrl_to_changed)
            else:
                # Indirect: controller -> mediating service -> changed class
                for svc_class, svc_method in _find_evidenced_ordered(root, ctrl_path, all_services):
                    svc_p = service_path_by_class.get(svc_class)
                    if not svc_p:
                        continue
                    svc_content = _read_safe(root, svc_p)
                    if not svc_content:
                        continue
                    svc_clean = _strip_comments(svc_content, _detect_lang(svc_p))
                    if not _has_code_evidence(svc_clean, changed_class):
                        continue

                    ctrl_to_svc = _classify_evidence_type(ctrl_clean, svc_class)
                    svc_to_changed = _classify_evidence_type(svc_clean, changed_class)
                    fmap = _build_field_map(svc_clean)
                    method = _find_called_method(svc_clean, changed_class, fmap)
                    affected_path = [_step_label(svc_class, svc_method), _step_label(changed_class, method)]
                    trace = [
                        _build_trace_step(ctrl_class, svc_class, ctrl_to_svc),
                        _build_trace_step(svc_class, changed_class, svc_to_changed),
                    ]
                    evidence_levels = [ctrl_to_svc, svc_to_changed]
                    # Only the first mediating service with evidence is used.
                    break

            if not affected_path:
                continue

            # A changed service may itself reach a repository; add that hop.
            if changed_type in _SERVICE_ARTIFACT_TYPES:
                changed_content = _read_safe(root, changed_path)
                changed_clean = (
                    _strip_comments(changed_content, _detect_lang(changed_path))
                    if changed_content else ""
                )
                _append_repo_hop(
                    root, changed_path, changed_class, changed_clean, all_repos,
                    affected_path, trace, evidence_levels,
                )

            entry_method = _find_entry_method(ctrl_clean)
            end_state = _detect_end_state(affected_path)
            evidence_level = _worst_evidence(evidence_levels)
            confidence = _compute_confidence(evidence_level, len(trace))
            seen_entries.add(ctrl_class)
            result.append({
                "entry_point": _step_label(ctrl_class, entry_method),
                "affected_path": affected_path,
                "impact": _impact_descriptions(changed_class, changed_type, end_state, ctrl_clean, evidence_level),
                "end_state": end_state,
                "confidence": confidence,
                "evidence_level": evidence_level,
                "trace": trace,
            })

    return result
@@ -352,6 +352,11 @@ class TaskOutput:
352
352
  review_hotspots: list[str] = field(default_factory=list)
353
353
  suggested_review_order: list[str] = field(default_factory=list)
354
354
  execution_paths: list[dict] = field(default_factory=list)
355
+ behavioral_impact: list[dict] = field(default_factory=list)
356
+ # git-first scope metadata (review-pr only)
357
+ scope_source: Optional[str] = None # "git_diff" | "staged" | "untracked" | "full_scan_fallback"
358
+ scope_files: list[str] = field(default_factory=list)
359
+ repo_root: Optional[str] = None
355
360
 
356
361
 
357
362
  # ─────────────────────────────────────────────────────────────────────────────
@@ -439,23 +444,81 @@ class TaskContextBuilder:
439
444
  )
440
445
  spec = TASKS[task_name]
441
446
 
447
+ # ── 0. review-pr: git-first scope resolution (before any filesystem scan) ─
448
+ _pr_git_root: Optional[Path] = None
449
+ _pr_scope_files: Optional[list[str]] = None
450
+ _pr_scope_source: str = "full_scan_fallback"
451
+
452
+ if task_name == "review-pr":
453
+ _pr_git_root = self._resolve_git_root()
454
+ if _pr_git_root is None:
455
+ return TaskOutput(
456
+ task="review-pr", goal=spec.goal,
457
+ project_summary=None, architecture_summary=None,
458
+ relevant_files=[], suspected_areas=[],
459
+ improvement_opportunities=[], test_gaps=[],
460
+ key_dependencies=[], code_notes_summary=None,
461
+ limitations=[], confidence="low",
462
+ error_code="no_git_repo",
463
+ error_message="review-pr requires a git repository.",
464
+ ci_decision="no_git_repo",
465
+ scope_source="full_scan_fallback",
466
+ repo_root=str(self.root),
467
+ )
468
+ _raw_scope, _pr_scope_source = self._get_pr_scope_files(since=since)
469
+ if _raw_scope is None:
470
+ # Explicit --since ref is invalid
471
+ _avail_pr, _sug_pr = self._get_available_refs(since or "")
472
+ _pr_hints: list[str] = []
473
+ if _sug_pr:
474
+ _pr_hints.append(f"Did you mean '{_sug_pr}'?")
475
+ if _avail_pr:
476
+ _pr_hints.append(f"Available refs: {', '.join(_avail_pr[:8])}")
477
+ return TaskOutput(
478
+ task="review-pr", goal=spec.goal,
479
+ project_summary=None, architecture_summary=None,
480
+ relevant_files=[], suspected_areas=[],
481
+ improvement_opportunities=[], test_gaps=[],
482
+ key_dependencies=[], code_notes_summary=None,
483
+ limitations=[], confidence="low",
484
+ since=since,
485
+ error_code="git_ref_not_found",
486
+ error_message=f"Base ref '{since}' not found in this repository.",
487
+ error_hints=_pr_hints,
488
+ gaps=[f"Cannot compute PR diff: git ref '{since}' not found."] + _pr_hints,
489
+ ci_decision="git_ref_error",
490
+ scope_source="git_diff",
491
+ repo_root=str(_pr_git_root),
492
+ )
493
+ _pr_scope_files = _raw_scope
494
+ # _pr_scope_files == [] means no diff; handled in step 5d
495
+
496
+ _use_git_first = task_name == "review-pr"
497
+
442
498
  # ── 1. Scan ────────────────────────────────────────────────────────
443
499
  from sourcecode.adaptive_scanner import AdaptiveScanner
444
500
  from sourcecode.repo_classifier import RepoClassifier
445
501
  from sourcecode.tree_utils import flatten_file_tree
446
-
447
- _topology = RepoClassifier().classify(self.root)
448
- # Shallow pre-scan to detect Java manifests before choosing depth.
449
502
  from sourcecode.scanner import FileScanner as _FileScanner
450
- _pre = _FileScanner(self.root, max_depth=1)
451
- _pre_manifests = _pre.find_manifests()
503
+
504
+ _pre_manifests = _FileScanner(self.root, max_depth=1).find_manifests()
452
505
  _java_names = {"pom.xml", "build.gradle", "build.gradle.kts"}
453
506
  _is_java = any(Path(m).name in _java_names for m in _pre_manifests)
454
- _base_depth = 12 if _is_java else 6
455
- scanner = AdaptiveScanner(self.root, topology=_topology, base_depth=_base_depth)
456
- file_tree = scanner.scan_tree()
457
- manifests = scanner.find_manifests()
458
- all_paths = [p.replace("\\", "/") for p in flatten_file_tree(file_tree)]
507
+ manifests = _pre_manifests
508
+
509
+ if _use_git_first:
510
+ # Git-first: no full filesystem traversal — skip AdaptiveScanner.
511
+ # all_paths = scope files + siblings in same directories (bounded context
512
+ # for behavioral_impact reverse lookups without scanning the whole repo).
513
+ file_tree: dict = {}
514
+ all_paths = self._expand_scope_for_analysis(_pr_scope_files or [])
515
+ else:
516
+ _topology = RepoClassifier().classify(self.root)
517
+ _base_depth = 12 if _is_java else 6
518
+ scanner = AdaptiveScanner(self.root, topology=_topology, base_depth=_base_depth)
519
+ file_tree = scanner.scan_tree()
520
+ manifests = scanner.find_manifests()
521
+ all_paths = [p.replace("\\", "/") for p in flatten_file_tree(file_tree)]
459
522
 
460
523
  # Warn when Java project has no Mapper.xml — suggests files below scan depth.
461
524
  _mybatis_warning: dict | None = None
@@ -486,25 +549,26 @@ class TaskContextBuilder:
486
549
  else:
487
550
  stacks, entry_points, _ = detector.detect(self.root, file_tree, _detection_manifests)
488
551
 
489
- # Iterate workspaces to collect per-workspace stacks and entry points —
490
- # same approach as the main CLI (cli.py lines 971-1041).
491
- for workspace in workspace_analysis.workspaces:
492
- ws_root = self.root / workspace.path
493
- if not ws_root.exists() or not ws_root.is_dir():
494
- continue
495
- _ws_topology = RepoClassifier().classify(ws_root)
496
- _ws_scanner = AdaptiveScanner(ws_root, topology=_ws_topology, base_depth=6)
497
- _ws_tree = _ws_scanner.scan_tree()
498
- _ws_manifests = _ws_scanner.find_manifests()
499
- _ws_stacks, _ws_eps, _ = detector.detect(ws_root, _ws_tree, _ws_manifests)
500
- stacks.extend(
501
- _replace(s, root=workspace.path, workspace=workspace.path, primary=False)
502
- for s in _ws_stacks
503
- )
504
- entry_points.extend(
505
- _replace(ep, path=f"{workspace.path}/{ep.path}")
506
- for ep in _ws_eps
507
- )
552
+ if not _use_git_first:
553
+ # Workspace sub-scans: each runs AdaptiveScanner on a workspace root.
554
+ # Skipped for review-pr: it would re-trigger a full traversal per workspace.
555
+ for workspace in workspace_analysis.workspaces:
556
+ ws_root = self.root / workspace.path
557
+ if not ws_root.exists() or not ws_root.is_dir():
558
+ continue
559
+ _ws_topology = RepoClassifier().classify(ws_root)
560
+ _ws_scanner = AdaptiveScanner(ws_root, topology=_ws_topology, base_depth=6)
561
+ _ws_tree = _ws_scanner.scan_tree()
562
+ _ws_manifests = _ws_scanner.find_manifests()
563
+ _ws_stacks, _ws_eps, _ = detector.detect(ws_root, _ws_tree, _ws_manifests)
564
+ stacks.extend(
565
+ _replace(s, root=workspace.path, workspace=workspace.path, primary=False)
566
+ for s in _ws_stacks
567
+ )
568
+ entry_points.extend(
569
+ _replace(ep, path=f"{workspace.path}/{ep.path}")
570
+ for ep in _ws_eps
571
+ )
508
572
 
509
573
  stacks, project_type = detector.classify_results(
510
574
  file_tree, stacks, entry_points,
@@ -668,49 +732,10 @@ class TaskContextBuilder:
668
732
  elif _delta_raw:
669
733
  _delta_files = set(_delta_raw)
670
734
 
671
- # ── 5d. review-pr: git-first gate ──────────────────────────────────────
735
+ # ── 5d. review-pr: set _delta_files from pre-resolved git scope ──────────
736
+ # No-git and invalid-ref cases were already handled in step 0 (early returns).
672
737
  if task_name == "review-pr":
673
- if not self._is_git_repo():
674
- return TaskOutput(
675
- task="review-pr", goal=spec.goal,
676
- project_summary=None, architecture_summary=None,
677
- relevant_files=[], suspected_areas=[],
678
- improvement_opportunities=[], test_gaps=[],
679
- key_dependencies=[], code_notes_summary=None,
680
- limitations=[], confidence="low",
681
- error_code="no_git_repo",
682
- error_message="review-pr requires a git repository.",
683
- ci_decision="no_git_repo",
684
- )
685
- if since is None:
686
- # review-pr with no --since: check only uncommitted changes.
687
- # _get_git_changed_files(since=None) defaults to HEAD~1 which
688
- # returns the last *committed* diff — a false positive here.
689
- _pr_raw: Optional[list[str]] = self._get_uncommitted_changed_files()
690
- else:
691
- _pr_raw = self._get_git_changed_files(since=since)
692
- if _pr_raw is None:
693
- _avail_pr, _sug_pr = self._get_available_refs(since or "")
694
- _pr_hints: list[str] = []
695
- if _sug_pr:
696
- _pr_hints.append(f"Did you mean '{_sug_pr}'?")
697
- if _avail_pr:
698
- _pr_hints.append(f"Available refs: {', '.join(_avail_pr[:8])}")
699
- return TaskOutput(
700
- task="review-pr", goal=spec.goal,
701
- project_summary=None, architecture_summary=None,
702
- relevant_files=[], suspected_areas=[],
703
- improvement_opportunities=[], test_gaps=[],
704
- key_dependencies=[], code_notes_summary=None,
705
- limitations=[], confidence="low",
706
- since=since,
707
- error_code="git_ref_not_found",
708
- error_message=f"Base ref '{since}' not found in this repository.",
709
- error_hints=_pr_hints,
710
- gaps=[f"Cannot compute PR diff: git ref '{since}' not found."] + _pr_hints,
711
- ci_decision="git_ref_error",
712
- )
713
- if not _pr_raw:
738
+ if not _pr_scope_files:
714
739
  _no_diff_hint = "review-pr requires changed files or --since <ref>."
715
740
  return TaskOutput(
716
741
  task="review-pr", goal=spec.goal,
@@ -723,8 +748,11 @@ class TaskContextBuilder:
723
748
  error_message=f"No PR diff detected. {_no_diff_hint}",
724
749
  gaps=[f"No PR diff detected. {_no_diff_hint}"],
725
750
  ci_decision="no_changes",
751
+ scope_source=_pr_scope_source,
752
+ scope_files=[],
753
+ repo_root=str(_pr_git_root),
726
754
  )
727
- _delta_files = set(_pr_raw)
755
+ _delta_files = set(_pr_scope_files)
728
756
 
729
757
  # ── 5c. review-pr suspected_areas (needs git uncommitted_files) ──────
730
758
  if task_name == "review-pr" and spec.enable_code_notes:
@@ -875,12 +903,20 @@ class TaskContextBuilder:
875
903
  _pr_suggested_review_order.append(_f)
876
904
  _seen_order.add(_f)
877
905
 
878
- # ── 6d. review-pr: execution paths ──────────────────────────────────
906
+ # ── 6d. review-pr: execution paths + behavioral impact ──────────────
879
907
  _execution_paths: list[dict] = []
908
+ _behavioral_impact: list[dict] = []
880
909
  if task_name == "review-pr" and _delta_files:
881
- from sourcecode.flow_analyzer import analyze_execution_paths
910
+ from sourcecode.flow_analyzer import analyze_execution_paths, analyze_behavioral_impact
911
+ _changed_sorted = sorted(_delta_files)
882
912
  _execution_paths = analyze_execution_paths(
883
- changed_files=sorted(_delta_files),
913
+ changed_files=_changed_sorted,
914
+ all_paths=all_paths,
915
+ root=self.root,
916
+ classify_fn=self._classify_changed_file,
917
+ )
918
+ _behavioral_impact = analyze_behavioral_impact(
919
+ changed_files=_changed_sorted,
884
920
  all_paths=all_paths,
885
921
  root=self.root,
886
922
  classify_fn=self._classify_changed_file,
@@ -1117,6 +1153,11 @@ class TaskContextBuilder:
1117
1153
  review_hotspots=_pr_review_hotspots,
1118
1154
  suggested_review_order=_pr_suggested_review_order,
1119
1155
  execution_paths=_execution_paths,
1156
+ behavioral_impact=_behavioral_impact,
1157
+ # git-first scope metadata
1158
+ scope_source=_pr_scope_source if task_name == "review-pr" else None,
1159
+ scope_files=list(_pr_scope_files) if task_name == "review-pr" and _pr_scope_files else [],
1160
+ repo_root=str(_pr_git_root) if task_name == "review-pr" and _pr_git_root else None,
1120
1161
  )
1121
1162
 
1122
1163
  def render_prompt(self, output: TaskOutput) -> str:
@@ -1408,6 +1449,122 @@ class TaskContextBuilder:
1408
1449
  def _is_source(self, path: str) -> bool:
1409
1450
  return Path(path).suffix.lower() in _SOURCE_EXTENSIONS
1410
1451
 
1452
def _resolve_git_root(self) -> Optional[Path]:
    """Return the git top-level directory for ``self.root``, or None.

    Runs ``git rev-parse --show-toplevel`` with ``self.root`` as the
    working directory. None is returned when git is not found, the call
    times out (5 s), the command exits non-zero, or it prints nothing.
    """
    import subprocess

    command = ["git", "rev-parse", "--show-toplevel"]
    try:
        proc = subprocess.run(
            command,
            cwd=str(self.root),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return None

    if proc.returncode != 0:
        return None
    toplevel = proc.stdout.strip()
    if not toplevel:
        return None
    return Path(toplevel)
1467
+
1468
def _get_pr_scope_files(self, since: Optional[str] = None) -> tuple[Optional[list[str]], str]:
    """Return (files, scope_source) for review-pr scope resolution.

    Returns (None, _) only when since is explicitly provided but the
    ``git diff`` against it fails (treated as an invalid ref).
    Returns ([], _) when git is available but no changes are found.
    scope_source is a comma-separated list of active sources
    (git_diff, staged, untracked); it defaults to "git_diff" when no
    source contributed any file.

    Untracked files are listed individually (``--untracked-files=all``).
    Without that flag git collapses a new directory to ``dir/``, which is
    skipped below, so files in new directories would be missing from scope.
    """
    import subprocess

    def _run(*cmd: str) -> Optional[list[str]]:
        """Run a command in self.root; return stripped non-empty stdout lines, None on failure."""
        try:
            r = subprocess.run(
                list(cmd), cwd=str(self.root),
                capture_output=True, text=True,
                encoding="utf-8", errors="replace", timeout=10,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError):
            return None
        if r.returncode != 0:
            return None
        return [ln.strip() for ln in (r.stdout or "").splitlines() if ln.strip()]

    files: set[str] = set()
    sources: list[str] = []

    def _absorb(paths: Optional[list[str]], label: str) -> None:
        """Add not-yet-seen paths; record ``label`` once if anything was added."""
        fresh = set(paths or ()) - files
        if fresh:
            files.update(fresh)
            if label not in sources:
                sources.append(label)

    if since is not None:
        committed = _run("git", "diff", "--name-only", "--relative", since, "HEAD")
        if committed is None:
            return None, "git_diff"  # invalid ref - hard error
        _absorb(committed, "git_diff")
    else:
        # Working tree vs HEAD~1: covers last commit + all uncommitted changes
        _absorb(_run("git", "diff", "--name-only", "--relative", "HEAD~1"), "git_diff")
        # Working tree vs HEAD: uncommitted only (may add new unstaged files)
        _absorb(_run("git", "diff", "--name-only", "--relative", "HEAD"), "git_diff")
        # Staged changes not yet committed
        _absorb(_run("git", "diff", "--name-only", "--cached", "--relative"), "staged")

    # Untracked files (both cases). "-uall" expands untracked directories
    # into their files; the trailing-slash guard stays as a safety net.
    status = _run("git", "status", "--porcelain", "--short", "--untracked-files=all")
    untracked: list[str] = []
    for line in status or ():
        if line.startswith("??") and len(line) > 3:
            candidate = line[3:].strip()
            if candidate and not candidate.endswith("/"):
                untracked.append(candidate)
    _absorb(untracked, "untracked")

    # Drop paths outside self.root (../ prefix means above cwd - occurs when
    # self.root is a subdirectory of the git repo and git status shows repo-level files).
    in_root = {f for f in files if not f.startswith("../") and not f.startswith("..\\")}

    scope_source = ",".join(sources) if sources else "git_diff"
    return sorted(in_root), scope_source
1540
+
1541
def _expand_scope_for_analysis(self, scope_files: list[str]) -> list[str]:
    """Return scope_files plus the files that share a directory with them.

    Each distinct parent directory of a scope file is listed once, without
    recursion, and its regular files are added as ``/``-separated paths
    relative to ``self.root``. Directories that cannot be listed are
    skipped. Paths that do not exist under ``self.root`` are dropped from
    the sorted result.
    """
    candidates: set[str] = set(scope_files)
    visited: set[Path] = set()

    for scope_file in scope_files:
        directory = Path(scope_file).parent
        if directory in visited:
            continue
        visited.add(directory)

        absolute_dir = self.root / directory
        if not absolute_dir.is_dir():
            continue
        try:
            for child in absolute_dir.iterdir():
                if not child.is_file():
                    continue
                relative = str(child.relative_to(self.root))
                candidates.add(relative.replace("\\", "/"))
        except OSError:
            # Best effort: an unreadable directory contributes no siblings.
            pass

    return sorted(path for path in candidates if (self.root / path).exists())
1567
+
1411
1568
  def _is_git_repo(self) -> bool:
1412
1569
  import subprocess
1413
1570
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.30.4
3
+ Version: 1.30.6
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
221
221
 
222
222
  **Deterministic, behavior-aware codebase context for AI agents and PR review.**
223
223
 
224
- ![Version](https://img.shields.io/badge/version-1.30.4-blue)
224
+ ![Version](https://img.shields.io/badge/version-1.30.6-blue)
225
225
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
226
226
 
227
227
  ---
@@ -257,7 +257,7 @@ pipx install sourcecode
257
257
 
258
258
  ```bash
259
259
  sourcecode version
260
- # sourcecode 1.30.4
260
+ # sourcecode 1.30.6
261
261
  ```
262
262
 
263
263
  ---
@@ -1,10 +1,10 @@
1
- sourcecode/__init__.py,sha256=lum_KRetZZierS82OkkxkHcbIkjAw1eYUAQiiRzOrX8,103
1
+ sourcecode/__init__.py,sha256=MEBCm2OG1EipFt5XTCDIkAml5j9KHaqNtfp_BSFcGa0,103
2
2
  sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
7
- sourcecode/cli.py,sha256=K6ecski4uwAWtATwxD-OF8IZlfwsqFWHCoDPOi4U9bI,80775
7
+ sourcecode/cli.py,sha256=zP55Nf483vXefZa_3KG-wTZbaoxZ8ek3xKwZJHiZtng,81101
8
8
  sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
9
9
  sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -17,11 +17,11 @@ sourcecode/doc_analyzer.py,sha256=afA4uJFwXZ_uR2l4J0pQwbeTkRkGmKdN9KhRVYePBUw,24
17
17
  sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
18
18
  sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
19
19
  sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
20
- sourcecode/flow_analyzer.py,sha256=VQDrItg3NBqOOD8PxHXyntXQnPweUuUn6JtOY8lNWys,12841
20
+ sourcecode/flow_analyzer.py,sha256=m29PJPdAwH4n3ZNqMidgi97csSUUtav5SM9lkDy_sr8,27219
21
21
  sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
22
22
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
23
23
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
24
- sourcecode/prepare_context.py,sha256=ELrCIIcttip4B3y9aQZdMPqIgzaEJR0evDdG8QYTBLc,129623
24
+ sourcecode/prepare_context.py,sha256=WDsG7XA5yjtvsVvlSFn0E4rb3wVu_vUe4ztDOZ6rYfo,136632
25
25
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
26
26
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
27
27
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -62,8 +62,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
62
62
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
63
63
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
64
64
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
65
- sourcecode-1.30.4.dist-info/METADATA,sha256=lSj_ODIJgDwxQP4EJ1VN9dbO0tiaAlWBiMG6qLep3mo,26770
66
- sourcecode-1.30.4.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
67
- sourcecode-1.30.4.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
68
- sourcecode-1.30.4.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
69
- sourcecode-1.30.4.dist-info/RECORD,,
65
+ sourcecode-1.30.6.dist-info/METADATA,sha256=ydDuZ4ucf78fNK7tdu0BQlkCaO27zHFYT-eoOxW0CC0,26770
66
+ sourcecode-1.30.6.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
67
+ sourcecode-1.30.6.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
68
+ sourcecode-1.30.6.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
69
+ sourcecode-1.30.6.dist-info/RECORD,,