sourcecode 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.30.0"
3
+ __version__ = "1.30.2"
sourcecode/cli.py CHANGED
@@ -1866,6 +1866,8 @@ def prepare_context_cmd(
1866
1866
  out["review_hotspots"] = output.review_hotspots
1867
1867
  if output.suggested_review_order:
1868
1868
  out["suggested_review_order"] = output.suggested_review_order
1869
+ if output.execution_paths:
1870
+ out["execution_paths"] = output.execution_paths
1869
1871
  if output.impact_summary:
1870
1872
  out["impact_summary"] = output.impact_summary
1871
1873
  if output.why_these_files:
@@ -0,0 +1,310 @@
1
+ """flow_analyzer.py — Evidence-based execution path extraction for PR context.
2
+
3
+ Builds Entry → Service → Repository → EndState ordered sequences using ONLY
4
+ direct code evidence: field injection, constructor params, type annotations,
5
+ method calls, explicit instantiation.
6
+
7
+ V3: execution_paths with runtime_notes — conditional branches, optional execution,
8
+ and async side-effects are surfaced when explicit code signals exist.
9
+ No inference, no naming, no invented behavior.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from pathlib import Path
15
+ from typing import Callable, Optional
16
+
17
# Artifact-type labels that place a file in one layer of an execution path.
# They are compared against classify_fn(path)["artifact_type"] in
# analyze_execution_paths.
_ENTRY_ARTIFACT_TYPES = frozenset({"controller", "entrypoint"})
_SERVICE_ARTIFACT_TYPES = frozenset({"service"})
_REPO_ARTIFACT_TYPES = frozenset({"repository", "mapper"})

# Substrings searched for in lower-cased step labels by _detect_end_state.
# Matching is plain substring containment, not whole-word matching.
_DB_KEYWORDS = frozenset({"repository", "dao", "mapper", "store", "jpa", "jdbc", "sql"})
_EVENT_KEYWORDS = frozenset({"event", "publish", "emit", "kafka", "queue", "rabbit", "sns", "bus"})

# Route annotation / decorator, matched case-insensitively. Three shapes:
#   @<Verb>Mapping...)   e.g. @GetMapping("/x"), @RequestMapping("/x")
#   @<Verb>(...)         e.g. @Get("/x")
#   @<name>.<verb>(...)  e.g. @app.get("/x")
# Each alternative ends at the first ")" it encounters.
_HTTP_ENTRY_RE = re.compile(
    r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping[^)]*\)'
    r'|@(?:Get|Post|Put|Delete|Patch)\([^)]*\)'
    r'|@\w+\.(?:get|post|put|delete|patch)\([^)]*\)',
    re.IGNORECASE,
)
# Captures (group 1) the identifier directly before "(", after any number of
# public/async/def/function keywords and at most one type-like token.
_METHOD_NAME_RE = re.compile(
    r'(?:public\s+|async\s+|def\s+|function\s+)*'
    r'(?:[\w<>\[\]]+\s+)?'
    r'(\w+)\s*\(',
)

# Runtime signal patterns: (compiled_regex, note_text)
# Only signals with explicit code evidence — no inference.
# Three categories: condition | branch | async
# _collect_runtime_notes emits a note when its pattern matches anywhere in the
# comment-stripped file; list order here is the order notes are reported in.
_RUNTIME_SIGNALS: list[tuple[re.Pattern, str]] = [
    # ── Conditional / auth guards ─────────────────────────────────────────────
    (re.compile(r'@PreAuthorize|@Secured|@RolesAllowed', re.IGNORECASE),
     "condition: authorization check present (@PreAuthorize / @Secured)"),
    (re.compile(r'isAuthenticated\(\)|hasRole\(|hasAuthority\(|SecurityContextHolder', re.IGNORECASE),
     "condition: reads authentication context"),
    (re.compile(r'featureFlag|FeatureToggle|\.isEnabled\s*\(|\.isActive\s*\(', re.IGNORECASE),
     "condition: feature flag gates execution"),
    # Null/empty guard with early return — matches if (...null/empty...) return/throw on same line
    (re.compile(r'if\s*\([^)]*(?:==\s*null|!=\s*null|isEmpty\s*\(\)|isBlank\s*\(\))[^)]*\)'
                r'\s*(?:\{?\s*)?(?:return|throw)\b', re.IGNORECASE),
     "condition: null/empty guard with early return"),

    # ── Optional execution / branching ────────────────────────────────────────
    (re.compile(r'@Cacheable|@CacheEvict|@CachePut', re.IGNORECASE),
     "branch: Spring cache may short-circuit downstream call"),
    (re.compile(r'\.getIfPresent\s*\(|cache\.get\s*\(|cacheManager\.', re.IGNORECASE),
     "branch: manual cache lookup may short-circuit"),
    (re.compile(r'Optional\s*<|\.orElseThrow\s*\(|\.orElseGet\s*\(|\.orElse\s*\(', re.IGNORECASE),
     "branch: result may be absent (Optional)"),

    # ── Async / side effects ──────────────────────────────────────────────────
    # The first two patterns in this group are case-sensitive (no re.IGNORECASE).
    (re.compile(r'@Async\b'),
     "async: runs in separate thread (@Async)"),
    (re.compile(r'CompletableFuture|\.supplyAsync\s*\(|\.runAsync\s*\('),
     "async: non-blocking future-based execution"),
    (re.compile(r'\basync\s+def\b|\bawait\b', re.IGNORECASE),
     "async: non-blocking (async/await)"),
    (re.compile(r'publishEvent\s*\(|applicationEventPublisher|eventPublisher\.', re.IGNORECASE),
     "async: Spring application event emitted"),
    (re.compile(r'kafkaTemplate\.|KafkaProducer|@KafkaListener', re.IGNORECASE),
     "async: Kafka message produced"),
    (re.compile(r'rabbitTemplate\.|amqpTemplate\.|@RabbitListener', re.IGNORECASE),
     "async: RabbitMQ message sent"),
]
74
+
75
+
76
+ def _detect_lang(path: str) -> str:
77
+ return {
78
+ ".java": "java", ".kt": "kotlin",
79
+ ".py": "python",
80
+ ".ts": "typescript", ".tsx": "typescript",
81
+ ".js": "javascript", ".jsx": "javascript",
82
+ ".go": "go", ".cs": "csharp", ".rb": "ruby", ".php": "php",
83
+ }.get(Path(path).suffix.lower(), "unknown")
84
+
85
+
86
+ def _strip_comments(content: str, lang: str) -> str:
87
+ content = re.sub(r"/\*.*?\*/", " ", content, flags=re.DOTALL)
88
+ content = re.sub(r"//[^\n]*", " ", content)
89
+ if lang in ("python", "ruby", "go"):
90
+ content = re.sub(r"#[^\n]*", " ", content)
91
+ return content
92
+
93
+
94
+ def _read_safe(root: Path, rel_path: str) -> str:
95
+ try:
96
+ return (root / rel_path).read_text(encoding="utf-8", errors="ignore")
97
+ except (OSError, ValueError):
98
+ return ""
99
+
100
+
101
def _collect_runtime_notes(content: str, lang: str) -> list[str]:
    """Collect the runtime-behaviour notes whose pattern occurs in *content*.

    Comments are stripped first, so a signal that only appears in a comment
    does not count. Notes come back in ``_RUNTIME_SIGNALS`` order, each at
    most once. The list is empty when nothing matches.
    """
    stripped = _strip_comments(content, lang)
    matched = (text for regex, text in _RUNTIME_SIGNALS if regex.search(stripped))
    # dict.fromkeys keeps first-seen order while dropping repeated note texts.
    return list(dict.fromkeys(matched))
115
+
116
+
117
def _find_entry_method(clean: str) -> Optional[str]:
    """Return the name of the first handler method that follows a route annotation.

    Args:
        clean: Comment-stripped source text of the entry file.

    Returns:
        The identifier captured by ``_METHOD_NAME_RE`` immediately after a
        ``_HTTP_ENTRY_RE`` match, or ``None`` when no annotation is followed
        by a usable method declaration.

    Every route annotation is tried in file order. A match that is not
    directly followed by a method declaration is skipped rather than ending
    the search. The typical case is a class-level ``@RequestMapping(...)``
    sitting above ``public class ...``; the method-level mappings further
    down are still found.
    """
    # Keywords the method-name pattern can capture by mistake.
    not_a_method = ("public", "async", "def", "function", "void", "override")
    for annotation in _HTTP_ENTRY_RE.finditer(clean):
        declaration = _METHOD_NAME_RE.match(clean[annotation.end():].lstrip())
        if declaration is None:
            continue
        name = declaration.group(1)
        if name.lower() not in not_a_method:
            return name
    return None
128
+
129
+
130
+ def _build_field_map(clean: str) -> dict[str, str]:
131
+ """Map field_name_lower → ClassName from injection patterns."""
132
+ fmap: dict[str, str] = {}
133
+ for m in re.finditer(r"private\s+(\w+)(?:<[^>]+>)?\s+(\w+)\s*[;=,)]", clean):
134
+ fmap[m.group(2).lower()] = m.group(1)
135
+ for m in re.finditer(r"(?:private|protected|readonly)\s+(\w+)\s*:\s*(\w+)", clean):
136
+ fmap[m.group(1).lower()] = m.group(2)
137
+ for m in re.finditer(r"self\.(\w+)\s*=\s*(\w+)\s*\(", clean):
138
+ fmap[m.group(1).lower()] = m.group(2)
139
+ return fmap
140
+
141
+
142
+ def _find_called_method(clean: str, class_name: str, fmap: dict[str, str]) -> Optional[str]:
143
+ fields = [f for f, t in fmap.items() if t.lower() == class_name.lower()]
144
+ for field in fields:
145
+ pat = rf"\bthis\.{re.escape(field)}\.(\w+)\s*\(|\b{re.escape(field)}\.(\w+)\s*\("
146
+ for m in re.finditer(pat, clean, re.IGNORECASE):
147
+ name = m.group(1) or m.group(2)
148
+ if name and name.lower() not in ("class", "new", "super", "get", "set"):
149
+ return name
150
+ for m in re.finditer(rf"\b{re.escape(class_name)}\.(\w+)\s*\(", clean, re.IGNORECASE):
151
+ name = m.group(1)
152
+ if name.lower() not in ("class", "new", "super"):
153
+ return name
154
+ return None
155
+
156
+
157
+ def _has_code_evidence(clean: str, class_name: str) -> bool:
158
+ """True only when class_name has direct code evidence in pre-stripped content."""
159
+ esc = re.escape(class_name)
160
+ if re.search(rf"\b(?:private|protected)\s+{esc}\b", clean, re.IGNORECASE):
161
+ return True
162
+ if re.search(rf"[,(]\s*{esc}\s+\w+", clean, re.IGNORECASE):
163
+ return True
164
+ if re.search(rf":\s*{esc}\b", clean, re.IGNORECASE):
165
+ return True
166
+ if re.search(rf"\bnew\s+{esc}\s*\(", clean, re.IGNORECASE):
167
+ return True
168
+ if re.search(rf"\b{esc}\s*\(", clean):
169
+ return True
170
+ if re.search(rf"\b{esc}\b", clean, re.IGNORECASE):
171
+ non_import = re.search(
172
+ rf"^(?!\s*(?:import|require|from|//|#|\*)\b).*\b{esc}\b",
173
+ clean, re.IGNORECASE | re.MULTILINE,
174
+ )
175
+ if non_import:
176
+ return True
177
+ return False
178
+
179
+
180
def _find_evidenced_ordered(
    root: Path,
    source_path: str,
    candidates: list[str],
) -> list[tuple[str, Optional[str]]]:
    """List the candidates that *source_path* references, in order of first mention.

    Each candidate is identified by its file stem. Only candidates for which
    ``_has_code_evidence`` succeeds on the comment-stripped source are kept.
    Each is paired with the method ``_find_called_method`` reports (or
    ``None``) and sorted by the offset of the first case-insensitive mention
    of the class name. An unreadable or empty source yields ``[]``.
    """
    source_text = _read_safe(root, source_path)
    if not source_text:
        return []
    stripped = _strip_comments(source_text, _detect_lang(source_path))
    field_types = _build_field_map(stripped)

    hits: list[tuple[int, str, Optional[str]]] = []
    for candidate in candidates:
        name = Path(candidate).stem
        if _has_code_evidence(stripped, name):
            called = _find_called_method(stripped, name, field_types)
            first_mention = re.search(rf"\b{re.escape(name)}\b", stripped, re.IGNORECASE)
            offset = len(stripped) if first_mention is None else first_mention.start()
            hits.append((offset, name, called))

    # Stable sort: candidates sharing an offset keep their input order.
    ranked = sorted(hits, key=lambda hit: hit[0])
    return [(name, called) for _, name, called in ranked]
206
+
207
+
208
def _detect_end_state(path: list[str]) -> str:
    """Name the terminal effect of a path from keywords in its step labels.

    Steps are examined in order. The first step whose lower-cased label
    contains a ``_DB_KEYWORDS`` substring gives ``"DB write"``; failing that,
    an ``_EVENT_KEYWORDS`` substring gives ``"event emitted"``. If no step
    matches either set, the result is ``"HTTP response"``.
    """
    outcomes = (
        (_DB_KEYWORDS, "DB write"),
        (_EVENT_KEYWORDS, "event emitted"),
    )
    for label in path:
        lowered = label.lower()
        for keywords, outcome in outcomes:
            if any(keyword in lowered for keyword in keywords):
                return outcome
    return "HTTP response"
216
+
217
+
218
+ def _step_label(class_name: str, method: Optional[str]) -> str:
219
+ return f"{class_name}.{method}" if method else class_name
220
+
221
+
222
+ def _path_name(entry_class: str) -> str:
223
+ domain = re.sub(
224
+ r"(?:RestController|Controller|Resource|Handler|Api|Endpoint|Router|Servlet)$",
225
+ "", entry_class, flags=re.IGNORECASE,
226
+ )
227
+ return re.sub(r"(?<=[a-z])(?=[A-Z])", " ", domain).strip()
228
+
229
+
230
def analyze_execution_paths(
    changed_files: list[str],
    all_paths: list[str],
    root: Path,
    classify_fn: Callable[[str], dict],
    max_paths: int = 3,
) -> list[dict]:
    """Build ordered execution paths with runtime behavior signals.

    Each path:
      - One service per entry point (the earliest-referenced evidenced one)
      - Each step requires direct code evidence
      - Notes are populated from explicit code signals only (never inferred)
      - Forward-only: Controller → Service → Repository

    Args:
        changed_files: Paths (relative to *root*) changed in the PR.
        all_paths: Every project path considered as a service or repository
            candidate.
        root: Directory that the relative paths are resolved against.
        classify_fn: Maps a path to a dict with at least an
            ``"artifact_type"`` key.
        max_paths: Only the first *max_paths* changed entry files are
            analysed. Entries without an evidenced service are skipped and
            are not replaced by later entries.

    Returns:
        A list of ``{"name", "entry_point", "path", "end_state"}`` dicts.
        ``entry_point`` and every element of ``path`` have the shape
        ``{"step": str, "notes": list[str]}``. Returns ``[]`` when no
        verifiable path exists.
    """
    entry_files = [
        f for f in changed_files
        if classify_fn(f)["artifact_type"] in _ENTRY_ARTIFACT_TYPES
    ]
    if not entry_files:
        return []

    # Classify each project path once and bucket it, instead of running the
    # classifier over all_paths separately for services and for repositories.
    all_services: list[str] = []
    all_repos: list[str] = []
    for candidate in all_paths:
        artifact_type = classify_fn(candidate)["artifact_type"]
        if artifact_type in _SERVICE_ARTIFACT_TYPES:
            all_services.append(candidate)
        if artifact_type in _REPO_ARTIFACT_TYPES:
            all_repos.append(candidate)

    result: list[dict] = []

    for entry_path in entry_files[:max_paths]:
        entry_class = Path(entry_path).stem
        lang = _detect_lang(entry_path)

        entry_content = _read_safe(root, entry_path)
        entry_clean = _strip_comments(entry_content, lang) if entry_content else ""
        entry_method = _find_entry_method(entry_clean) if entry_clean else None
        entry_point_str = _step_label(entry_class, entry_method)

        evidenced_svcs = _find_evidenced_ordered(root, entry_path, all_services)
        if not evidenced_svcs:
            # No service is provably referenced from this entry file.
            continue

        svc_class, svc_method = evidenced_svcs[0]
        svc_label = _step_label(svc_class, svc_method)

        # Map the class name back to a path; the first path with that stem wins.
        svc_path = next((p for p in all_services if Path(p).stem == svc_class), None)
        svc_content = _read_safe(root, svc_path) if svc_path else ""
        svc_lang = _detect_lang(svc_path) if svc_path else "unknown"

        # Service step — notes scoped to service file only
        path_items: list[dict] = [
            {"step": svc_label,
             "notes": _collect_runtime_notes(svc_content, svc_lang) if svc_content else []},
        ]

        # Repository step — notes scoped to repo file only
        if svc_path:
            evidenced_repos = _find_evidenced_ordered(root, svc_path, all_repos)
            if evidenced_repos:
                repo_class, repo_method = evidenced_repos[0]
                repo_label = _step_label(repo_class, repo_method)
                repo_path = next((p for p in all_repos if Path(p).stem == repo_class), None)
                repo_content = _read_safe(root, repo_path) if repo_path else ""
                repo_lang = _detect_lang(repo_path) if repo_path else "unknown"
                path_items.append(
                    {"step": repo_label,
                     "notes": _collect_runtime_notes(repo_content, repo_lang) if repo_content else []},
                )

        # Entry-point notes scoped to controller file
        entry_notes = _collect_runtime_notes(entry_content, lang) if entry_content else []

        result.append({
            "name": _path_name(entry_class),
            "entry_point": {"step": entry_point_str, "notes": entry_notes},
            "path": path_items,
            "end_state": _detect_end_state([item["step"] for item in path_items]),
        })

    return result
@@ -351,6 +351,7 @@ class TaskOutput:
351
351
  test_coverage_risk: dict = field(default_factory=dict)
352
352
  review_hotspots: list[str] = field(default_factory=list)
353
353
  suggested_review_order: list[str] = field(default_factory=list)
354
+ execution_paths: list[dict] = field(default_factory=list)
354
355
 
355
356
 
356
357
  # ─────────────────────────────────────────────────────────────────────────────
@@ -874,6 +875,17 @@ class TaskContextBuilder:
874
875
  _pr_suggested_review_order.append(_f)
875
876
  _seen_order.add(_f)
876
877
 
878
+ # ── 6d. review-pr: execution paths ──────────────────────────────────
879
+ _execution_paths: list[dict] = []
880
+ if task_name == "review-pr" and _delta_files:
881
+ from sourcecode.flow_analyzer import analyze_execution_paths
882
+ _execution_paths = analyze_execution_paths(
883
+ changed_files=sorted(_delta_files),
884
+ all_paths=all_paths,
885
+ root=self.root,
886
+ classify_fn=self._classify_changed_file,
887
+ )
888
+
877
889
  # ── 6c. Symptom keyword boost + related notes (fix-bug + --symptom) ──
878
890
  symptom_keywords: list[str] = []
879
891
  related_notes: list[dict] = []
@@ -1104,6 +1116,7 @@ class TaskContextBuilder:
1104
1116
  test_coverage_risk=_pr_test_coverage_risk,
1105
1117
  review_hotspots=_pr_review_hotspots,
1106
1118
  suggested_review_order=_pr_suggested_review_order,
1119
+ execution_paths=_execution_paths,
1107
1120
  )
1108
1121
 
1109
1122
  def render_prompt(self, output: TaskOutput) -> str:
@@ -1605,6 +1618,78 @@ class TaskContextBuilder:
1605
1618
  # Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
1606
1619
  return {"artifact_type": "ide_noise", "risk_areas": [], "impact_level": "noise", "is_noise": True, "module": module, "confidence": "low"}
1607
1620
 
1621
def _classify_diff_severity(self, path: str, since: Optional[str]) -> str:
    """Classify the semantic severity of a file's diff to gate BFS expansion.

    Returns: 'trivial' | 'field_change' | 'api_change' | 'security_change' | 'unknown'

    - trivial: only comments/whitespace changed — no BFS expansion seeded
    - field_change: field/attribute declarations changed — hop-1 only, no hop-2+ frontier
    - api_change: method signatures or class structure changed — full BFS
    - security_change: auth/security keywords in changed lines — full BFS + security chain
    - unknown: diff unreadable — treated as api_change (safe default)

    Args:
        path: File to diff, passed to ``git diff`` after ``--``.
        since: Optional git ref. When given, the diff is ``since..HEAD``;
            otherwise it is the working tree against ``HEAD``.

    'unknown' is returned in three situations: the git call raises (including
    the 5-second timeout), git prints an empty diff, or the file suffix is
    not one of the handled Java/Kotlin, Python, or TypeScript/JavaScript ones.
    """
    import subprocess as _subprocess
    import re as _re

    try:
        if since:
            cmd = ["git", "diff", since, "HEAD", "--", path]
        else:
            cmd = ["git", "diff", "HEAD", "--", path]
        # Runs in the project root; the exit status is not inspected, only stdout.
        result = _subprocess.run(
            cmd, capture_output=True, text=True, timeout=5,
            cwd=str(self.root), encoding="utf-8", errors="ignore",
        )
        diff_text = result.stdout
    except Exception:
        return "unknown"

    if not diff_text.strip():
        return "unknown"

    # Keep added/removed lines with their +/- marker stripped, skipping the
    # "+++"/"---" file header lines.
    # NOTE(review): a changed line whose own text starts with "++" or "--"
    # shows up as "+++..."/"---..." and is dropped here too — confirm that
    # is acceptable.
    changed_lines = [
        line[1:] for line in diff_text.splitlines()
        if line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
    ]
    if not changed_lines:
        return "trivial"

    # Pick the per-language pattern set. _TRIVIAL/_FIELD/_API/_STRUCT are
    # anchored at line start (used with .match); _SECURITY is searched
    # anywhere in the line.
    suffix = Path(path).suffix.lower()
    if suffix in (".java", ".kt"):
        _TRIVIAL = _re.compile(r'^\s*(?://|/\*|\*)')
        _FIELD = _re.compile(r'^\s*(?:private|protected|public|final|static)\s+\w[\w<>, ]*\s+\w+\s*[;=]')
        _API = _re.compile(r'^\s*(?:public|protected)\s+\S.*\(')
        # Exclude 'password', 'role', 'permission' — these are common field names
        # in domain models and don't indicate auth logic changes. Keep mechanism
        # keywords: jwt, auth (as class prefix), token, credential, encrypt, decrypt, oauth.
        # NOTE(review): this pattern is case-sensitive and wrapped in \b...\b, so
        # it matches these words only as whole lower-case tokens (plus
        # "Security"); an identifier such as "AuthService" does not match —
        # confirm that is the intent.
        _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|Security)\b')
        _STRUCT = _re.compile(r'^\s*(?:class|interface|enum|record|import|package)\s')
    elif suffix == ".py":
        _TRIVIAL = _re.compile(r'^\s*#')
        _FIELD = _re.compile(r'^\s*(?:self\.\w+\s*=|\w+:\s*\w)')
        _API = _re.compile(r'^\s*def\s+\w')
        _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
        _STRUCT = _re.compile(r'^\s*(?:class|import|from)\s')
    elif suffix in (".ts", ".tsx", ".js", ".jsx", ".mjs"):
        _TRIVIAL = _re.compile(r'^\s*(?://|/\*|\*)')
        _FIELD = _re.compile(r'^\s*(?:private|readonly|public)?\s*\w+[?!]?\s*[=:]')
        _API = _re.compile(r'^\s*(?:(?:public|private|protected|async|export)\s+)*(?:function\s+\w|\w+\s*\()')
        _SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
        _STRUCT = _re.compile(r'^\s*(?:class|interface|import|export\s+(?:class|interface|type))\s')
    else:
        return "unknown"

    # Highest severity wins: one matching changed line is enough for each
    # of the first three tiers, checked from most to least severe.
    if any(_SECURITY.search(line) for line in changed_lines):
        return "security_change"
    if any(_API.match(line) or _STRUCT.match(line) for line in changed_lines):
        return "api_change"
    if any(_FIELD.match(line) for line in changed_lines):
        return "field_change"
    # "trivial" requires every changed line to be a comment or blank.
    if all(_TRIVIAL.match(line) or not line.strip() for line in changed_lines):
        return "trivial"
    return "field_change"  # safe default: treat unknown non-trivial as field-level
1692
+
1608
1693
  def _scan_import_dependents(
1609
1694
  self,
1610
1695
  changed_paths: list[str],
@@ -1888,6 +1973,16 @@ class TaskContextBuilder:
1888
1973
  f: self._classify_changed_file(f) for f in changed_files
1889
1974
  }
1890
1975
 
1976
+ # ── Step 1b: classify diff severity to gate BFS expansion ─────────────
1977
+ # trivial → no BFS seeding (comments/whitespace only)
1978
+ # field_change → hop-1 BFS only, deps excluded from hop-2+ frontier
1979
+ # api_change → full BFS (method signature or class structure changed)
1980
+ # security_change → full BFS + security chain allowed cross-module
1981
+ # unknown → treated as api_change (safe default)
1982
+ diff_severities: dict[str, str] = {
1983
+ f: self._classify_diff_severity(f, since) for f in changed_files
1984
+ }
1985
+
1891
1986
  # ── Step 2: build relevant_files from the changed set ─────────────────
1892
1987
  relevant: list[RelevantFile] = []
1893
1988
  why: dict[str, str] = {}
@@ -2004,9 +2099,12 @@ class TaskContextBuilder:
2004
2099
  ]
2005
2100
 
2006
2101
  _bfs_seen: set[str] = {rf.path for rf in relevant}
2102
+ # trivial changes (comments/whitespace only) don't seed BFS — nothing structural
2103
+ # to propagate, so excluding them prevents false expansion on cosmetic commits
2007
2104
  _bfs_frontier: list[str] = [
2008
2105
  f for f in changed_files
2009
2106
  if Path(f).suffix.lower() in _BFS_SCANNABLE
2107
+ and diff_severities.get(f, "unknown") != "trivial"
2010
2108
  ]
2011
2109
 
2012
2110
  # (max results added from this hop, max_candidates scanned per seed)
@@ -2035,6 +2133,8 @@ class TaskContextBuilder:
2035
2133
 
2036
2134
  # collect (score, path) pairs for this hop to build the next frontier
2037
2135
  _hop_scored: list[tuple[float, str]] = []
2136
+ # per-hop staging list — capped at _max_results before merging into _bfs_collected
2137
+ _hop_bfs_staged: list[tuple[int, float, str, RelevantFile]] = []
2038
2138
 
2039
2139
  for _seed_path, _dep_paths in _hop_dep_map.items():
2040
2140
  _seed_atype = (
@@ -2042,6 +2142,9 @@ class TaskContextBuilder:
2042
2142
  if _seed_path in classifications
2043
2143
  else self._classify_changed_file(_seed_path)["artifact_type"]
2044
2144
  )
2145
+ # diff severity for original changed files only (hop-1 seeds);
2146
+ # hop-2+ seeds are dep files not in diff_severities → "unknown"
2147
+ _seed_severity = diff_severities.get(_seed_path, "unknown")
2045
2148
  for _dep_path in _dep_paths:
2046
2149
  if _dep_path in _bfs_seen:
2047
2150
  continue
@@ -2052,9 +2155,29 @@ class TaskContextBuilder:
2052
2155
  continue
2053
2156
 
2054
2157
  _dep_atype = _dep_cls["artifact_type"]
2158
+ _dep_module = _dep_cls["module"]
2159
+
2160
+ # Cross-module gating: if dep lives in a different domain module,
2161
+ # only allow it if:
2162
+ # hop-1 AND dep_atype is explicitly in seed's _EXPANSION_TARGETS
2163
+ # For hop-2+, cross-module deps are always excluded — transitives
2164
+ # must stay within the changed modules to avoid system-wide explosion.
2165
+ _is_cross_module = bool(_dep_module) and _dep_module not in affected_modules_set
2166
+ if _is_cross_module:
2167
+ _seed_expansion = _EXPANSION_TARGETS.get(_seed_atype, frozenset())
2168
+ # security_change seeds are allowed to cross into the security chain
2169
+ # even when their base expansion targets don't include those types
2170
+ if _seed_severity == "security_change":
2171
+ _seed_expansion = _seed_expansion | frozenset({"security", "spring_config", "config"})
2172
+ if _hop_num >= 2 or _dep_atype not in _seed_expansion:
2173
+ continue
2174
+
2055
2175
  _dep_score_base = _ARTIFACT_SCORE.get(_dep_atype, 0.45)
2056
2176
  # score decays 30% per hop so transitives rank below direct dependents
2057
- _dep_score = round(_dep_score_base * (0.70 ** _hop_num), 2)
2177
+ # cross-module deps get additional 40% penalty so same-module files
2178
+ # always rank higher in the per-hop cap
2179
+ _cross_module_factor = 0.60 if _is_cross_module else 1.0
2180
+ _dep_score = round(_dep_score_base * (0.70 ** _hop_num) * _cross_module_factor, 2)
2058
2181
  _dep_role = _role_in_system(_dep_path, _dep_atype, _dep_path in ep_paths)
2059
2182
 
2060
2183
  _why_str = (
@@ -2069,27 +2192,44 @@ class TaskContextBuilder:
2069
2192
  f" ({_seed_atype}) | score: {_dep_score:.2f}"
2070
2193
  )
2071
2194
  why[_dep_path] = _why_str
2072
- # Tests are consumers, not structural dependencies — exclude from import graph.
2073
- # They remain in relevant_files but must not seed further BFS hops.
2195
+ # Tests import production code but are not structural dependencies —
2196
+ # exclude from graph, frontier, and bfs_collected entirely.
2074
2197
  _is_test = _dep_atype == "test"
2075
2198
  if not _is_test:
2076
2199
  graph_edges.append({
2077
2200
  "from": _seed_path, "to": _dep_path,
2078
2201
  "edge_type": "import_dependency", "hop": _hop_num,
2079
2202
  })
2080
- _hop_scored.append((_dep_score, _dep_path))
2081
- _bfs_collected.append((_hop_num, _dep_score, _dep_path, RelevantFile(
2082
- path=_dep_path, role=_dep_role, score=_dep_score,
2083
- reason=_reason, why=_why_str,
2084
- )))
2203
+ # field_change seeds don't propagate to hop-2+ frontier:
2204
+ # a field-level change (getter, attribute) is collected at hop-1
2205
+ # but its callers are not recursively expanded further
2206
+ if _seed_severity != "field_change":
2207
+ _hop_scored.append((_dep_score, _dep_path))
2208
+ _hop_bfs_staged.append((_hop_num, _dep_score, _dep_path, RelevantFile(
2209
+ path=_dep_path, role=_dep_role, score=_dep_score,
2210
+ reason=_reason, why=_why_str,
2211
+ )))
2212
+
2213
+ # Per-hop cap: keep only the top-_max_results by score before merging.
2214
+ # Prevents a single high-fanout seed (e.g. User.java imported by every
2215
+ # controller) from flooding _bfs_collected and pushing out hop-2/3 results.
2216
+ _hop_bfs_staged.sort(key=lambda x: (-x[1], x[2]))
2217
+ _bfs_collected.extend(_hop_bfs_staged[:_max_results])
2085
2218
 
2086
2219
  # next frontier = top-N files by score from this hop
2087
2220
  _hop_scored.sort(key=lambda x: -x[0])
2088
2221
  _bfs_frontier = [p for _, p in _hop_scored[:_max_results]]
2089
2222
 
2090
- # merge into relevant: closer hops first, then higher score; cap total at 20
2223
+ # merge into relevant: closer hops first, then higher score; cap total at 18
2091
2224
  _bfs_collected.sort(key=lambda x: (x[0], -x[1], x[2]))
2092
- relevant.extend(rf for _, _, _, rf in _bfs_collected[:20])
2225
+ _bfs_cap = sum(budget[0] for budget in _BFS_HOP_BUDGET) # 8+6+4 = 18
2226
+ relevant.extend(rf for _, _, _, rf in _bfs_collected[:_bfs_cap])
2227
+
2228
+ # Truncation guard: flag excess expansion — gap message added in Step 6.
2229
+ _EXPANSION_HARD_LIMIT = 40
2230
+ _expansion_truncated = len(relevant) > _EXPANSION_HARD_LIMIT
2231
+ if _expansion_truncated:
2232
+ relevant = relevant[:_EXPANSION_HARD_LIMIT]
2093
2233
 
2094
2234
  # ── Step 3d: per-file impact scores, change_type, system_impact ─────────
2095
2235
  # Downstream fanout: count graph edges originating from each changed file
@@ -2263,6 +2403,11 @@ class TaskContextBuilder:
2263
2403
  analysis_gaps: list[str] = [
2264
2404
  f"Related file expansion: type-aware chain expansion + {_bfs_note} + module/directory heuristics",
2265
2405
  ]
2406
+ if _expansion_truncated:
2407
+ analysis_gaps.insert(0,
2408
+ f"truncated_dependency_graph: expansion exceeded {_EXPANSION_HARD_LIMIT} nodes"
2409
+ " — lower-priority files omitted. Narrow scope with --since <ref> for precision."
2410
+ )
2266
2411
  if noise_count > 0 and meaningful > 0:
2267
2412
  analysis_gaps.append(
2268
2413
  f"{noise_count} IDE/tooling file(s) in diff excluded from impact analysis"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.30.0
3
+ Version: 1.30.2
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
221
221
 
222
222
  **Compressed AI-ready context for Java/Spring enterprise codebases.**
223
223
 
224
- ![Version](https://img.shields.io/badge/version-1.30.0-blue)
224
+ ![Version](https://img.shields.io/badge/version-1.30.2-blue)
225
225
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
226
226
 
227
227
  ---
@@ -255,7 +255,7 @@ pipx install sourcecode
255
255
 
256
256
  ```bash
257
257
  sourcecode version
258
- # sourcecode 1.30.0
258
+ # sourcecode 1.30.2
259
259
  ```
260
260
 
261
261
  ---
@@ -1,10 +1,10 @@
1
- sourcecode/__init__.py,sha256=Bqhw95H9r5IFRlnJFDRt1uCsK_ahVHjggAAWdJ3d-5c,103
1
+ sourcecode/__init__.py,sha256=ERxetwuKJX_1UzzbbdymfXL8AXwRFp03HJG6sY-iJO4,103
2
2
  sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
7
- sourcecode/cli.py,sha256=iWzo7u-wmWjj0GYAF54UpbicfpXt2OUxPRy44h2VaCI,80646
7
+ sourcecode/cli.py,sha256=1qVMsC2swT-OtCK6XziIM0J4xKp8kcRhUzfOaHr7vRU,80743
8
8
  sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
9
9
  sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -17,10 +17,11 @@ sourcecode/doc_analyzer.py,sha256=afA4uJFwXZ_uR2l4J0pQwbeTkRkGmKdN9KhRVYePBUw,24
17
17
  sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
18
18
  sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
19
19
  sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
20
+ sourcecode/flow_analyzer.py,sha256=VQDrItg3NBqOOD8PxHXyntXQnPweUuUn6JtOY8lNWys,12841
20
21
  sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
21
22
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
22
23
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
23
- sourcecode/prepare_context.py,sha256=UxAwXHLZC61WFmYWwp-LWRUXnH6CbaX_lsyn6W7ok4o,121062
24
+ sourcecode/prepare_context.py,sha256=ELrCIIcttip4B3y9aQZdMPqIgzaEJR0evDdG8QYTBLc,129623
24
25
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
25
26
  sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
26
27
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -61,8 +62,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
61
62
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
62
63
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
63
64
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
64
- sourcecode-1.30.0.dist-info/METADATA,sha256=RDWe-iF73ttF7ZeXsUoUp2kQQUGB8lnxCWHI7dZQroM,23417
65
- sourcecode-1.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
66
- sourcecode-1.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
67
- sourcecode-1.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
68
- sourcecode-1.30.0.dist-info/RECORD,,
65
+ sourcecode-1.30.2.dist-info/METADATA,sha256=3bLQsn6BmYa9Rum0jjejw2627bPdOMaYxbqI2XMyOLY,23417
66
+ sourcecode-1.30.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
67
+ sourcecode-1.30.2.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
68
+ sourcecode-1.30.2.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
69
+ sourcecode-1.30.2.dist-info/RECORD,,