sourcecode 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +2 -0
- sourcecode/flow_analyzer.py +310 -0
- sourcecode/prepare_context.py +155 -10
- {sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/METADATA +3 -3
- {sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/RECORD +9 -8
- {sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/WHEEL +0 -0
- {sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.30.0.dist-info → sourcecode-1.30.2.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -1866,6 +1866,8 @@ def prepare_context_cmd(
|
|
|
1866
1866
|
out["review_hotspots"] = output.review_hotspots
|
|
1867
1867
|
if output.suggested_review_order:
|
|
1868
1868
|
out["suggested_review_order"] = output.suggested_review_order
|
|
1869
|
+
if output.execution_paths:
|
|
1870
|
+
out["execution_paths"] = output.execution_paths
|
|
1869
1871
|
if output.impact_summary:
|
|
1870
1872
|
out["impact_summary"] = output.impact_summary
|
|
1871
1873
|
if output.why_these_files:
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""flow_analyzer.py — Evidence-based execution path extraction for PR context.
|
|
2
|
+
|
|
3
|
+
Builds Entry → Service → Repository → EndState ordered sequences using ONLY
|
|
4
|
+
direct code evidence: field injection, constructor params, type annotations,
|
|
5
|
+
method calls, explicit instantiation.
|
|
6
|
+
|
|
7
|
+
V3: execution_paths with runtime_notes — conditional branches, optional execution,
|
|
8
|
+
and async side-effects are surfaced when explicit code signals exist.
|
|
9
|
+
No inference, no naming, no invented behavior.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Callable, Optional
|
|
16
|
+
|
|
17
|
+
_ENTRY_ARTIFACT_TYPES = frozenset({"controller", "entrypoint"})
|
|
18
|
+
_SERVICE_ARTIFACT_TYPES = frozenset({"service"})
|
|
19
|
+
_REPO_ARTIFACT_TYPES = frozenset({"repository", "mapper"})
|
|
20
|
+
|
|
21
|
+
_DB_KEYWORDS = frozenset({"repository", "dao", "mapper", "store", "jpa", "jdbc", "sql"})
|
|
22
|
+
_EVENT_KEYWORDS = frozenset({"event", "publish", "emit", "kafka", "queue", "rabbit", "sns", "bus"})
|
|
23
|
+
|
|
24
|
+
_HTTP_ENTRY_RE = re.compile(
|
|
25
|
+
r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping[^)]*\)'
|
|
26
|
+
r'|@(?:Get|Post|Put|Delete|Patch)\([^)]*\)'
|
|
27
|
+
r'|@\w+\.(?:get|post|put|delete|patch)\([^)]*\)',
|
|
28
|
+
re.IGNORECASE,
|
|
29
|
+
)
|
|
30
|
+
_METHOD_NAME_RE = re.compile(
|
|
31
|
+
r'(?:public\s+|async\s+|def\s+|function\s+)*'
|
|
32
|
+
r'(?:[\w<>\[\]]+\s+)?'
|
|
33
|
+
r'(\w+)\s*\(',
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Runtime signal patterns: (compiled_regex, note_text)
|
|
37
|
+
# Only signals with explicit code evidence — no inference.
|
|
38
|
+
# Three categories: condition | branch | async
|
|
39
|
+
_RUNTIME_SIGNALS: list[tuple[re.Pattern, str]] = [
|
|
40
|
+
# ── Conditional / auth guards ─────────────────────────────────────────────
|
|
41
|
+
(re.compile(r'@PreAuthorize|@Secured|@RolesAllowed', re.IGNORECASE),
|
|
42
|
+
"condition: authorization check present (@PreAuthorize / @Secured)"),
|
|
43
|
+
(re.compile(r'isAuthenticated\(\)|hasRole\(|hasAuthority\(|SecurityContextHolder', re.IGNORECASE),
|
|
44
|
+
"condition: reads authentication context"),
|
|
45
|
+
(re.compile(r'featureFlag|FeatureToggle|\.isEnabled\s*\(|\.isActive\s*\(', re.IGNORECASE),
|
|
46
|
+
"condition: feature flag gates execution"),
|
|
47
|
+
# Null/empty guard with early return — matches if (...null/empty...) return/throw on same line
|
|
48
|
+
(re.compile(r'if\s*\([^)]*(?:==\s*null|!=\s*null|isEmpty\s*\(\)|isBlank\s*\(\))[^)]*\)'
|
|
49
|
+
r'\s*(?:\{?\s*)?(?:return|throw)\b', re.IGNORECASE),
|
|
50
|
+
"condition: null/empty guard with early return"),
|
|
51
|
+
|
|
52
|
+
# ── Optional execution / branching ────────────────────────────────────────
|
|
53
|
+
(re.compile(r'@Cacheable|@CacheEvict|@CachePut', re.IGNORECASE),
|
|
54
|
+
"branch: Spring cache may short-circuit downstream call"),
|
|
55
|
+
(re.compile(r'\.getIfPresent\s*\(|cache\.get\s*\(|cacheManager\.', re.IGNORECASE),
|
|
56
|
+
"branch: manual cache lookup may short-circuit"),
|
|
57
|
+
(re.compile(r'Optional\s*<|\.orElseThrow\s*\(|\.orElseGet\s*\(|\.orElse\s*\(', re.IGNORECASE),
|
|
58
|
+
"branch: result may be absent (Optional)"),
|
|
59
|
+
|
|
60
|
+
# ── Async / side effects ──────────────────────────────────────────────────
|
|
61
|
+
(re.compile(r'@Async\b'),
|
|
62
|
+
"async: runs in separate thread (@Async)"),
|
|
63
|
+
(re.compile(r'CompletableFuture|\.supplyAsync\s*\(|\.runAsync\s*\('),
|
|
64
|
+
"async: non-blocking future-based execution"),
|
|
65
|
+
(re.compile(r'\basync\s+def\b|\bawait\b', re.IGNORECASE),
|
|
66
|
+
"async: non-blocking (async/await)"),
|
|
67
|
+
(re.compile(r'publishEvent\s*\(|applicationEventPublisher|eventPublisher\.', re.IGNORECASE),
|
|
68
|
+
"async: Spring application event emitted"),
|
|
69
|
+
(re.compile(r'kafkaTemplate\.|KafkaProducer|@KafkaListener', re.IGNORECASE),
|
|
70
|
+
"async: Kafka message produced"),
|
|
71
|
+
(re.compile(r'rabbitTemplate\.|amqpTemplate\.|@RabbitListener', re.IGNORECASE),
|
|
72
|
+
"async: RabbitMQ message sent"),
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _detect_lang(path: str) -> str:
|
|
77
|
+
return {
|
|
78
|
+
".java": "java", ".kt": "kotlin",
|
|
79
|
+
".py": "python",
|
|
80
|
+
".ts": "typescript", ".tsx": "typescript",
|
|
81
|
+
".js": "javascript", ".jsx": "javascript",
|
|
82
|
+
".go": "go", ".cs": "csharp", ".rb": "ruby", ".php": "php",
|
|
83
|
+
}.get(Path(path).suffix.lower(), "unknown")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _strip_comments(content: str, lang: str) -> str:
|
|
87
|
+
content = re.sub(r"/\*.*?\*/", " ", content, flags=re.DOTALL)
|
|
88
|
+
content = re.sub(r"//[^\n]*", " ", content)
|
|
89
|
+
if lang in ("python", "ruby", "go"):
|
|
90
|
+
content = re.sub(r"#[^\n]*", " ", content)
|
|
91
|
+
return content
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _read_safe(root: Path, rel_path: str) -> str:
|
|
95
|
+
try:
|
|
96
|
+
return (root / rel_path).read_text(encoding="utf-8", errors="ignore")
|
|
97
|
+
except (OSError, ValueError):
|
|
98
|
+
return ""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _collect_runtime_notes(content: str, lang: str) -> list[str]:
    """Return the runtime-behavior notes whose pattern occurs in *content*.

    Comments are stripped first (via ``_strip_comments``), then every entry of
    ``_RUNTIME_SIGNALS`` is tried in table order. A note is reported at most
    once, in the order of its first matching table entry. The result is an
    empty list when nothing matches.
    """
    stripped = _strip_comments(content, lang)
    found: list[str] = []
    for signal_re, label in _RUNTIME_SIGNALS:
        if label in found:
            continue
        if signal_re.search(stripped):
            found.append(label)
    return found
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _find_entry_method(clean: str) -> Optional[str]:
    """Return the name of the first handler method that follows a route annotation.

    Every ``_HTTP_ENTRY_RE`` match is tried in source order, not just the
    first one. A class-level mapping annotation is followed by a class
    declaration rather than a method signature, and stopping there would
    report no entry method for the whole file.

    Args:
        clean: Comment-stripped source text.

    Returns:
        The method name, or ``None`` when no route annotation is directly
        followed by something that parses as a method signature.
    """
    # Keywords the signature regex can capture in place of a real name.
    not_method_names = ("public", "async", "def", "function", "void", "override")
    for mapping in _HTTP_ENTRY_RE.finditer(clean):
        signature = _METHOD_NAME_RE.match(clean[mapping.end():].lstrip())
        if signature is None:
            continue
        name = signature.group(1)
        if name.lower() not in not_method_names:
            return name
    return None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _build_field_map(clean: str) -> dict[str, str]:
|
|
131
|
+
"""Map field_name_lower → ClassName from injection patterns."""
|
|
132
|
+
fmap: dict[str, str] = {}
|
|
133
|
+
for m in re.finditer(r"private\s+(\w+)(?:<[^>]+>)?\s+(\w+)\s*[;=,)]", clean):
|
|
134
|
+
fmap[m.group(2).lower()] = m.group(1)
|
|
135
|
+
for m in re.finditer(r"(?:private|protected|readonly)\s+(\w+)\s*:\s*(\w+)", clean):
|
|
136
|
+
fmap[m.group(1).lower()] = m.group(2)
|
|
137
|
+
for m in re.finditer(r"self\.(\w+)\s*=\s*(\w+)\s*\(", clean):
|
|
138
|
+
fmap[m.group(1).lower()] = m.group(2)
|
|
139
|
+
return fmap
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _find_called_method(clean: str, class_name: str, fmap: dict[str, str]) -> Optional[str]:
|
|
143
|
+
fields = [f for f, t in fmap.items() if t.lower() == class_name.lower()]
|
|
144
|
+
for field in fields:
|
|
145
|
+
pat = rf"\bthis\.{re.escape(field)}\.(\w+)\s*\(|\b{re.escape(field)}\.(\w+)\s*\("
|
|
146
|
+
for m in re.finditer(pat, clean, re.IGNORECASE):
|
|
147
|
+
name = m.group(1) or m.group(2)
|
|
148
|
+
if name and name.lower() not in ("class", "new", "super", "get", "set"):
|
|
149
|
+
return name
|
|
150
|
+
for m in re.finditer(rf"\b{re.escape(class_name)}\.(\w+)\s*\(", clean, re.IGNORECASE):
|
|
151
|
+
name = m.group(1)
|
|
152
|
+
if name.lower() not in ("class", "new", "super"):
|
|
153
|
+
return name
|
|
154
|
+
return None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _has_code_evidence(clean: str, class_name: str) -> bool:
|
|
158
|
+
"""True only when class_name has direct code evidence in pre-stripped content."""
|
|
159
|
+
esc = re.escape(class_name)
|
|
160
|
+
if re.search(rf"\b(?:private|protected)\s+{esc}\b", clean, re.IGNORECASE):
|
|
161
|
+
return True
|
|
162
|
+
if re.search(rf"[,(]\s*{esc}\s+\w+", clean, re.IGNORECASE):
|
|
163
|
+
return True
|
|
164
|
+
if re.search(rf":\s*{esc}\b", clean, re.IGNORECASE):
|
|
165
|
+
return True
|
|
166
|
+
if re.search(rf"\bnew\s+{esc}\s*\(", clean, re.IGNORECASE):
|
|
167
|
+
return True
|
|
168
|
+
if re.search(rf"\b{esc}\s*\(", clean):
|
|
169
|
+
return True
|
|
170
|
+
if re.search(rf"\b{esc}\b", clean, re.IGNORECASE):
|
|
171
|
+
non_import = re.search(
|
|
172
|
+
rf"^(?!\s*(?:import|require|from|//|#|\*)\b).*\b{esc}\b",
|
|
173
|
+
clean, re.IGNORECASE | re.MULTILINE,
|
|
174
|
+
)
|
|
175
|
+
if non_import:
|
|
176
|
+
return True
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _find_evidenced_ordered(
    root: Path,
    source_path: str,
    candidates: list[str],
) -> list[tuple[str, Optional[str]]]:
    """List the candidates that *source_path* references, in order of appearance.

    The class name of each candidate is its file stem. Candidates without
    direct code evidence in the comment-stripped source are dropped. The rest
    are ordered by the offset of the first case-insensitive whole-word
    occurrence of the class name, with ties keeping candidate order.

    Returns:
        ``(class_name, called_method_or_None)`` tuples. Empty when the source
        file is unreadable or empty.
    """
    raw = _read_safe(root, source_path)
    if not raw:
        return []

    clean = _strip_comments(raw, _detect_lang(source_path))
    fmap = _build_field_map(clean)

    hits: list[tuple[int, str, Optional[str]]] = []
    for candidate in candidates:
        class_name = Path(candidate).stem
        if not _has_code_evidence(clean, class_name):
            continue
        called = _find_called_method(clean, class_name, fmap)
        first_use = re.search(rf"\b{re.escape(class_name)}\b", clean, re.IGNORECASE)
        # Evidence without a whole-word occurrence sorts after everything else.
        offset = len(clean) if first_use is None else first_use.start()
        hits.append((offset, class_name, called))

    ordered = sorted(hits, key=lambda hit: hit[0])
    return [(class_name, called) for _, class_name, called in ordered]
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _detect_end_state(path: list[str]) -> str:
    """Derive the end-state label from the step labels of a path.

    Steps are examined in order. The first step containing a DB keyword gives
    ``"DB write"``; otherwise the first containing an event keyword gives
    ``"event emitted"``. Within a single step the DB keywords are checked
    before the event keywords. If no step matches, the result is
    ``"HTTP response"``.

    NOTE(review): keywords are matched as plain substrings of the lower-cased
    label, so e.g. "bus" also matches inside "business" — confirm this is
    intended.
    """
    outcomes = (
        (_DB_KEYWORDS, "DB write"),
        (_EVENT_KEYWORDS, "event emitted"),
    )
    for step in path:
        lowered = step.lower()
        for keywords, outcome in outcomes:
            if any(kw in lowered for kw in keywords):
                return outcome
    return "HTTP response"
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _step_label(class_name: str, method: Optional[str]) -> str:
|
|
219
|
+
return f"{class_name}.{method}" if method else class_name
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _path_name(entry_class: str) -> str:
|
|
223
|
+
domain = re.sub(
|
|
224
|
+
r"(?:RestController|Controller|Resource|Handler|Api|Endpoint|Router|Servlet)$",
|
|
225
|
+
"", entry_class, flags=re.IGNORECASE,
|
|
226
|
+
)
|
|
227
|
+
return re.sub(r"(?<=[a-z])(?=[A-Z])", " ", domain).strip()
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def analyze_execution_paths(
    changed_files: list[str],
    all_paths: list[str],
    root: Path,
    classify_fn: Callable[[str], dict],
    max_paths: int = 3,
) -> list[dict]:
    """Build ordered execution paths with per-step runtime behavior notes.

    For each changed entry-point file (artifact type in
    ``_ENTRY_ARTIFACT_TYPES``), in the order given:

    - the earliest-referenced service with direct code evidence in the entry
      file becomes the first step; entry files with no such service are skipped
    - the earliest-referenced repository with direct code evidence in that
      service file, if any, becomes the second step
    - notes for each step come only from that step's own file

    Args:
        changed_files: Repo-relative paths of changed files.
        all_paths: Repo-relative paths of all files to draw services and
            repositories from.
        root: Directory the relative paths are resolved against.
        classify_fn: Maps a path to a dict that has an ``"artifact_type"`` key.
        max_paths: Maximum number of paths to return. Entry files that yield
            no path do not count against it. A non-positive value yields ``[]``.

    Returns:
        A list of ``{"name", "entry_point", "path", "end_state"}`` dicts, where
        ``entry_point`` and each element of ``path`` are
        ``{"step": str, "notes": list[str]}``. Empty when no verifiable path
        exists.
    """
    if max_paths <= 0:
        return []

    entry_files = [
        f for f in changed_files
        if classify_fn(f)["artifact_type"] in _ENTRY_ARTIFACT_TYPES
    ]
    if not entry_files:
        return []

    # Classify every path once; the result feeds both candidate lists.
    all_services: list[str] = []
    all_repos: list[str] = []
    for candidate in all_paths:
        artifact_type = classify_fn(candidate)["artifact_type"]
        if artifact_type in _SERVICE_ARTIFACT_TYPES:
            all_services.append(candidate)
        if artifact_type in _REPO_ARTIFACT_TYPES:
            all_repos.append(candidate)

    def build_step(label: str, rel_path: Optional[str]) -> dict:
        """Return a path step whose notes are scoped to *rel_path* only."""
        content = _read_safe(root, rel_path) if rel_path else ""
        if not content:
            return {"step": label, "notes": []}
        return {
            "step": label,
            "notes": _collect_runtime_notes(content, _detect_lang(rel_path)),
        }

    result: list[dict] = []

    for entry_path in entry_files:
        if len(result) >= max_paths:
            break

        evidenced_svcs = _find_evidenced_ordered(root, entry_path, all_services)
        if not evidenced_svcs:
            continue

        entry_class = Path(entry_path).stem
        lang = _detect_lang(entry_path)
        entry_content = _read_safe(root, entry_path)
        entry_clean = _strip_comments(entry_content, lang) if entry_content else ""
        entry_method = _find_entry_method(entry_clean) if entry_clean else None

        # Service step: earliest-referenced service with code evidence.
        svc_class, svc_method = evidenced_svcs[0]
        svc_path = next((p for p in all_services if Path(p).stem == svc_class), None)
        path_items: list[dict] = [build_step(_step_label(svc_class, svc_method), svc_path)]

        # Repository step: looked up in the service file, not the entry file.
        if svc_path:
            evidenced_repos = _find_evidenced_ordered(root, svc_path, all_repos)
            if evidenced_repos:
                repo_class, repo_method = evidenced_repos[0]
                repo_path = next((p for p in all_repos if Path(p).stem == repo_class), None)
                path_items.append(build_step(_step_label(repo_class, repo_method), repo_path))

        entry_notes = _collect_runtime_notes(entry_content, lang) if entry_content else []

        result.append({
            "name": _path_name(entry_class),
            "entry_point": {
                "step": _step_label(entry_class, entry_method),
                "notes": entry_notes,
            },
            "path": path_items,
            "end_state": _detect_end_state([item["step"] for item in path_items]),
        })

    return result
|
sourcecode/prepare_context.py
CHANGED
|
@@ -351,6 +351,7 @@ class TaskOutput:
|
|
|
351
351
|
test_coverage_risk: dict = field(default_factory=dict)
|
|
352
352
|
review_hotspots: list[str] = field(default_factory=list)
|
|
353
353
|
suggested_review_order: list[str] = field(default_factory=list)
|
|
354
|
+
execution_paths: list[dict] = field(default_factory=list)
|
|
354
355
|
|
|
355
356
|
|
|
356
357
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -874,6 +875,17 @@ class TaskContextBuilder:
|
|
|
874
875
|
_pr_suggested_review_order.append(_f)
|
|
875
876
|
_seen_order.add(_f)
|
|
876
877
|
|
|
878
|
+
# ── 6d. review-pr: execution paths ──────────────────────────────────
|
|
879
|
+
_execution_paths: list[dict] = []
|
|
880
|
+
if task_name == "review-pr" and _delta_files:
|
|
881
|
+
from sourcecode.flow_analyzer import analyze_execution_paths
|
|
882
|
+
_execution_paths = analyze_execution_paths(
|
|
883
|
+
changed_files=sorted(_delta_files),
|
|
884
|
+
all_paths=all_paths,
|
|
885
|
+
root=self.root,
|
|
886
|
+
classify_fn=self._classify_changed_file,
|
|
887
|
+
)
|
|
888
|
+
|
|
877
889
|
# ── 6c. Symptom keyword boost + related notes (fix-bug + --symptom) ──
|
|
878
890
|
symptom_keywords: list[str] = []
|
|
879
891
|
related_notes: list[dict] = []
|
|
@@ -1104,6 +1116,7 @@ class TaskContextBuilder:
|
|
|
1104
1116
|
test_coverage_risk=_pr_test_coverage_risk,
|
|
1105
1117
|
review_hotspots=_pr_review_hotspots,
|
|
1106
1118
|
suggested_review_order=_pr_suggested_review_order,
|
|
1119
|
+
execution_paths=_execution_paths,
|
|
1107
1120
|
)
|
|
1108
1121
|
|
|
1109
1122
|
def render_prompt(self, output: TaskOutput) -> str:
|
|
@@ -1605,6 +1618,78 @@ class TaskContextBuilder:
|
|
|
1605
1618
|
# Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
|
|
1606
1619
|
return {"artifact_type": "ide_noise", "risk_areas": [], "impact_level": "noise", "is_noise": True, "module": module, "confidence": "low"}
|
|
1607
1620
|
|
|
1621
|
+
def _classify_diff_severity(self, path: str, since: Optional[str]) -> str:
|
|
1622
|
+
"""Classify the semantic severity of a file's diff to gate BFS expansion.
|
|
1623
|
+
|
|
1624
|
+
Returns: 'trivial' | 'field_change' | 'api_change' | 'security_change' | 'unknown'
|
|
1625
|
+
|
|
1626
|
+
- trivial: only comments/whitespace changed — no BFS expansion seeded
|
|
1627
|
+
- field_change: field/attribute declarations changed — hop-1 only, no hop-2+ frontier
|
|
1628
|
+
- api_change: method signatures or class structure changed — full BFS
|
|
1629
|
+
- security_change: auth/security keywords in changed lines — full BFS + security chain
|
|
1630
|
+
- unknown: diff unreadable — treated as api_change (safe default)
|
|
1631
|
+
"""
|
|
1632
|
+
import subprocess as _subprocess
|
|
1633
|
+
import re as _re
|
|
1634
|
+
|
|
1635
|
+
try:
|
|
1636
|
+
if since:
|
|
1637
|
+
cmd = ["git", "diff", since, "HEAD", "--", path]
|
|
1638
|
+
else:
|
|
1639
|
+
cmd = ["git", "diff", "HEAD", "--", path]
|
|
1640
|
+
result = _subprocess.run(
|
|
1641
|
+
cmd, capture_output=True, text=True, timeout=5,
|
|
1642
|
+
cwd=str(self.root), encoding="utf-8", errors="ignore",
|
|
1643
|
+
)
|
|
1644
|
+
diff_text = result.stdout
|
|
1645
|
+
except Exception:
|
|
1646
|
+
return "unknown"
|
|
1647
|
+
|
|
1648
|
+
if not diff_text.strip():
|
|
1649
|
+
return "unknown"
|
|
1650
|
+
|
|
1651
|
+
changed_lines = [
|
|
1652
|
+
line[1:] for line in diff_text.splitlines()
|
|
1653
|
+
if line.startswith(("+", "-")) and not line.startswith(("+++", "---"))
|
|
1654
|
+
]
|
|
1655
|
+
if not changed_lines:
|
|
1656
|
+
return "trivial"
|
|
1657
|
+
|
|
1658
|
+
suffix = Path(path).suffix.lower()
|
|
1659
|
+
if suffix in (".java", ".kt"):
|
|
1660
|
+
_TRIVIAL = _re.compile(r'^\s*(?://|/\*|\*)')
|
|
1661
|
+
_FIELD = _re.compile(r'^\s*(?:private|protected|public|final|static)\s+\w[\w<>, ]*\s+\w+\s*[;=]')
|
|
1662
|
+
_API = _re.compile(r'^\s*(?:public|protected)\s+\S.*\(')
|
|
1663
|
+
# Exclude 'password', 'role', 'permission' — these are common field names
|
|
1664
|
+
# in domain models and don't indicate auth logic changes. Keep mechanism
|
|
1665
|
+
# keywords: jwt, auth (as class prefix), token, credential, encrypt, decrypt, oauth.
|
|
1666
|
+
_SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|Security)\b')
|
|
1667
|
+
_STRUCT = _re.compile(r'^\s*(?:class|interface|enum|record|import|package)\s')
|
|
1668
|
+
elif suffix == ".py":
|
|
1669
|
+
_TRIVIAL = _re.compile(r'^\s*#')
|
|
1670
|
+
_FIELD = _re.compile(r'^\s*(?:self\.\w+\s*=|\w+:\s*\w)')
|
|
1671
|
+
_API = _re.compile(r'^\s*def\s+\w')
|
|
1672
|
+
_SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
|
|
1673
|
+
_STRUCT = _re.compile(r'^\s*(?:class|import|from)\s')
|
|
1674
|
+
elif suffix in (".ts", ".tsx", ".js", ".jsx", ".mjs"):
|
|
1675
|
+
_TRIVIAL = _re.compile(r'^\s*(?://|/\*|\*)')
|
|
1676
|
+
_FIELD = _re.compile(r'^\s*(?:private|readonly|public)?\s*\w+[?!]?\s*[=:]')
|
|
1677
|
+
_API = _re.compile(r'^\s*(?:(?:public|private|protected|async|export)\s+)*(?:function\s+\w|\w+\s*\()')
|
|
1678
|
+
_SECURITY = _re.compile(r'\b(?:jwt|auth|token|credential|encrypt|decrypt|oauth|saml|ldap|principal|security)\b', _re.IGNORECASE)
|
|
1679
|
+
_STRUCT = _re.compile(r'^\s*(?:class|interface|import|export\s+(?:class|interface|type))\s')
|
|
1680
|
+
else:
|
|
1681
|
+
return "unknown"
|
|
1682
|
+
|
|
1683
|
+
if any(_SECURITY.search(line) for line in changed_lines):
|
|
1684
|
+
return "security_change"
|
|
1685
|
+
if any(_API.match(line) or _STRUCT.match(line) for line in changed_lines):
|
|
1686
|
+
return "api_change"
|
|
1687
|
+
if any(_FIELD.match(line) for line in changed_lines):
|
|
1688
|
+
return "field_change"
|
|
1689
|
+
if all(_TRIVIAL.match(line) or not line.strip() for line in changed_lines):
|
|
1690
|
+
return "trivial"
|
|
1691
|
+
return "field_change" # safe default: treat unknown non-trivial as field-level
|
|
1692
|
+
|
|
1608
1693
|
def _scan_import_dependents(
|
|
1609
1694
|
self,
|
|
1610
1695
|
changed_paths: list[str],
|
|
@@ -1888,6 +1973,16 @@ class TaskContextBuilder:
|
|
|
1888
1973
|
f: self._classify_changed_file(f) for f in changed_files
|
|
1889
1974
|
}
|
|
1890
1975
|
|
|
1976
|
+
# ── Step 1b: classify diff severity to gate BFS expansion ─────────────
|
|
1977
|
+
# trivial → no BFS seeding (comments/whitespace only)
|
|
1978
|
+
# field_change → hop-1 BFS only, deps excluded from hop-2+ frontier
|
|
1979
|
+
# api_change → full BFS (method signature or class structure changed)
|
|
1980
|
+
# security_change → full BFS + security chain allowed cross-module
|
|
1981
|
+
# unknown → treated as api_change (safe default)
|
|
1982
|
+
diff_severities: dict[str, str] = {
|
|
1983
|
+
f: self._classify_diff_severity(f, since) for f in changed_files
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1891
1986
|
# ── Step 2: build relevant_files from the changed set ─────────────────
|
|
1892
1987
|
relevant: list[RelevantFile] = []
|
|
1893
1988
|
why: dict[str, str] = {}
|
|
@@ -2004,9 +2099,12 @@ class TaskContextBuilder:
|
|
|
2004
2099
|
]
|
|
2005
2100
|
|
|
2006
2101
|
_bfs_seen: set[str] = {rf.path for rf in relevant}
|
|
2102
|
+
# trivial changes (comments/whitespace only) don't seed BFS — nothing structural
|
|
2103
|
+
# to propagate, so excluding them prevents false expansion on cosmetic commits
|
|
2007
2104
|
_bfs_frontier: list[str] = [
|
|
2008
2105
|
f for f in changed_files
|
|
2009
2106
|
if Path(f).suffix.lower() in _BFS_SCANNABLE
|
|
2107
|
+
and diff_severities.get(f, "unknown") != "trivial"
|
|
2010
2108
|
]
|
|
2011
2109
|
|
|
2012
2110
|
# (max results added from this hop, max_candidates scanned per seed)
|
|
@@ -2035,6 +2133,8 @@ class TaskContextBuilder:
|
|
|
2035
2133
|
|
|
2036
2134
|
# collect (score, path) pairs for this hop to build the next frontier
|
|
2037
2135
|
_hop_scored: list[tuple[float, str]] = []
|
|
2136
|
+
# per-hop staging list — capped at _max_results before merging into _bfs_collected
|
|
2137
|
+
_hop_bfs_staged: list[tuple[int, float, str, RelevantFile]] = []
|
|
2038
2138
|
|
|
2039
2139
|
for _seed_path, _dep_paths in _hop_dep_map.items():
|
|
2040
2140
|
_seed_atype = (
|
|
@@ -2042,6 +2142,9 @@ class TaskContextBuilder:
|
|
|
2042
2142
|
if _seed_path in classifications
|
|
2043
2143
|
else self._classify_changed_file(_seed_path)["artifact_type"]
|
|
2044
2144
|
)
|
|
2145
|
+
# diff severity for original changed files only (hop-1 seeds);
|
|
2146
|
+
# hop-2+ seeds are dep files not in diff_severities → "unknown"
|
|
2147
|
+
_seed_severity = diff_severities.get(_seed_path, "unknown")
|
|
2045
2148
|
for _dep_path in _dep_paths:
|
|
2046
2149
|
if _dep_path in _bfs_seen:
|
|
2047
2150
|
continue
|
|
@@ -2052,9 +2155,29 @@ class TaskContextBuilder:
|
|
|
2052
2155
|
continue
|
|
2053
2156
|
|
|
2054
2157
|
_dep_atype = _dep_cls["artifact_type"]
|
|
2158
|
+
_dep_module = _dep_cls["module"]
|
|
2159
|
+
|
|
2160
|
+
# Cross-module gating: if dep lives in a different domain module,
|
|
2161
|
+
# only allow it if:
|
|
2162
|
+
# hop-1 AND dep_atype is explicitly in seed's _EXPANSION_TARGETS
|
|
2163
|
+
# For hop-2+, cross-module deps are always excluded — transitives
|
|
2164
|
+
# must stay within the changed modules to avoid system-wide explosion.
|
|
2165
|
+
_is_cross_module = bool(_dep_module) and _dep_module not in affected_modules_set
|
|
2166
|
+
if _is_cross_module:
|
|
2167
|
+
_seed_expansion = _EXPANSION_TARGETS.get(_seed_atype, frozenset())
|
|
2168
|
+
# security_change seeds are allowed to cross into the security chain
|
|
2169
|
+
# even when their base expansion targets don't include those types
|
|
2170
|
+
if _seed_severity == "security_change":
|
|
2171
|
+
_seed_expansion = _seed_expansion | frozenset({"security", "spring_config", "config"})
|
|
2172
|
+
if _hop_num >= 2 or _dep_atype not in _seed_expansion:
|
|
2173
|
+
continue
|
|
2174
|
+
|
|
2055
2175
|
_dep_score_base = _ARTIFACT_SCORE.get(_dep_atype, 0.45)
|
|
2056
2176
|
# score decays 30% per hop so transitives rank below direct dependents
|
|
2057
|
-
|
|
2177
|
+
# cross-module deps get additional 40% penalty so same-module files
|
|
2178
|
+
# always rank higher in the per-hop cap
|
|
2179
|
+
_cross_module_factor = 0.60 if _is_cross_module else 1.0
|
|
2180
|
+
_dep_score = round(_dep_score_base * (0.70 ** _hop_num) * _cross_module_factor, 2)
|
|
2058
2181
|
_dep_role = _role_in_system(_dep_path, _dep_atype, _dep_path in ep_paths)
|
|
2059
2182
|
|
|
2060
2183
|
_why_str = (
|
|
@@ -2069,27 +2192,44 @@ class TaskContextBuilder:
|
|
|
2069
2192
|
f" ({_seed_atype}) | score: {_dep_score:.2f}"
|
|
2070
2193
|
)
|
|
2071
2194
|
why[_dep_path] = _why_str
|
|
2072
|
-
# Tests are
|
|
2073
|
-
#
|
|
2195
|
+
# Tests import production code but are not structural dependencies —
|
|
2196
|
+
# exclude from graph, frontier, and bfs_collected entirely.
|
|
2074
2197
|
_is_test = _dep_atype == "test"
|
|
2075
2198
|
if not _is_test:
|
|
2076
2199
|
graph_edges.append({
|
|
2077
2200
|
"from": _seed_path, "to": _dep_path,
|
|
2078
2201
|
"edge_type": "import_dependency", "hop": _hop_num,
|
|
2079
2202
|
})
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2203
|
+
# field_change seeds don't propagate to hop-2+ frontier:
|
|
2204
|
+
# a field-level change (getter, attribute) is collected at hop-1
|
|
2205
|
+
# but its callers are not recursively expanded further
|
|
2206
|
+
if _seed_severity != "field_change":
|
|
2207
|
+
_hop_scored.append((_dep_score, _dep_path))
|
|
2208
|
+
_hop_bfs_staged.append((_hop_num, _dep_score, _dep_path, RelevantFile(
|
|
2209
|
+
path=_dep_path, role=_dep_role, score=_dep_score,
|
|
2210
|
+
reason=_reason, why=_why_str,
|
|
2211
|
+
)))
|
|
2212
|
+
|
|
2213
|
+
# Per-hop cap: keep only the top-_max_results by score before merging.
|
|
2214
|
+
# Prevents a single high-fanout seed (e.g. User.java imported by every
|
|
2215
|
+
# controller) from flooding _bfs_collected and pushing out hop-2/3 results.
|
|
2216
|
+
_hop_bfs_staged.sort(key=lambda x: (-x[1], x[2]))
|
|
2217
|
+
_bfs_collected.extend(_hop_bfs_staged[:_max_results])
|
|
2085
2218
|
|
|
2086
2219
|
# next frontier = top-N files by score from this hop
|
|
2087
2220
|
_hop_scored.sort(key=lambda x: -x[0])
|
|
2088
2221
|
_bfs_frontier = [p for _, p in _hop_scored[:_max_results]]
|
|
2089
2222
|
|
|
2090
|
-
# merge into relevant: closer hops first, then higher score; cap total at
|
|
2223
|
+
# merge into relevant: closer hops first, then higher score; cap total at 18
|
|
2091
2224
|
_bfs_collected.sort(key=lambda x: (x[0], -x[1], x[2]))
|
|
2092
|
-
|
|
2225
|
+
_bfs_cap = sum(budget[0] for budget in _BFS_HOP_BUDGET) # 8+6+4 = 18
|
|
2226
|
+
relevant.extend(rf for _, _, _, rf in _bfs_collected[:_bfs_cap])
|
|
2227
|
+
|
|
2228
|
+
# Truncation guard: flag excess expansion — gap message added in Step 6.
|
|
2229
|
+
_EXPANSION_HARD_LIMIT = 40
|
|
2230
|
+
_expansion_truncated = len(relevant) > _EXPANSION_HARD_LIMIT
|
|
2231
|
+
if _expansion_truncated:
|
|
2232
|
+
relevant = relevant[:_EXPANSION_HARD_LIMIT]
|
|
2093
2233
|
|
|
2094
2234
|
# ── Step 3d: per-file impact scores, change_type, system_impact ─────────
|
|
2095
2235
|
# Downstream fanout: count graph edges originating from each changed file
|
|
@@ -2263,6 +2403,11 @@ class TaskContextBuilder:
|
|
|
2263
2403
|
analysis_gaps: list[str] = [
|
|
2264
2404
|
f"Related file expansion: type-aware chain expansion + {_bfs_note} + module/directory heuristics",
|
|
2265
2405
|
]
|
|
2406
|
+
if _expansion_truncated:
|
|
2407
|
+
analysis_gaps.insert(0,
|
|
2408
|
+
f"truncated_dependency_graph: expansion exceeded {_EXPANSION_HARD_LIMIT} nodes"
|
|
2409
|
+
" — lower-priority files omitted. Narrow scope with --since <ref> for precision."
|
|
2410
|
+
)
|
|
2266
2411
|
if noise_count > 0 and meaningful > 0:
|
|
2267
2412
|
analysis_gaps.append(
|
|
2268
2413
|
f"{noise_count} IDE/tooling file(s) in diff excluded from impact analysis"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.30.
|
|
3
|
+
Version: 1.30.2
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
|
|
|
221
221
|
|
|
222
222
|
**Compressed AI-ready context for Java/Spring enterprise codebases.**
|
|
223
223
|
|
|
224
|
-

|
|
225
225
|

|
|
226
226
|
|
|
227
227
|
---
|
|
@@ -255,7 +255,7 @@ pipx install sourcecode
|
|
|
255
255
|
|
|
256
256
|
```bash
|
|
257
257
|
sourcecode version
|
|
258
|
-
# sourcecode 1.30.
|
|
258
|
+
# sourcecode 1.30.2
|
|
259
259
|
```
|
|
260
260
|
|
|
261
261
|
---
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=ERxetwuKJX_1UzzbbdymfXL8AXwRFp03HJG6sY-iJO4,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
|
|
6
6
|
sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
|
|
7
|
-
sourcecode/cli.py,sha256=
|
|
7
|
+
sourcecode/cli.py,sha256=1qVMsC2swT-OtCK6XziIM0J4xKp8kcRhUzfOaHr7vRU,80743
|
|
8
8
|
sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
|
|
9
9
|
sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
|
|
10
10
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -17,10 +17,11 @@ sourcecode/doc_analyzer.py,sha256=afA4uJFwXZ_uR2l4J0pQwbeTkRkGmKdN9KhRVYePBUw,24
|
|
|
17
17
|
sourcecode/entrypoint_classifier.py,sha256=gvKgl0f5T8ol1r4JMmkeqGHuZTfZJiOwFOWdc7EYwYw,4061
|
|
18
18
|
sourcecode/env_analyzer.py,sha256=GxCidahAAIptTdDFIlVB6URd4HBnBlIX_SqUov3MBRQ,22076
|
|
19
19
|
sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo,11572
|
|
20
|
+
sourcecode/flow_analyzer.py,sha256=VQDrItg3NBqOOD8PxHXyntXQnPweUuUn6JtOY8lNWys,12841
|
|
20
21
|
sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
|
|
21
22
|
sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
|
|
22
23
|
sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
|
|
23
|
-
sourcecode/prepare_context.py,sha256=
|
|
24
|
+
sourcecode/prepare_context.py,sha256=ELrCIIcttip4B3y9aQZdMPqIgzaEJR0evDdG8QYTBLc,129623
|
|
24
25
|
sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
|
|
25
26
|
sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
|
|
26
27
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
@@ -61,8 +62,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
61
62
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
62
63
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
63
64
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
64
|
-
sourcecode-1.30.
|
|
65
|
-
sourcecode-1.30.
|
|
66
|
-
sourcecode-1.30.
|
|
67
|
-
sourcecode-1.30.
|
|
68
|
-
sourcecode-1.30.
|
|
65
|
+
sourcecode-1.30.2.dist-info/METADATA,sha256=3bLQsn6BmYa9Rum0jjejw2627bPdOMaYxbqI2XMyOLY,23417
|
|
66
|
+
sourcecode-1.30.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
67
|
+
sourcecode-1.30.2.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
68
|
+
sourcecode-1.30.2.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
69
|
+
sourcecode-1.30.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|