sourcecode 1.31.23__py3-none-any.whl → 1.31.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/architecture_analyzer.py +68 -1
- sourcecode/cli.py +76 -12
- sourcecode/repository_ir.py +130 -2
- sourcecode/serializer.py +1 -1
- sourcecode/summarizer.py +25 -0
- {sourcecode-1.31.23.dist-info → sourcecode-1.31.24.dist-info}/METADATA +3 -3
- {sourcecode-1.31.23.dist-info → sourcecode-1.31.24.dist-info}/RECORD +11 -11
- {sourcecode-1.31.23.dist-info → sourcecode-1.31.24.dist-info}/WHEEL +0 -0
- {sourcecode-1.31.23.dist-info → sourcecode-1.31.24.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.31.23.dist-info → sourcecode-1.31.24.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
|
@@ -280,7 +280,7 @@ class ArchitectureAnalyzer:
|
|
|
280
280
|
})
|
|
281
281
|
|
|
282
282
|
# Step 4: bounded context inference
|
|
283
|
-
bounded_contexts = self._infer_bounded_contexts(domains, graph)
|
|
283
|
+
bounded_contexts = self._infer_bounded_contexts(domains, graph, sm.file_paths)
|
|
284
284
|
|
|
285
285
|
# Overall confidence — based on domain quality, not raw count
|
|
286
286
|
confidence: Literal["high", "medium", "low"]
|
|
@@ -703,11 +703,78 @@ class ArchitectureAnalyzer:
|
|
|
703
703
|
]
|
|
704
704
|
return result[:16]
|
|
705
705
|
|
|
706
|
+
@staticmethod
def _maven_module_bounded_contexts(file_paths: list[str]) -> list[BoundedContext]:
    """Priority 0: extract bounded contexts from Maven module directory names.

    Maven multi-module projects have the structure
    ``<module>/src/main/java/...`` (or ``<module>/src/test/java/...``).
    The directory directly above ``src`` is used as the module name and is a
    strong bounded-context signal
    (e.g. broadleaf-order, keycloak-services → order, services).

    Steps:
      1. Group files by module directory name. The marker must start at a
         path-segment boundary, so ``mysrc/main/java/`` is not a match.
      2. Strip the longest common prefix shared by all module names, cut back
         to its last ``-`` so a meaningful segment is never split.
      3. Strip one trailing ``-api``/``-impl``/``-core``/``-base``/``-common``/
         ``-parent``/``-test`` suffix.
      4. Drop empty or generic names, and merge modules that collapse to the
         same clean name (``x-order-api`` + ``x-order-impl`` → one ``order``).

    Args:
        file_paths: Repository file paths; both ``/`` and ``\\`` separators
            are accepted.

    Returns:
        One ``BoundedContext`` per distinct clean module name, each with at
        most 20 file paths and ``confidence="high"``. Returns an empty list
        when fewer than 2 distinct module directories are found.
    """
    import os.path as _osp
    import re as _re

    # Leading "/" anchors the marker on a path-segment boundary.
    markers = ("/src/main/java/", "/src/test/java/")
    module_files: dict[str, list[str]] = {}  # module dir name → [files]
    for path in file_paths:
        norm = path.replace("\\", "/")
        for marker in markers:
            idx = norm.find(marker)
            if idx <= 0:
                # Not found, or nothing precedes the marker (no module dir).
                continue
            # Everything before the marker is the module path; its last
            # segment is the module name.
            module_seg = norm[:idx].rstrip("/").rsplit("/", 1)[-1]
            if module_seg:
                module_files.setdefault(module_seg, []).append(path)
            break

    if len(module_files) < 2:
        return []

    # Strip common project-name prefix (e.g. "keycloak-", "broadleaf-").
    # Only strip up to the last '-' (rfind == -1 yields an empty prefix).
    common = _osp.commonprefix(list(module_files))
    prefix_to_strip = common[: common.rfind("-") + 1]

    generic_names = _GENERIC_NAMES | {
        "api", "impl", "base", "test", "tests", "main", "java",
        "integration", "parent", "bom", "platform",
    }
    suffix_re = _re.compile(r"-(api|impl|core|base|common|parent|test)$")

    # clean name → merged file list; insertion order follows sorted raw names
    # so the output stays deterministic.
    merged: dict[str, list[str]] = {}
    for raw_name in sorted(module_files):
        clean = suffix_re.sub("", raw_name[len(prefix_to_strip):])
        if not clean or clean in generic_names:
            continue
        merged.setdefault(clean, []).extend(module_files[raw_name])

    return [
        BoundedContext(
            name=name,
            modules=files[:20],  # cap file list
            confidence="high",
        )
        for name, files in merged.items()
    ]
|
|
765
|
+
|
|
706
766
|
def _infer_bounded_contexts(
|
|
707
767
|
self,
|
|
708
768
|
domains: list[ArchitectureDomain],
|
|
709
769
|
graph: Optional[ModuleGraph],
|
|
770
|
+
file_paths: list[str] | None = None,
|
|
710
771
|
) -> list[BoundedContext]:
|
|
772
|
+
# Priority 0: Maven module names — strong bounded context signal for Java projects
|
|
773
|
+
if file_paths:
|
|
774
|
+
maven_bcs = self._maven_module_bounded_contexts(file_paths)
|
|
775
|
+
if maven_bcs:
|
|
776
|
+
return maven_bcs
|
|
777
|
+
|
|
711
778
|
# Priority 1: use graph SCCs when available
|
|
712
779
|
if graph is not None:
|
|
713
780
|
sccs = self._find_sccs(graph)
|
sourcecode/cli.py
CHANGED
|
@@ -1960,7 +1960,11 @@ def _serialize_relevant_file(f: Any) -> dict:
|
|
|
1960
1960
|
d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
|
|
1961
1961
|
reason = d.pop("reason", "") or ""
|
|
1962
1962
|
why = d.pop("why", "") or ""
|
|
1963
|
-
|
|
1963
|
+
# Expose score as a rounded float so agents can rank/filter files deterministically.
|
|
1964
|
+
# Kept as "score" (0.0–1.0 normalized relevance) — higher = more relevant.
|
|
1965
|
+
raw_score = d.pop("score", None)
|
|
1966
|
+
if raw_score is not None:
|
|
1967
|
+
d["score"] = round(float(raw_score), 4)
|
|
1964
1968
|
explanation = _make_explanation(reason, why)
|
|
1965
1969
|
if explanation:
|
|
1966
1970
|
d["explanation"] = explanation
|
|
@@ -2147,6 +2151,26 @@ def prepare_context_cmd(
|
|
|
2147
2151
|
)
|
|
2148
2152
|
raise typer.Exit(code=1)
|
|
2149
2153
|
|
|
2154
|
+
# Validate --format: only "json" and "github-comment" are valid for prepare-context.
|
|
2155
|
+
# "yaml" is intentionally NOT supported here (use main command for yaml output).
|
|
2156
|
+
# Invalid values must error loudly — silently falling through to JSON is a lie.
|
|
2157
|
+
_PC_FORMAT_CHOICES = ("json", "github-comment")
|
|
2158
|
+
if format is not None and format not in _PC_FORMAT_CHOICES:
|
|
2159
|
+
typer.echo(
|
|
2160
|
+
f"Error: invalid value '{format}' for --format. "
|
|
2161
|
+
f"Valid options: {', '.join(_PC_FORMAT_CHOICES)}.",
|
|
2162
|
+
err=True,
|
|
2163
|
+
)
|
|
2164
|
+
raise typer.Exit(code=2)
|
|
2165
|
+
# github-comment only renders for review-pr; warn and normalize for other tasks.
|
|
2166
|
+
if format == "github-comment" and task != "review-pr":
|
|
2167
|
+
typer.echo(
|
|
2168
|
+
f"[warning] --format github-comment is only supported for the review-pr task. "
|
|
2169
|
+
f"Outputting JSON for '{task}'.",
|
|
2170
|
+
err=True,
|
|
2171
|
+
)
|
|
2172
|
+
format = "json"
|
|
2173
|
+
|
|
2150
2174
|
target = path.resolve()
|
|
2151
2175
|
if not target.exists() or not target.is_dir():
|
|
2152
2176
|
typer.echo(f"Error: '{target}' is not a valid directory.", err=True)
|
|
@@ -3169,6 +3193,21 @@ def modernize_cmd(
|
|
|
3169
3193
|
subsystems: list = ir.get("subsystems") or []
|
|
3170
3194
|
reverse_graph: dict = ir.get("reverse_graph") or {}
|
|
3171
3195
|
|
|
3196
|
+
# Git churn: commit frequency per file in last 90 days → proxy for volatility
|
|
3197
|
+
from sourcecode.contract_pipeline import _get_git_churn
|
|
3198
|
+
_java_rel_paths = [
|
|
3199
|
+
str(Path(p).relative_to(root)).replace("\\", "/") if Path(p).is_absolute() else p.replace("\\", "/")
|
|
3200
|
+
for p in file_list
|
|
3201
|
+
]
|
|
3202
|
+
_file_churn: dict[str, int] = _get_git_churn(root, _java_rel_paths)
|
|
3203
|
+
|
|
3204
|
+
# Build fqn → churn mapping via source_file field on graph nodes
|
|
3205
|
+
_fqn_churn: dict[str, int] = {}
|
|
3206
|
+
for _n in graph_nodes:
|
|
3207
|
+
_src = (_n.get("source_file") or "").replace("\\", "/")
|
|
3208
|
+
if _src and _src in _file_churn:
|
|
3209
|
+
_fqn_churn[_n["fqn"]] = _file_churn[_src]
|
|
3210
|
+
|
|
3172
3211
|
# High-coupling nodes: high in_degree (many dependents = risky to change)
|
|
3173
3212
|
coupling_nodes = sorted(
|
|
3174
3213
|
[n for n in graph_nodes if n.get("in_degree", 0) >= 3],
|
|
@@ -3183,17 +3222,42 @@ def modernize_cmd(
|
|
|
3183
3222
|
key=lambda n: n.get("fqn", ""),
|
|
3184
3223
|
)[:20]
|
|
3185
3224
|
|
|
3186
|
-
# Hotspot candidates: high in-degree service/repository nodes
|
|
3187
|
-
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3225
|
+
# Hotspot candidates: high in-degree service/repository/controller nodes,
|
|
3226
|
+
# ranked by composite score (in_degree × 2 + git_churn) for volatility signal.
|
|
3227
|
+
_HOTSPOT_ROLES = frozenset({"service", "repository", "controller", "entity"})
|
|
3228
|
+
_hotspot_candidates = [
|
|
3229
|
+
n for n in coupling_nodes if n.get("role") in _HOTSPOT_ROLES
|
|
3230
|
+
]
|
|
3231
|
+
# Also include high-coupling nodes with name-based role inference even if
|
|
3232
|
+
# they didn't appear in coupling_nodes (in_degree >= 1 is sufficient here)
|
|
3233
|
+
_seen_hotspot_fqns = {n["fqn"] for n in _hotspot_candidates}
|
|
3234
|
+
for _n in graph_nodes:
|
|
3235
|
+
if (_n.get("fqn") not in _seen_hotspot_fqns
|
|
3236
|
+
and _n.get("role") in _HOTSPOT_ROLES
|
|
3237
|
+
and _n.get("in_degree", 0) >= 1
|
|
3238
|
+
and _fqn_churn.get(_n["fqn"], 0) >= 3):
|
|
3239
|
+
_hotspot_candidates.append(_n)
|
|
3240
|
+
_seen_hotspot_fqns.add(_n["fqn"])
|
|
3241
|
+
|
|
3242
|
+
_max_churn = max(_fqn_churn.values(), default=1)
|
|
3243
|
+
hotspots = sorted(
|
|
3244
|
+
[
|
|
3245
|
+
{
|
|
3246
|
+
"fqn": n["fqn"],
|
|
3247
|
+
"role": n.get("role", "other"),
|
|
3248
|
+
"in_degree": n.get("in_degree", 0),
|
|
3249
|
+
"out_degree": n.get("out_degree", 0),
|
|
3250
|
+
"git_churn_90d": _fqn_churn.get(n["fqn"], 0),
|
|
3251
|
+
"hotspot_score": round(
|
|
3252
|
+
n.get("in_degree", 0) * 2.0
|
|
3253
|
+
+ (_fqn_churn.get(n["fqn"], 0) / _max_churn) * 5.0,
|
|
3254
|
+
2,
|
|
3255
|
+
),
|
|
3256
|
+
}
|
|
3257
|
+
for n in _hotspot_candidates
|
|
3258
|
+
],
|
|
3259
|
+
key=lambda h: (-h["hotspot_score"], h["fqn"]),
|
|
3260
|
+
)[:15]
|
|
3197
3261
|
|
|
3198
3262
|
# Cross-module tangles: subsystems with high member count
|
|
3199
3263
|
tangle_modules = sorted(
|
sourcecode/repository_ir.py
CHANGED
|
@@ -14,6 +14,7 @@ No inference, approximation, or heuristics.
|
|
|
14
14
|
|
|
15
15
|
from __future__ import annotations
|
|
16
16
|
|
|
17
|
+
import random
|
|
17
18
|
import re
|
|
18
19
|
import subprocess
|
|
19
20
|
from collections import deque
|
|
@@ -217,6 +218,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
|
|
|
217
218
|
"@Component": "component",
|
|
218
219
|
"@Configuration": "config",
|
|
219
220
|
"@Bean": "config",
|
|
221
|
+
# JPA / Hibernate
|
|
222
|
+
"@Entity": "entity",
|
|
223
|
+
"@MappedSuperclass": "entity",
|
|
224
|
+
"@Embeddable": "entity",
|
|
225
|
+
"@Table": "entity",
|
|
220
226
|
# CDI / Jakarta EE
|
|
221
227
|
"@ApplicationScoped": "service",
|
|
222
228
|
"@RequestScoped": "service",
|
|
@@ -226,6 +232,9 @@ _JAVA_ROLE_MAP: dict[str, str] = {
|
|
|
226
232
|
"@Dependent": "component",
|
|
227
233
|
"@Named": "component",
|
|
228
234
|
"@Produces": "component",
|
|
235
|
+
"@Stateless": "service",
|
|
236
|
+
"@Stateful": "service",
|
|
237
|
+
"@MessageDriven": "service",
|
|
229
238
|
# JAX-RS
|
|
230
239
|
"@Provider": "provider",
|
|
231
240
|
"@Consumes": "controller",
|
|
@@ -233,6 +242,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
|
|
|
233
242
|
"@QuarkusMain": "entrypoint",
|
|
234
243
|
"@QuarkusTest": "test",
|
|
235
244
|
"@QuarkusIntegrationTest": "test",
|
|
245
|
+
"@RegisterForReflection": "component",
|
|
246
|
+
# Spring Security / AOP
|
|
247
|
+
"@Aspect": "config",
|
|
248
|
+
"@EnableWebSecurity": "config",
|
|
249
|
+
"@EnableMethodSecurity": "config",
|
|
236
250
|
}
|
|
237
251
|
|
|
238
252
|
# Backward-compatible alias — external callers may reference this name.
|
|
@@ -746,6 +760,36 @@ def _java_role(annotations: list[str]) -> str:
|
|
|
746
760
|
return "unknown"
|
|
747
761
|
|
|
748
762
|
|
|
763
|
+
# Name-suffix patterns for role inference when annotations are absent.
|
|
764
|
+
# Ordered: more specific patterns first.
|
|
765
|
+
_JAVA_NAME_ROLE_PATTERNS: list[tuple[re.Pattern, str]] = [
|
|
766
|
+
(re.compile(r"(?:Controller|Resource|Endpoint|Handler|Servlet|Filter|Action)$"), "controller"),
|
|
767
|
+
(re.compile(r"(?:ServiceImpl|ServiceBean|ServiceFacade|Facade)$"), "service"),
|
|
768
|
+
(re.compile(r"(?:Service|Manager|Processor|Coordinator|Orchestrator|UseCase|Interactor)$"), "service"),
|
|
769
|
+
(re.compile(r"(?:RepositoryImpl|DaoImpl|DAOImpl)$"), "repository"),
|
|
770
|
+
(re.compile(r"(?:Repository|Dao|DAO|Store|Persistence|JpaRepository|CrudRepository)$"), "repository"),
|
|
771
|
+
(re.compile(r"(?:Entity|Model|Domain|Vo|ValueObject|Record)$"), "entity"),
|
|
772
|
+
(re.compile(r"(?:Config|Configuration|Configurer|AutoConfiguration|Properties|Settings)$"), "config"),
|
|
773
|
+
(re.compile(r"(?:Factory|Builder|Provider|Supplier|Creator|Generator)$"), "provider"),
|
|
774
|
+
(re.compile(r"(?:Listener|Observer|Handler|EventHandler|MessageListener|Consumer)$"), "component"),
|
|
775
|
+
(re.compile(r"(?:Util|Utils|Helper|Helpers|Converter|Transformer|Mapper|Adapter)$"), "component"),
|
|
776
|
+
(re.compile(r"(?:Exception|Error)$"), "other"),
|
|
777
|
+
(re.compile(r"(?:Test|Tests|Spec|IT|IntegrationTest)$"), "test"),
|
|
778
|
+
]
|
|
779
|
+
|
|
780
|
+
|
|
781
|
+
def _java_role_from_name(simple_name: str) -> str:
|
|
782
|
+
"""Infer role from Java class simple name when annotations don't classify it.
|
|
783
|
+
|
|
784
|
+
Returns 'other' (never 'unknown') — callers use 'unknown' to mean
|
|
785
|
+
'not classified at all'; 'other' means 'classified but no interesting role'.
|
|
786
|
+
"""
|
|
787
|
+
for pattern, role in _JAVA_NAME_ROLE_PATTERNS:
|
|
788
|
+
if pattern.search(simple_name):
|
|
789
|
+
return role
|
|
790
|
+
return "other"
|
|
791
|
+
|
|
792
|
+
|
|
749
793
|
# Backward-compatible alias used by external callers and serializer.
|
|
750
794
|
_spring_role = _java_role
|
|
751
795
|
|
|
@@ -1849,7 +1893,7 @@ def _assemble(
|
|
|
1849
1893
|
sorted_rels = sorted(relations, key=lambda e: (e.from_symbol, e.type, e.to_symbol))
|
|
1850
1894
|
sorted_changed = sorted(changed_symbols, key=lambda c: c.symbol)
|
|
1851
1895
|
|
|
1852
|
-
# Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic)
|
|
1896
|
+
# Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic + name fallback)
|
|
1853
1897
|
spring_role_map: dict[str, str] = {}
|
|
1854
1898
|
for sym in sorted_syms:
|
|
1855
1899
|
if sym.type in ("class", "interface"):
|
|
@@ -1857,6 +1901,10 @@ def _assemble(
|
|
|
1857
1901
|
# JAX-RS resource: class-level @Path without a recognized annotation → controller
|
|
1858
1902
|
if role == "unknown" and "@Path" in sym.annotations:
|
|
1859
1903
|
role = "controller"
|
|
1904
|
+
# Name-based fallback: when annotations provide no signal, infer from class name
|
|
1905
|
+
if role == "unknown":
|
|
1906
|
+
simple = sym.symbol.split(".")[-1].split("#")[0]
|
|
1907
|
+
role = _java_role_from_name(simple)
|
|
1860
1908
|
spring_role_map[sym.symbol] = role
|
|
1861
1909
|
|
|
1862
1910
|
# Degree maps (graph-derived)
|
|
@@ -2976,12 +3024,19 @@ def compute_blast_radius(
|
|
|
2976
3024
|
# KeycloakSession with 2023 importers), deep BFS is O(n^depth) and collapses
|
|
2977
3025
|
# to 70-91s at depth=4. Cap effective depth to 1 for hub classes so the
|
|
2978
3026
|
# direct-caller list is still accurate but we skip the catastrophic expansion.
|
|
3027
|
+
# Instead of omitting indirect callers entirely, we do a sampled BFS: pick
|
|
3028
|
+
# _SAMPLE_SIZE random direct callers, run depth-2 BFS from those, then scale
|
|
3029
|
+
# up to estimate total indirect reach.
|
|
2979
3030
|
_HUB_CALLER_THRESHOLD = 500
|
|
3031
|
+
_HUB_SAMPLE_SIZE = 20
|
|
3032
|
+
_HUB_SAMPLE_DEPTH = 2
|
|
2980
3033
|
_effective_depth = max_depth
|
|
3034
|
+
_hub_class_guard = False
|
|
2981
3035
|
for seed in matched_fqns:
|
|
2982
3036
|
_seed_callers = _all_callers_from_rg(seed, reverse_graph)
|
|
2983
3037
|
if len(_seed_callers) > _HUB_CALLER_THRESHOLD and max_depth > 1:
|
|
2984
3038
|
_effective_depth = 1
|
|
3039
|
+
_hub_class_guard = True
|
|
2985
3040
|
break
|
|
2986
3041
|
|
|
2987
3042
|
for seed in matched_fqns:
|
|
@@ -3078,6 +3133,35 @@ def compute_blast_radius(
|
|
|
3078
3133
|
indirect_callers.append(caller)
|
|
3079
3134
|
queue.append((caller, depth + 1))
|
|
3080
3135
|
|
|
3136
|
+
# Sampled BFS for hub classes: direct BFS was capped at depth=1, so
|
|
3137
|
+
# indirect_callers is empty. Sample _HUB_SAMPLE_SIZE random direct callers,
|
|
3138
|
+
# run depth-_HUB_SAMPLE_DEPTH BFS from those, and scale up to estimate reach.
|
|
3139
|
+
_indirect_sampled = False
|
|
3140
|
+
_indirect_estimated_count: int | None = None
|
|
3141
|
+
if _hub_class_guard and direct_callers:
|
|
3142
|
+
_n_direct = len(direct_callers)
|
|
3143
|
+
_k = min(_HUB_SAMPLE_SIZE, _n_direct)
|
|
3144
|
+
_sample_seeds = random.sample(direct_callers, _k)
|
|
3145
|
+
_sample_visited: set[str] = set(matched_fqns) | set(direct_callers)
|
|
3146
|
+
_sample_queue: list[tuple[str, int]] = [(c, 1) for c in _sample_seeds]
|
|
3147
|
+
_sample_indirect: list[str] = []
|
|
3148
|
+
while _sample_queue:
|
|
3149
|
+
_snode, _sdepth = _sample_queue.pop(0)
|
|
3150
|
+
if _sdepth >= _HUB_SAMPLE_DEPTH:
|
|
3151
|
+
continue
|
|
3152
|
+
for _scaller in _all_callers_from_rg(_snode, reverse_graph):
|
|
3153
|
+
if _scaller not in _sample_visited:
|
|
3154
|
+
_sample_visited.add(_scaller)
|
|
3155
|
+
all_affected[_scaller] = _sdepth + 1
|
|
3156
|
+
_sample_indirect.append(_scaller)
|
|
3157
|
+
_sample_queue.append((_scaller, _sdepth + 1))
|
|
3158
|
+
if _sample_indirect:
|
|
3159
|
+
indirect_callers = _sample_indirect
|
|
3160
|
+
_indirect_sampled = True
|
|
3161
|
+
# Scale: sample covered _k of _n_direct seeds; extrapolate linearly
|
|
3162
|
+
_scale = _n_direct / _k
|
|
3163
|
+
_indirect_estimated_count = round(len(_sample_indirect) * _scale)
|
|
3164
|
+
|
|
3081
3165
|
# ── 3. Identify affected endpoints from route_surface ─────────────────────
|
|
3082
3166
|
affected_classes: set[str] = set(matched_fqns) | set(direct_callers) | set(indirect_callers)
|
|
3083
3167
|
# Expand to enclosing classes of field/method FQNs in affected set.
|
|
@@ -3275,6 +3359,8 @@ def compute_blast_radius(
|
|
|
3275
3359
|
confidence_level = "low"
|
|
3276
3360
|
|
|
3277
3361
|
# ── 10. Explanation ───────────────────────────────────────────────────────
|
|
3362
|
+
_bfs_truncated = _effective_depth < max_depth
|
|
3363
|
+
|
|
3278
3364
|
_parts: list[str] = []
|
|
3279
3365
|
if n_direct:
|
|
3280
3366
|
_parts.append(f"{n_direct} direct caller{'s' if n_direct != 1 else ''}")
|
|
@@ -3298,6 +3384,22 @@ def compute_blast_radius(
|
|
|
3298
3384
|
f"({', '.join(_iface_names)}) — Spring/CDI DI pattern"
|
|
3299
3385
|
)
|
|
3300
3386
|
|
|
3387
|
+
# Transparency: hub-class BFS truncation must appear in explanation so the
|
|
3388
|
+
# text and JSON are semantically identical.
|
|
3389
|
+
if _bfs_truncated:
|
|
3390
|
+
if _indirect_sampled and _indirect_estimated_count is not None:
|
|
3391
|
+
_parts.append(
|
|
3392
|
+
f"indirect callers sampled ({_HUB_SAMPLE_SIZE} of {n_direct} seeds, "
|
|
3393
|
+
f"depth={_HUB_SAMPLE_DEPTH}): {n_indirect} found in sample, "
|
|
3394
|
+
f"~{_indirect_estimated_count} estimated total"
|
|
3395
|
+
)
|
|
3396
|
+
else:
|
|
3397
|
+
_parts.append(
|
|
3398
|
+
f"indirect BFS skipped (hub class: {n_direct} direct callers "
|
|
3399
|
+
f"exceed {_HUB_CALLER_THRESHOLD} threshold; no indirect callers reachable "
|
|
3400
|
+
"from sample — graph may be a terminal sink)"
|
|
3401
|
+
)
|
|
3402
|
+
|
|
3301
3403
|
if not _parts:
|
|
3302
3404
|
explanation = f"No callers or dependents found for {target!r}. Low-risk isolated change."
|
|
3303
3405
|
else:
|
|
@@ -3324,10 +3426,13 @@ def compute_blast_radius(
|
|
|
3324
3426
|
"security_surface_affected": security_surface_affected,
|
|
3325
3427
|
"cross_module_impact": cross_module_impact,
|
|
3326
3428
|
"transactional_boundaries_touched": txn_nodes,
|
|
3327
|
-
"depth_reached":
|
|
3429
|
+
"depth_reached": _effective_depth, # actual BFS depth used, not the requested max
|
|
3430
|
+
"bfs_truncated": _bfs_truncated,
|
|
3328
3431
|
"stats": {
|
|
3329
3432
|
"direct_caller_count": n_direct,
|
|
3330
3433
|
"indirect_caller_count": n_indirect,
|
|
3434
|
+
"indirect_callers_computed": not _bfs_truncated or _indirect_sampled,
|
|
3435
|
+
"indirect_callers_sampled": _indirect_sampled,
|
|
3331
3436
|
"endpoints_affected_count": n_ep,
|
|
3332
3437
|
"transactional_boundaries_count": n_txn,
|
|
3333
3438
|
"mappers_affected_count": n_mappers,
|
|
@@ -3335,6 +3440,14 @@ def compute_blast_radius(
|
|
|
3335
3440
|
"security_surface_count": n_sec,
|
|
3336
3441
|
},
|
|
3337
3442
|
}
|
|
3443
|
+
if _indirect_sampled and _indirect_estimated_count is not None:
|
|
3444
|
+
out["indirect_callers_estimated_count"] = _indirect_estimated_count
|
|
3445
|
+
out["indirect_callers_sample_note"] = (
|
|
3446
|
+
f"indirect_callers contains a sample (BFS depth={_HUB_SAMPLE_DEPTH} from "
|
|
3447
|
+
f"{min(_HUB_SAMPLE_SIZE, n_direct)} of {n_direct} direct callers). "
|
|
3448
|
+
f"Estimated total indirect reach: ~{_indirect_estimated_count}. "
|
|
3449
|
+
"Actual count may differ; use a lower-fan-in entry point for exact traversal."
|
|
3450
|
+
)
|
|
3338
3451
|
if _candidates_out:
|
|
3339
3452
|
out["candidates"] = _candidates_out
|
|
3340
3453
|
if _iface_bridging:
|
|
@@ -3344,6 +3457,21 @@ def compute_blast_radius(
|
|
|
3344
3457
|
"(Spring/CDI/Guice). direct_callers includes callers of the implemented "
|
|
3345
3458
|
"interface(s) — these are the real production dependents."
|
|
3346
3459
|
)
|
|
3460
|
+
if _bfs_truncated:
|
|
3461
|
+
out["bfs_truncation_reason"] = "hub_class_depth_cap"
|
|
3462
|
+
if _indirect_sampled:
|
|
3463
|
+
out["bfs_truncation_note"] = (
|
|
3464
|
+
f"Full BFS capped at depth=1 (hub class: {n_direct} direct callers "
|
|
3465
|
+
f">{_HUB_CALLER_THRESHOLD}). indirect_callers is a sampled estimate — "
|
|
3466
|
+
f"BFS from {min(_HUB_SAMPLE_SIZE, n_direct)} random seeds at depth={_HUB_SAMPLE_DEPTH}."
|
|
3467
|
+
)
|
|
3468
|
+
else:
|
|
3469
|
+
out["bfs_truncation_note"] = (
|
|
3470
|
+
f"Indirect BFS capped at depth=1: target has {n_direct} direct callers "
|
|
3471
|
+
f"(>{_HUB_CALLER_THRESHOLD} threshold). indirect_callers is empty — "
|
|
3472
|
+
"no indirect callers reachable from sampled seeds (terminal sink or sparse graph). "
|
|
3473
|
+
"Use a lower-fan-in entry point for full transitive traversal."
|
|
3474
|
+
)
|
|
3347
3475
|
if len(direct_callers) > 30:
|
|
3348
3476
|
out["direct_callers_note"] = (
|
|
3349
3477
|
f"Showing 30/{n_direct} direct callers. Use --output to inspect full IR."
|
sourcecode/serializer.py
CHANGED
|
@@ -1979,7 +1979,7 @@ def agent_view(sm: SourceMap, *, full: bool = False) -> dict[str, Any]:
|
|
|
1979
1979
|
result["file_relevance_hint"] = (
|
|
1980
1980
|
f"Showing top {_fr_limit}/{_total_paths} files by score "
|
|
1981
1981
|
f"({'--full' if full else 'normal'} mode, bounded for signal quality). "
|
|
1982
|
-
f"Use --
|
|
1982
|
+
f"Use --full for up to {compute_context_limit('full', _FR_AGENT_CAP)} files."
|
|
1983
1983
|
)
|
|
1984
1984
|
|
|
1985
1985
|
# ── 5. Monorepo package roles (when available), capped ───────────────────
|
sourcecode/summarizer.py
CHANGED
|
@@ -300,6 +300,7 @@ class ProjectSummarizer:
|
|
|
300
300
|
|
|
301
301
|
# Stack with frameworks — keep brief, skip internal module listings
|
|
302
302
|
non_tooling_stacks = self._filter_non_tooling_stacks(sm)
|
|
303
|
+
primary = None
|
|
303
304
|
if non_tooling_stacks:
|
|
304
305
|
primary = self._select_summary_primary_stack(non_tooling_stacks)
|
|
305
306
|
frameworks = [fw.name for fw in primary.frameworks[:2]]
|
|
@@ -313,6 +314,30 @@ class ProjectSummarizer:
|
|
|
313
314
|
if domains:
|
|
314
315
|
parts.append(f"Domains: {', '.join(domains)}")
|
|
315
316
|
|
|
317
|
+
# Quantitative structural suffix for Java projects — adds concrete scale signals
|
|
318
|
+
# that README descriptions omit (class count, transactional boundary count).
|
|
319
|
+
if primary is not None and primary.stack.lower() == "java":
|
|
320
|
+
quant_parts: list[str] = []
|
|
321
|
+
java_files = sum(
|
|
322
|
+
1 for p in sm.file_paths if p.endswith(".java")
|
|
323
|
+
)
|
|
324
|
+
if java_files >= 50:
|
|
325
|
+
quant_parts.append(f"{java_files:,} Java classes")
|
|
326
|
+
txn_classes: list[str] = []
|
|
327
|
+
for stack in non_tooling_stacks:
|
|
328
|
+
txn_classes.extend(getattr(stack, "transactional_classes", []))
|
|
329
|
+
n_txn = len(set(txn_classes))
|
|
330
|
+
if n_txn > 0:
|
|
331
|
+
quant_parts.append(f"{n_txn} transactional boundaries")
|
|
332
|
+
ep_controllers = [
|
|
333
|
+
ep for ep in sm.entry_points
|
|
334
|
+
if ep.kind in ("controller", "rest_controller", "rest", "endpoint")
|
|
335
|
+
]
|
|
336
|
+
if ep_controllers:
|
|
337
|
+
quant_parts.append(f"{len(ep_controllers)} controller entry points")
|
|
338
|
+
if quant_parts:
|
|
339
|
+
parts.append(", ".join(quant_parts))
|
|
340
|
+
|
|
316
341
|
return ". ".join(parts) + "."
|
|
317
342
|
|
|
318
343
|
def _detect_architecture_pattern(self, file_paths: list[str]) -> str | None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.31.23
|
|
3
|
+
Version: 1.31.24
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
|
|
|
225
225
|
|
|
226
226
|
**AI-ready change intelligence for Java/Spring enterprise monoliths.**
|
|
227
227
|
|
|
228
|
-

|
|
229
229
|

|
|
230
230
|
|
|
231
231
|
---
|
|
@@ -263,7 +263,7 @@ pipx install sourcecode
|
|
|
263
263
|
|
|
264
264
|
```bash
|
|
265
265
|
sourcecode version
|
|
266
|
-
# sourcecode 1.31.23
|
|
266
|
+
# sourcecode 1.31.24
|
|
267
267
|
```
|
|
268
268
|
|
|
269
269
|
---
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=OFgxQ97Ujgsq98XhK0hoPwDBX_R6E7oO8JgpjanvaHQ,104
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
|
-
sourcecode/architecture_analyzer.py,sha256=
|
|
3
|
+
sourcecode/architecture_analyzer.py,sha256=Ry3aYT9dc7XuLmWLT5IZ93RkCf_P14Qtew0nGPvUl_8,42184
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
|
|
6
6
|
sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
|
|
8
8
|
sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
|
|
9
|
-
sourcecode/cli.py,sha256=
|
|
9
|
+
sourcecode/cli.py,sha256=zykj3wNxSXAdiBIgmn6KWLdrNLHCEUrhv4YL9rlRlUE,150539
|
|
10
10
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
11
11
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
12
12
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
|
|
|
32
32
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
33
33
|
sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
|
|
34
34
|
sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
|
|
35
|
-
sourcecode/repository_ir.py,sha256=
|
|
35
|
+
sourcecode/repository_ir.py,sha256=sp6IdcZbFAQjznUthMBu_6Mu5RBxVP72d5Vw0hKnH7o,148437
|
|
36
36
|
sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
|
|
37
37
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
38
38
|
sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
|
|
39
39
|
sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
|
|
40
|
-
sourcecode/serializer.py,sha256=
|
|
41
|
-
sourcecode/summarizer.py,sha256=
|
|
40
|
+
sourcecode/serializer.py,sha256=7TzN2GLtIP3PIVatoB98_7DQdoAkUNvvNVU7Bz7r_K8,123313
|
|
41
|
+
sourcecode/summarizer.py,sha256=BMHJA0Do4rBnabc1_BxHoETTNb5ew0VqCX_eY3_PdCg,20706
|
|
42
42
|
sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
|
|
43
43
|
sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
|
|
44
44
|
sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
|
|
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
76
76
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
77
77
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
78
78
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
79
|
-
sourcecode-1.31.
|
|
80
|
-
sourcecode-1.31.
|
|
81
|
-
sourcecode-1.31.
|
|
82
|
-
sourcecode-1.31.
|
|
83
|
-
sourcecode-1.31.
|
|
79
|
+
sourcecode-1.31.24.dist-info/METADATA,sha256=y1qV8wDttJuezYPLbBUeZZwzQMWWrtJ8clEuBAchsJ0,31103
|
|
80
|
+
sourcecode-1.31.24.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
81
|
+
sourcecode-1.31.24.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
82
|
+
sourcecode-1.31.24.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
83
|
+
sourcecode-1.31.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|