sourcecode 1.31.22__py3-none-any.whl → 1.31.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.22"
3
+ __version__ = "1.31.24"
@@ -280,7 +280,7 @@ class ArchitectureAnalyzer:
280
280
  })
281
281
 
282
282
  # Step 4: bounded context inference
283
- bounded_contexts = self._infer_bounded_contexts(domains, graph)
283
+ bounded_contexts = self._infer_bounded_contexts(domains, graph, sm.file_paths)
284
284
 
285
285
  # Overall confidence — based on domain quality, not raw count
286
286
  confidence: Literal["high", "medium", "low"]
@@ -703,11 +703,78 @@ class ArchitectureAnalyzer:
703
703
  ]
704
704
  return result[:16]
705
705
 
706
+ @staticmethod
707
+ def _maven_module_bounded_contexts(file_paths: list[str]) -> list[BoundedContext]:
708
+ """Priority 0: extract bounded contexts from Maven module directory names.
709
+
710
+ Maven multi-module projects have structure: <module>/src/main/java/...
711
+ The module directory name is a strong bounded context signal
712
+ (e.g. broadleaf-order, keycloak-services → order, services).
713
+ Strips common project-name prefixes (longest common prefix across modules).
714
+ Returns empty list when fewer than 2 distinct modules are found.
715
+ """
716
+ import re as _re
717
+ _MAVEN_SRC = "src/main/java/"
718
+ _MAVEN_TEST = "src/test/java/"
719
+ module_names: dict[str, list[str]] = {} # module_name → [files]
720
+ for p in file_paths:
721
+ norm = p.replace("\\", "/")
722
+ for marker in (_MAVEN_SRC, _MAVEN_TEST):
723
+ idx = norm.find(marker)
724
+ if idx > 0:
725
+ # Everything before the marker is the module path
726
+ module_path = norm[:idx].rstrip("/")
727
+ # Take the last path segment as module name
728
+ module_seg = module_path.split("/")[-1] if "/" in module_path else module_path
729
+ if module_seg:
730
+ module_names.setdefault(module_seg, []).append(p)
731
+ break
732
+
733
+ if len(module_names) < 2:
734
+ return []
735
+
736
+ # Strip common project-name prefix (e.g. "keycloak-", "broadleaf-")
737
+ # by finding longest common prefix across all module names
738
+ all_names = sorted(module_names)
739
+ common = ""
740
+ for i, ch in enumerate(all_names[0]):
741
+ if all(n[i:i+1] == ch for n in all_names[1:]):
742
+ common += ch
743
+ else:
744
+ break
745
+ # Only strip prefix up to last '-' (avoid stripping into meaningful segment)
746
+ prefix_to_strip = common[:common.rfind("-") + 1] if "-" in common else ""
747
+
748
+ _GENERIC_EXTENDED = _GENERIC_NAMES | {
749
+ "api", "impl", "base", "test", "tests", "main", "java",
750
+ "integration", "parent", "bom", "platform",
751
+ }
752
+ bc_list: list[BoundedContext] = []
753
+ for raw_name, files in sorted(module_names.items()):
754
+ clean = raw_name[len(prefix_to_strip):] if prefix_to_strip else raw_name
755
+ # Remove trailing -api, -impl, -core suffixes
756
+ clean = _re.sub(r"-(api|impl|core|base|common|parent|test)$", "", clean)
757
+ if not clean or clean in _GENERIC_EXTENDED:
758
+ continue
759
+ bc_list.append(BoundedContext(
760
+ name=clean,
761
+ modules=files[:20], # cap file list
762
+ confidence="high",
763
+ ))
764
+ return bc_list
765
+
706
766
  def _infer_bounded_contexts(
707
767
  self,
708
768
  domains: list[ArchitectureDomain],
709
769
  graph: Optional[ModuleGraph],
770
+ file_paths: list[str] | None = None,
710
771
  ) -> list[BoundedContext]:
772
+ # Priority 0: Maven module names — strong bounded context signal for Java projects
773
+ if file_paths:
774
+ maven_bcs = self._maven_module_bounded_contexts(file_paths)
775
+ if maven_bcs:
776
+ return maven_bcs
777
+
711
778
  # Priority 1: use graph SCCs when available
712
779
  if graph is not None:
713
780
  sccs = self._find_sccs(graph)
sourcecode/cli.py CHANGED
@@ -423,7 +423,7 @@ def main(
423
423
  "High-signal summary (typically 1000–3000 tokens depending on repo size): "
424
424
  "stacks, entry points, dependency summary, confidence, and gaps. "
425
425
  "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
426
- "Use --agent for maximum signal or --slim (when available) for minimal token footprint."
426
+ "Use --agent for maximum signal."
427
427
  ),
428
428
  ),
429
429
  dependencies: bool = typer.Option(
@@ -1960,7 +1960,11 @@ def _serialize_relevant_file(f: Any) -> dict:
1960
1960
  d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
1961
1961
  reason = d.pop("reason", "") or ""
1962
1962
  why = d.pop("why", "") or ""
1963
- d.pop("score", None) # score removed from public output (internal ranking only)
1963
+ # Expose score as a rounded float so agents can rank/filter files deterministically.
1964
+ # Kept as "score" (0.0–1.0 normalized relevance) — higher = more relevant.
1965
+ raw_score = d.pop("score", None)
1966
+ if raw_score is not None:
1967
+ d["score"] = round(float(raw_score), 4)
1964
1968
  explanation = _make_explanation(reason, why)
1965
1969
  if explanation:
1966
1970
  d["explanation"] = explanation
@@ -2147,6 +2151,26 @@ def prepare_context_cmd(
2147
2151
  )
2148
2152
  raise typer.Exit(code=1)
2149
2153
 
2154
+ # Validate --format: only "json" and "github-comment" are valid for prepare-context.
2155
+ # "yaml" is intentionally NOT supported here (use main command for yaml output).
2156
+ # Invalid values must error loudly — silently falling through to JSON is a lie.
2157
+ _PC_FORMAT_CHOICES = ("json", "github-comment")
2158
+ if format is not None and format not in _PC_FORMAT_CHOICES:
2159
+ typer.echo(
2160
+ f"Error: invalid value '{format}' for --format. "
2161
+ f"Valid options: {', '.join(_PC_FORMAT_CHOICES)}.",
2162
+ err=True,
2163
+ )
2164
+ raise typer.Exit(code=2)
2165
+ # github-comment only renders for review-pr; warn and normalize for other tasks.
2166
+ if format == "github-comment" and task != "review-pr":
2167
+ typer.echo(
2168
+ f"[warning] --format github-comment is only supported for the review-pr task. "
2169
+ f"Outputting JSON for '{task}'.",
2170
+ err=True,
2171
+ )
2172
+ format = "json"
2173
+
2150
2174
  target = path.resolve()
2151
2175
  if not target.exists() or not target.is_dir():
2152
2176
  typer.echo(f"Error: '{target}' is not a valid directory.", err=True)
@@ -3169,6 +3193,21 @@ def modernize_cmd(
3169
3193
  subsystems: list = ir.get("subsystems") or []
3170
3194
  reverse_graph: dict = ir.get("reverse_graph") or {}
3171
3195
 
3196
+ # Git churn: commit frequency per file in last 90 days → proxy for volatility
3197
+ from sourcecode.contract_pipeline import _get_git_churn
3198
+ _java_rel_paths = [
3199
+ str(Path(p).relative_to(root)).replace("\\", "/") if Path(p).is_absolute() else p.replace("\\", "/")
3200
+ for p in file_list
3201
+ ]
3202
+ _file_churn: dict[str, int] = _get_git_churn(root, _java_rel_paths)
3203
+
3204
+ # Build fqn → churn mapping via source_file field on graph nodes
3205
+ _fqn_churn: dict[str, int] = {}
3206
+ for _n in graph_nodes:
3207
+ _src = (_n.get("source_file") or "").replace("\\", "/")
3208
+ if _src and _src in _file_churn:
3209
+ _fqn_churn[_n["fqn"]] = _file_churn[_src]
3210
+
3172
3211
  # High-coupling nodes: high in_degree (many dependents = risky to change)
3173
3212
  coupling_nodes = sorted(
3174
3213
  [n for n in graph_nodes if n.get("in_degree", 0) >= 3],
@@ -3183,17 +3222,42 @@ def modernize_cmd(
3183
3222
  key=lambda n: n.get("fqn", ""),
3184
3223
  )[:20]
3185
3224
 
3186
- # Hotspot candidates: high in-degree service/repository nodes
3187
- hotspots = [
3188
- {
3189
- "fqn": n["fqn"],
3190
- "role": n.get("role", "other"),
3191
- "in_degree": n.get("in_degree", 0),
3192
- "out_degree": n.get("out_degree", 0),
3193
- }
3194
- for n in coupling_nodes
3195
- if n.get("role") in ("service", "repository", "controller")
3196
- ][:15]
3225
+ # Hotspot candidates: high in-degree service/repository/controller nodes,
3226
+ # ranked by composite score (in_degree × 2 + git_churn) for volatility signal.
3227
+ _HOTSPOT_ROLES = frozenset({"service", "repository", "controller", "entity"})
3228
+ _hotspot_candidates = [
3229
+ n for n in coupling_nodes if n.get("role") in _HOTSPOT_ROLES
3230
+ ]
3231
+ # Also include high-coupling nodes with name-based role inference even if
3232
+ # they didn't appear in coupling_nodes (in_degree >= 1 is sufficient here)
3233
+ _seen_hotspot_fqns = {n["fqn"] for n in _hotspot_candidates}
3234
+ for _n in graph_nodes:
3235
+ if (_n.get("fqn") not in _seen_hotspot_fqns
3236
+ and _n.get("role") in _HOTSPOT_ROLES
3237
+ and _n.get("in_degree", 0) >= 1
3238
+ and _fqn_churn.get(_n["fqn"], 0) >= 3):
3239
+ _hotspot_candidates.append(_n)
3240
+ _seen_hotspot_fqns.add(_n["fqn"])
3241
+
3242
+ _max_churn = max(_fqn_churn.values(), default=1)
3243
+ hotspots = sorted(
3244
+ [
3245
+ {
3246
+ "fqn": n["fqn"],
3247
+ "role": n.get("role", "other"),
3248
+ "in_degree": n.get("in_degree", 0),
3249
+ "out_degree": n.get("out_degree", 0),
3250
+ "git_churn_90d": _fqn_churn.get(n["fqn"], 0),
3251
+ "hotspot_score": round(
3252
+ n.get("in_degree", 0) * 2.0
3253
+ + (_fqn_churn.get(n["fqn"], 0) / _max_churn) * 5.0,
3254
+ 2,
3255
+ ),
3256
+ }
3257
+ for n in _hotspot_candidates
3258
+ ],
3259
+ key=lambda h: (-h["hotspot_score"], h["fqn"]),
3260
+ )[:15]
3197
3261
 
3198
3262
  # Cross-module tangles: subsystems with high member count
3199
3263
  tangle_modules = sorted(
@@ -14,6 +14,7 @@ No inference, approximation, or heuristics.
14
14
 
15
15
  from __future__ import annotations
16
16
 
17
+ import random
17
18
  import re
18
19
  import subprocess
19
20
  from collections import deque
@@ -217,6 +218,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
217
218
  "@Component": "component",
218
219
  "@Configuration": "config",
219
220
  "@Bean": "config",
221
+ # JPA / Hibernate
222
+ "@Entity": "entity",
223
+ "@MappedSuperclass": "entity",
224
+ "@Embeddable": "entity",
225
+ "@Table": "entity",
220
226
  # CDI / Jakarta EE
221
227
  "@ApplicationScoped": "service",
222
228
  "@RequestScoped": "service",
@@ -226,6 +232,9 @@ _JAVA_ROLE_MAP: dict[str, str] = {
226
232
  "@Dependent": "component",
227
233
  "@Named": "component",
228
234
  "@Produces": "component",
235
+ "@Stateless": "service",
236
+ "@Stateful": "service",
237
+ "@MessageDriven": "service",
229
238
  # JAX-RS
230
239
  "@Provider": "provider",
231
240
  "@Consumes": "controller",
@@ -233,6 +242,11 @@ _JAVA_ROLE_MAP: dict[str, str] = {
233
242
  "@QuarkusMain": "entrypoint",
234
243
  "@QuarkusTest": "test",
235
244
  "@QuarkusIntegrationTest": "test",
245
+ "@RegisterForReflection": "component",
246
+ # Spring Security / AOP
247
+ "@Aspect": "config",
248
+ "@EnableWebSecurity": "config",
249
+ "@EnableMethodSecurity": "config",
236
250
  }
237
251
 
238
252
  # Backward-compatible alias — external callers may reference this name.
@@ -746,6 +760,36 @@ def _java_role(annotations: list[str]) -> str:
746
760
  return "unknown"
747
761
 
748
762
 
763
+ # Name-suffix patterns for role inference when annotations are absent.
764
+ # Ordered: more specific patterns first.
765
+ _JAVA_NAME_ROLE_PATTERNS: list[tuple[re.Pattern, str]] = [
766
+ (re.compile(r"(?:Controller|Resource|Endpoint|Handler|Servlet|Filter|Action)$"), "controller"),
767
+ (re.compile(r"(?:ServiceImpl|ServiceBean|ServiceFacade|Facade)$"), "service"),
768
+ (re.compile(r"(?:Service|Manager|Processor|Coordinator|Orchestrator|UseCase|Interactor)$"), "service"),
769
+ (re.compile(r"(?:RepositoryImpl|DaoImpl|DAOImpl)$"), "repository"),
770
+ (re.compile(r"(?:Repository|Dao|DAO|Store|Persistence|JpaRepository|CrudRepository)$"), "repository"),
771
+ (re.compile(r"(?:Entity|Model|Domain|Vo|ValueObject|Record)$"), "entity"),
772
+ (re.compile(r"(?:Config|Configuration|Configurer|AutoConfiguration|Properties|Settings)$"), "config"),
773
+ (re.compile(r"(?:Factory|Builder|Provider|Supplier|Creator|Generator)$"), "provider"),
774
+ (re.compile(r"(?:Listener|Observer|Handler|EventHandler|MessageListener|Consumer)$"), "component"),
775
+ (re.compile(r"(?:Util|Utils|Helper|Helpers|Converter|Transformer|Mapper|Adapter)$"), "component"),
776
+ (re.compile(r"(?:Exception|Error)$"), "other"),
777
+ (re.compile(r"(?:Test|Tests|Spec|IT|IntegrationTest)$"), "test"),
778
+ ]
779
+
780
+
781
+ def _java_role_from_name(simple_name: str) -> str:
782
+ """Infer role from Java class simple name when annotations don't classify it.
783
+
784
+ Returns 'other' (never 'unknown') — callers use 'unknown' to mean
785
+ 'not classified at all'; 'other' means 'classified but no interesting role'.
786
+ """
787
+ for pattern, role in _JAVA_NAME_ROLE_PATTERNS:
788
+ if pattern.search(simple_name):
789
+ return role
790
+ return "other"
791
+
792
+
749
793
  # Backward-compatible alias used by external callers and serializer.
750
794
  _spring_role = _java_role
751
795
 
@@ -1093,7 +1137,18 @@ def _resolve_jaxrs_prefixes(
1093
1137
 
1094
1138
  for parent_simple, locator_path in locator_map[cls_simple]:
1095
1139
  parent_full = _resolve_jaxrs_prefixes(parent_simple, class_info, locator_map, new_visited)
1096
- for pp in parent_full:
1140
+ # Skip implementation/unrooted parents: if the parent resolves to only empty
1141
+ # prefixes AND has no class-level @Path annotation, it is a concrete impl class
1142
+ # (e.g. DefaultClientsApi implements ClientsApi) that duplicates a locator method
1143
+ # from its interface. Including it would produce spurious short paths like /{id}
1144
+ # alongside the correctly-resolved full path. The interface version is already
1145
+ # in the locator_map and will produce the correct full path.
1146
+ _parent_has_path_ann = class_info.get(parent_simple, {}).get("has_path_ann", False)
1147
+ _non_empty_parent = [p for p in parent_full if p]
1148
+ if not _non_empty_parent and not _parent_has_path_ann:
1149
+ continue
1150
+ use_parent_paths = _non_empty_parent if _non_empty_parent else parent_full
1151
+ for pp in use_parent_paths:
1097
1152
  for op in own_prefixes:
1098
1153
  combined = _join_path_segments(pp, locator_path, op)
1099
1154
  full_prefixes.append(combined)
@@ -1838,7 +1893,7 @@ def _assemble(
1838
1893
  sorted_rels = sorted(relations, key=lambda e: (e.from_symbol, e.type, e.to_symbol))
1839
1894
  sorted_changed = sorted(changed_symbols, key=lambda c: c.symbol)
1840
1895
 
1841
- # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic)
1896
+ # Java role map: fqn → role (annotation evidence + JAX-RS @Path heuristic + name fallback)
1842
1897
  spring_role_map: dict[str, str] = {}
1843
1898
  for sym in sorted_syms:
1844
1899
  if sym.type in ("class", "interface"):
@@ -1846,6 +1901,10 @@ def _assemble(
1846
1901
  # JAX-RS resource: class-level @Path without a recognized annotation → controller
1847
1902
  if role == "unknown" and "@Path" in sym.annotations:
1848
1903
  role = "controller"
1904
+ # Name-based fallback: when annotations provide no signal, infer from class name
1905
+ if role == "unknown":
1906
+ simple = sym.symbol.split(".")[-1].split("#")[0]
1907
+ role = _java_role_from_name(simple)
1849
1908
  spring_role_map[sym.symbol] = role
1850
1909
 
1851
1910
  # Degree maps (graph-derived)
@@ -2833,6 +2892,18 @@ def extract_java_endpoints(root: Path) -> "dict[str, Any]":
2833
2892
  entry["required_permission"] = security_info["required_permission"]
2834
2893
  endpoints.append(entry)
2835
2894
 
2895
+ # Filter out endpoints whose path looks like a Java FQN (e.g. dynamic admin routing
2896
+ # in frameworks like Broadleaf Commerce where @AdminSection registers entity class
2897
+ # FQNs as URL segments). These are not real REST paths — they are resolved at
2898
+ # runtime by the framework. Including them pollutes the endpoint surface with 20+
2899
+ # garbage entries that confuse agents and break endpoint count accuracy.
2900
+ # Pattern: path segment that matches a Java package hierarchy (org.foo.Bar).
2901
+ import re as _re_fqn
2902
+ _FQN_PATH_RE = _re_fqn.compile(
2903
+ r"/(org|com|net|io|edu)\.[a-z][a-z0-9]*\.[a-zA-Z]",
2904
+ )
2905
+ endpoints = [e for e in endpoints if not _FQN_PATH_RE.search(e.get("path", ""))]
2906
+
2836
2907
  # "no_security_signal" = no recognized security annotation at method OR class level.
2837
2908
  # Note: repos may use framework-level security (e.g. Keycloak itself) with no
2838
2909
  # per-endpoint annotations — this count reflects annotation-based coverage only.
@@ -2953,12 +3024,19 @@ def compute_blast_radius(
2953
3024
  # KeycloakSession with 2023 importers), deep BFS is O(n^depth) and collapses
2954
3025
  # to 70-91s at depth=4. Cap effective depth to 1 for hub classes so the
2955
3026
  # direct-caller list is still accurate but we skip the catastrophic expansion.
3027
+ # Instead of omitting indirect callers entirely, we do a sampled BFS: pick
3028
+ # _SAMPLE_SIZE random direct callers, run depth-2 BFS from those, then scale
3029
+ # up to estimate total indirect reach.
2956
3030
  _HUB_CALLER_THRESHOLD = 500
3031
+ _HUB_SAMPLE_SIZE = 20
3032
+ _HUB_SAMPLE_DEPTH = 2
2957
3033
  _effective_depth = max_depth
3034
+ _hub_class_guard = False
2958
3035
  for seed in matched_fqns:
2959
3036
  _seed_callers = _all_callers_from_rg(seed, reverse_graph)
2960
3037
  if len(_seed_callers) > _HUB_CALLER_THRESHOLD and max_depth > 1:
2961
3038
  _effective_depth = 1
3039
+ _hub_class_guard = True
2962
3040
  break
2963
3041
 
2964
3042
  for seed in matched_fqns:
@@ -3055,6 +3133,35 @@ def compute_blast_radius(
3055
3133
  indirect_callers.append(caller)
3056
3134
  queue.append((caller, depth + 1))
3057
3135
 
3136
+ # Sampled BFS for hub classes: direct BFS was capped at depth=1, so
3137
+ # indirect_callers is empty. Sample _HUB_SAMPLE_SIZE random direct callers,
3138
+ # run depth-_HUB_SAMPLE_DEPTH BFS from those, and scale up to estimate reach.
3139
+ _indirect_sampled = False
3140
+ _indirect_estimated_count: int | None = None
3141
+ if _hub_class_guard and direct_callers:
3142
+ _n_direct = len(direct_callers)
3143
+ _k = min(_HUB_SAMPLE_SIZE, _n_direct)
3144
+ _sample_seeds = random.sample(direct_callers, _k)
3145
+ _sample_visited: set[str] = set(matched_fqns) | set(direct_callers)
3146
+ _sample_queue: list[tuple[str, int]] = [(c, 1) for c in _sample_seeds]
3147
+ _sample_indirect: list[str] = []
3148
+ while _sample_queue:
3149
+ _snode, _sdepth = _sample_queue.pop(0)
3150
+ if _sdepth >= _HUB_SAMPLE_DEPTH:
3151
+ continue
3152
+ for _scaller in _all_callers_from_rg(_snode, reverse_graph):
3153
+ if _scaller not in _sample_visited:
3154
+ _sample_visited.add(_scaller)
3155
+ all_affected[_scaller] = _sdepth + 1
3156
+ _sample_indirect.append(_scaller)
3157
+ _sample_queue.append((_scaller, _sdepth + 1))
3158
+ if _sample_indirect:
3159
+ indirect_callers = _sample_indirect
3160
+ _indirect_sampled = True
3161
+ # Scale: sample covered _k of _n_direct seeds; extrapolate linearly
3162
+ _scale = _n_direct / _k
3163
+ _indirect_estimated_count = round(len(_sample_indirect) * _scale)
3164
+
3058
3165
  # ── 3. Identify affected endpoints from route_surface ─────────────────────
3059
3166
  affected_classes: set[str] = set(matched_fqns) | set(direct_callers) | set(indirect_callers)
3060
3167
  # Expand to enclosing classes of field/method FQNs in affected set.
@@ -3252,6 +3359,8 @@ def compute_blast_radius(
3252
3359
  confidence_level = "low"
3253
3360
 
3254
3361
  # ── 10. Explanation ───────────────────────────────────────────────────────
3362
+ _bfs_truncated = _effective_depth < max_depth
3363
+
3255
3364
  _parts: list[str] = []
3256
3365
  if n_direct:
3257
3366
  _parts.append(f"{n_direct} direct caller{'s' if n_direct != 1 else ''}")
@@ -3275,6 +3384,22 @@ def compute_blast_radius(
3275
3384
  f"({', '.join(_iface_names)}) — Spring/CDI DI pattern"
3276
3385
  )
3277
3386
 
3387
+ # Transparency: hub-class BFS truncation must appear in explanation so the
3388
+ # text and JSON are semantically identical.
3389
+ if _bfs_truncated:
3390
+ if _indirect_sampled and _indirect_estimated_count is not None:
3391
+ _parts.append(
3392
+ f"indirect callers sampled ({_HUB_SAMPLE_SIZE} of {n_direct} seeds, "
3393
+ f"depth={_HUB_SAMPLE_DEPTH}): {n_indirect} found in sample, "
3394
+ f"~{_indirect_estimated_count} estimated total"
3395
+ )
3396
+ else:
3397
+ _parts.append(
3398
+ f"indirect BFS skipped (hub class: {n_direct} direct callers "
3399
+ f"exceed {_HUB_CALLER_THRESHOLD} threshold; no indirect callers reachable "
3400
+ "from sample — graph may be a terminal sink)"
3401
+ )
3402
+
3278
3403
  if not _parts:
3279
3404
  explanation = f"No callers or dependents found for {target!r}. Low-risk isolated change."
3280
3405
  else:
@@ -3301,10 +3426,13 @@ def compute_blast_radius(
3301
3426
  "security_surface_affected": security_surface_affected,
3302
3427
  "cross_module_impact": cross_module_impact,
3303
3428
  "transactional_boundaries_touched": txn_nodes,
3304
- "depth_reached": max_depth,
3429
+ "depth_reached": _effective_depth, # actual BFS depth used, not the requested max
3430
+ "bfs_truncated": _bfs_truncated,
3305
3431
  "stats": {
3306
3432
  "direct_caller_count": n_direct,
3307
3433
  "indirect_caller_count": n_indirect,
3434
+ "indirect_callers_computed": not _bfs_truncated or _indirect_sampled,
3435
+ "indirect_callers_sampled": _indirect_sampled,
3308
3436
  "endpoints_affected_count": n_ep,
3309
3437
  "transactional_boundaries_count": n_txn,
3310
3438
  "mappers_affected_count": n_mappers,
@@ -3312,6 +3440,14 @@ def compute_blast_radius(
3312
3440
  "security_surface_count": n_sec,
3313
3441
  },
3314
3442
  }
3443
+ if _indirect_sampled and _indirect_estimated_count is not None:
3444
+ out["indirect_callers_estimated_count"] = _indirect_estimated_count
3445
+ out["indirect_callers_sample_note"] = (
3446
+ f"indirect_callers contains a sample (BFS depth={_HUB_SAMPLE_DEPTH} from "
3447
+ f"{min(_HUB_SAMPLE_SIZE, n_direct)} of {n_direct} direct callers). "
3448
+ f"Estimated total indirect reach: ~{_indirect_estimated_count}. "
3449
+ "Actual count may differ; use a lower-fan-in entry point for exact traversal."
3450
+ )
3315
3451
  if _candidates_out:
3316
3452
  out["candidates"] = _candidates_out
3317
3453
  if _iface_bridging:
@@ -3321,6 +3457,21 @@ def compute_blast_radius(
3321
3457
  "(Spring/CDI/Guice). direct_callers includes callers of the implemented "
3322
3458
  "interface(s) — these are the real production dependents."
3323
3459
  )
3460
+ if _bfs_truncated:
3461
+ out["bfs_truncation_reason"] = "hub_class_depth_cap"
3462
+ if _indirect_sampled:
3463
+ out["bfs_truncation_note"] = (
3464
+ f"Full BFS capped at depth=1 (hub class: {n_direct} direct callers "
3465
+ f">{_HUB_CALLER_THRESHOLD}). indirect_callers is a sampled estimate — "
3466
+ f"BFS from {min(_HUB_SAMPLE_SIZE, n_direct)} random seeds at depth={_HUB_SAMPLE_DEPTH}."
3467
+ )
3468
+ else:
3469
+ out["bfs_truncation_note"] = (
3470
+ f"Indirect BFS capped at depth=1: target has {n_direct} direct callers "
3471
+ f"(>{_HUB_CALLER_THRESHOLD} threshold). indirect_callers is empty — "
3472
+ "no indirect callers reachable from sampled seeds (terminal sink or sparse graph). "
3473
+ "Use a lower-fan-in entry point for full transitive traversal."
3474
+ )
3324
3475
  if len(direct_callers) > 30:
3325
3476
  out["direct_callers_note"] = (
3326
3477
  f"Showing 30/{n_direct} direct callers. Use --output to inspect full IR."
sourcecode/serializer.py CHANGED
@@ -1771,24 +1771,47 @@ def _angular_analysis(sm: "SourceMap") -> "Optional[dict[str, Any]]":
1771
1771
  if val and val not in route_paths:
1772
1772
  route_paths.append(val)
1773
1773
 
1774
- # Angular version from package.json
1774
+ # Angular version from package.json — check root first, then subdirectories.
1775
+ # In monorepos (Java + Angular), the Angular package.json is in a subdirectory
1776
+ # like frontend/ and not at the repo root. We probe candidate locations.
1775
1777
  angular_version: Optional[str] = None
1776
- pkg_json = root / "package.json"
1777
- if pkg_json.exists():
1778
+
1779
+ def _read_angular_version_from_pkg(pkg_path: Path) -> Optional[str]:
1780
+ """Extract @angular/core version from a package.json file."""
1778
1781
  try:
1779
- pkg = _json.loads(pkg_json.read_text(encoding="utf-8", errors="replace"))
1780
- # Use `or {}` so explicit `null` values in package.json don't
1781
- # raise TypeError when unpacking (BUG-4).
1782
+ pkg = _json.loads(pkg_path.read_text(encoding="utf-8", errors="replace"))
1782
1783
  deps = {
1783
1784
  **(pkg.get("dependencies") or {}),
1784
1785
  **(pkg.get("devDependencies") or {}),
1785
1786
  **(pkg.get("peerDependencies") or {}),
1786
1787
  }
1787
1788
  av = deps.get("@angular/core")
1788
- if av:
1789
- angular_version = av.lstrip("^~>=")
1789
+ if av and isinstance(av, str):
1790
+ return av.lstrip("^~>=")
1790
1791
  except Exception:
1791
1792
  pass
1793
+ return None
1794
+
1795
+ # 1. Try root package.json first (fastest, most common for pure Angular projects)
1796
+ _root_pkg = root / "package.json"
1797
+ if _root_pkg.exists():
1798
+ angular_version = _read_angular_version_from_pkg(_root_pkg)
1799
+
1800
+ # 2. If not found at root, search subdirectory package.json files.
1801
+ # Limit to ts_files-derived subdirs to avoid scanning the whole repo.
1802
+ if angular_version is None and ts_files:
1803
+ _candidate_dirs: set[str] = set()
1804
+ for ts_rel in ts_files[:200]: # sample first 200 ts files
1805
+ parts = ts_rel.replace("\\", "/").split("/")
1806
+ if len(parts) >= 2:
1807
+ _candidate_dirs.add(parts[0]) # top-level subdir (e.g. "frontend")
1808
+ for subdir in sorted(_candidate_dirs):
1809
+ _sub_pkg = root / subdir / "package.json"
1810
+ if _sub_pkg.exists():
1811
+ _v = _read_angular_version_from_pkg(_sub_pkg)
1812
+ if _v:
1813
+ angular_version = _v
1814
+ break
1792
1815
 
1793
1816
  # Also check angular.json for entry point
1794
1817
  entry_point: Optional[str] = None
@@ -1956,7 +1979,7 @@ def agent_view(sm: SourceMap, *, full: bool = False) -> dict[str, Any]:
1956
1979
  result["file_relevance_hint"] = (
1957
1980
  f"Showing top {_fr_limit}/{_total_paths} files by score "
1958
1981
  f"({'--full' if full else 'normal'} mode, bounded for signal quality). "
1959
- f"Use --deep for up to {compute_context_limit('deep', _FR_AGENT_CAP)} files."
1982
+ f"Use --full for up to {compute_context_limit('full', _FR_AGENT_CAP)} files."
1960
1983
  )
1961
1984
 
1962
1985
  # ── 5. Monorepo package roles (when available), capped ───────────────────
sourcecode/summarizer.py CHANGED
@@ -203,34 +203,78 @@ class ProjectSummarizer:
203
203
  __import__("re").IGNORECASE,
204
204
  )
205
205
 
206
+ # Patterns that indicate license notices or user-facing marketing text.
207
+ # These describe what the product does FOR users or its licensing terms,
208
+ # not the codebase architecture.
209
+ _LICENSE_MARKETING_RE = __import__("re").compile(
210
+ r"\bfair[- ]use\b" # Fair Use license
211
+ r"|\bcommunity edition\b" # product tier labels
212
+ r"|\benterprise edition\b"
213
+ r"|\bcommercial licen[sc]e\b"
214
+ r"|\bsource.available\b"
215
+ r"|\bavailable to companies\b" # license restriction
216
+ r"|\bunder \$\d+[MK]\b" # revenue threshold
217
+ r"|\bimportant:\s" # WARNING/IMPORTANT caveats
218
+ r"|\badd authentication to\b" # user-facing "add X to Y" marketing
219
+ r"|\bno need to deal with\b"
220
+ r"|\bwith minimum effort\b"
221
+ r"|\bsign up\b.*\bevaluation\b"
222
+ r"|\bcontact us\b.*\bmore information\b",
223
+ __import__("re").IGNORECASE,
224
+ )
225
+
206
226
  def _extract_first_useful_paragraph(self, content: str) -> str | None:
227
+ """Extract the first paragraph that describes the project architecture, not its license or marketing."""
207
228
  import re as _re
208
229
  _BADGE_RE = _re.compile(r"^\[?!\[") # [![badge](...)] or ![img](...)
209
230
  _LINK_ONLY_RE = _re.compile(r"^\[.*?\]\(.*?\)$") # pure link line
210
- lines: list[str] = []
231
+
232
+ paragraphs: list[str] = []
233
+ current_lines: list[str] = []
211
234
  in_code_block = False
235
+
212
236
  for raw_line in content.splitlines():
213
237
  line = raw_line.strip()
214
238
  if line.startswith("```"):
215
239
  in_code_block = not in_code_block
216
240
  continue
217
- if in_code_block or not line or line.startswith(("#", "<!--", ">")):
218
- if lines:
219
- break
241
+ if in_code_block:
220
242
  continue
221
- # Skip badge-only lines and pure-link lines — they are metadata, not descriptions
222
- if _BADGE_RE.match(line) or (not lines and _LINK_ONLY_RE.match(line)):
243
+ if not line or line.startswith(("#", "<!--", ">")):
244
+ if current_lines:
245
+ paragraphs.append(" ".join(current_lines).strip())
246
+ current_lines = []
223
247
  continue
224
- lines.append(line)
225
- if not lines:
226
- return None
227
- paragraph = " ".join(lines).strip()
228
- # Reject paragraphs that are startup/setup snippets, not domain descriptions.
229
- # Count how many startup signals appear; >1 means the paragraph is instructions.
230
- _startup_hits = len(self._STARTUP_RE.findall(paragraph))
231
- if _startup_hits >= 2:
232
- return None
233
- return paragraph
248
+ if _BADGE_RE.match(line) or _LINK_ONLY_RE.match(line):
249
+ if current_lines:
250
+ paragraphs.append(" ".join(current_lines).strip())
251
+ current_lines = []
252
+ continue
253
+ current_lines.append(line)
254
+ if current_lines:
255
+ paragraphs.append(" ".join(current_lines).strip())
256
+
257
+ _MD_LINK_RE = _re.compile(r"\[.+?\]\(.+?\)")
258
+ for paragraph in paragraphs[:6]: # Check up to 6 paragraphs
259
+ if not paragraph:
260
+ continue
261
+ # Reject very short fragments (< 30 chars) — likely just a section title
262
+ if len(paragraph) < 30:
263
+ continue
264
+ # Reject startup/setup snippets
265
+ _startup_hits = len(self._STARTUP_RE.findall(paragraph))
266
+ if _startup_hits >= 2:
267
+ continue
268
+ # Reject license notices and user-facing marketing text
269
+ if self._LICENSE_MARKETING_RE.search(paragraph):
270
+ continue
271
+ # Reject link-list paragraphs (docs/navigation sections):
272
+ # if more than 2 markdown links dominate the paragraph, it's a nav section
273
+ _link_count = len(_MD_LINK_RE.findall(paragraph))
274
+ if _link_count > 2 and _link_count * 30 > len(paragraph):
275
+ continue
276
+ return paragraph
277
+ return None
234
278
 
235
279
  _TYPE_LABELS: dict[str, str] = {
236
280
  "cli": "CLI",
@@ -256,6 +300,7 @@ class ProjectSummarizer:
256
300
 
257
301
  # Stack with frameworks — keep brief, skip internal module listings
258
302
  non_tooling_stacks = self._filter_non_tooling_stacks(sm)
303
+ primary = None
259
304
  if non_tooling_stacks:
260
305
  primary = self._select_summary_primary_stack(non_tooling_stacks)
261
306
  frameworks = [fw.name for fw in primary.frameworks[:2]]
@@ -269,6 +314,30 @@ class ProjectSummarizer:
269
314
  if domains:
270
315
  parts.append(f"Domains: {', '.join(domains)}")
271
316
 
317
+ # Quantitative structural suffix for Java projects — adds concrete scale signals
318
+ # that README descriptions omit (class count, transactional boundary count).
319
+ if primary is not None and primary.stack.lower() == "java":
320
+ quant_parts: list[str] = []
321
+ java_files = sum(
322
+ 1 for p in sm.file_paths if p.endswith(".java")
323
+ )
324
+ if java_files >= 50:
325
+ quant_parts.append(f"{java_files:,} Java classes")
326
+ txn_classes: list[str] = []
327
+ for stack in non_tooling_stacks:
328
+ txn_classes.extend(getattr(stack, "transactional_classes", []))
329
+ n_txn = len(set(txn_classes))
330
+ if n_txn > 0:
331
+ quant_parts.append(f"{n_txn} transactional boundaries")
332
+ ep_controllers = [
333
+ ep for ep in sm.entry_points
334
+ if ep.kind in ("controller", "rest_controller", "rest", "endpoint")
335
+ ]
336
+ if ep_controllers:
337
+ quant_parts.append(f"{len(ep_controllers)} controller entry points")
338
+ if quant_parts:
339
+ parts.append(", ".join(quant_parts))
340
+
272
341
  return ". ".join(parts) + "."
273
342
 
274
343
  def _detect_architecture_pattern(self, file_paths: list[str]) -> str | None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.22
3
+ Version: 1.31.24
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **AI-ready change intelligence for Java/Spring enterprise monoliths.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.22-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.24-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -263,7 +263,7 @@ pipx install sourcecode
263
263
 
264
264
  ```bash
265
265
  sourcecode version
266
- # sourcecode 1.31.22
266
+ # sourcecode 1.31.24
267
267
  ```
268
268
 
269
269
  ---
@@ -1,12 +1,12 @@
1
- sourcecode/__init__.py,sha256=Wsav7BZkVmw8XZqjz_WUnhLQyGjtZVwjYnyc_N4sraE,104
1
+ sourcecode/__init__.py,sha256=OFgxQ97Ujgsq98XhK0hoPwDBX_R6E7oO8JgpjanvaHQ,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
- sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
3
+ sourcecode/architecture_analyzer.py,sha256=Ry3aYT9dc7XuLmWLT5IZ93RkCf_P14Qtew0nGPvUl_8,42184
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=_btmeOJIe3t-NicF94D5ZAesa2YIJ0_QNExGnbHxGFE,50578
6
6
  sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
7
7
  sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
8
8
  sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
9
- sourcecode/cli.py,sha256=qMn-4zD8v03dmkn-AZsf2TSplyhjbq9ZPMAcWl_Lrxg,147576
9
+ sourcecode/cli.py,sha256=zykj3wNxSXAdiBIgmn6KWLdrNLHCEUrhv4YL9rlRlUE,150539
10
10
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
11
11
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
12
12
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -32,13 +32,13 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
32
32
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
33
33
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
34
34
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
35
- sourcecode/repository_ir.py,sha256=NooCrMJYqycKSYTEroVWTYR8X83hHaAYKTsgYxvlz-I,140221
35
+ sourcecode/repository_ir.py,sha256=sp6IdcZbFAQjznUthMBu_6Mu5RBxVP72d5Vw0hKnH7o,148437
36
36
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
37
37
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
38
38
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
39
39
  sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
40
- sourcecode/serializer.py,sha256=V8ZV3Y1j4T6rkpO09-PvpVORioWWWbSnOvDjZ2hmQ2U,122144
41
- sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
40
+ sourcecode/serializer.py,sha256=7TzN2GLtIP3PIVatoB98_7DQdoAkUNvvNVU7Bz7r_K8,123313
41
+ sourcecode/summarizer.py,sha256=BMHJA0Do4rBnabc1_BxHoETTNb5ew0VqCX_eY3_PdCg,20706
42
42
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
43
43
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
44
44
  sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
76
76
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
77
77
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
78
78
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
79
- sourcecode-1.31.22.dist-info/METADATA,sha256=zjEDrWUQ-08LOjvIfXTDUgQ4UTPkneyr4CFGZc5yaOo,31103
80
- sourcecode-1.31.22.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
- sourcecode-1.31.22.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
- sourcecode-1.31.22.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
- sourcecode-1.31.22.dist-info/RECORD,,
79
+ sourcecode-1.31.24.dist-info/METADATA,sha256=y1qV8wDttJuezYPLbBUeZZwzQMWWrtJ8clEuBAchsJ0,31103
80
+ sourcecode-1.31.24.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
+ sourcecode-1.31.24.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
+ sourcecode-1.31.24.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
+ sourcecode-1.31.24.dist-info/RECORD,,